# tools/lqdn/list_deputes.py
#
# 34 lines
# 999 B
# Python

# encoding: utf-8
import os
from string import ascii_uppercase
import sys
from time import sleep
from bs4 import BeautifulSoup
import requests
# Scrape the "Députés" category of the La Quadrature du Net wiki, requesting
# the category listing once per uppercase ASCII letter, and write the sorted,
# de-duplicated names to <data_root>/liste_deputes.txt (one name per line).

# Category listing URL; the starting point of the listing is appended after
# "pagefrom=".
url = "https://wiki.laquadrature.net/index.php?title=Cat%C3%A9gorie:D%C3%A9put%C3%A9s&pagefrom="
# Output directory, overridable through the POLITIKORAMA_DATA_ROOT variable.
data_root = os.environ.get("POLITIKORAMA_DATA_ROOT", "../tmp")
# Seconds to wait for the wiki before giving up. requests applies no timeout
# unless one is passed, so a stalled connection would hang the script forever.
request_timeout = 30

deputes = []
for letter in ascii_uppercase:
    # Do not DDOS lqdn wiki ;o)
    sleep(.2)
    response = requests.get(f"{url}{letter}", timeout=request_timeout)
    # Fail loudly on an HTTP error instead of parsing an error page: stopping
    # here means a broken scrape never overwrites the output file with a
    # partial list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")
    print(letter, end="")
    for anchor in soup.find_all("a"):
        name = anchor.text
        # Keep links whose visible text equals their "title" attribute and
        # whose text does not start with "Deputes".
        # NOTE(review): presumably this selects the member-page links of the
        # category listing -- confirm against the wiki markup.
        if name == anchor.get("title") and not name.startswith("Deputes"):
            deputes.append(name)
            # One dot per collected name, as a progress indicator.
            print(".", end="")
    # Progress is printed without newlines, so flush once per letter.
    sys.stdout.flush()
print()

# The output directory (default "../tmp") may not exist yet; create it rather
# than failing once the whole scrape has completed. An empty data_root means
# "current directory", which os.makedirs would reject.
if data_root:
    os.makedirs(data_root, exist_ok=True)
# Despite its name, this is the full path of the output file.
data_folder = os.path.join(data_root, "liste_deputes.txt")
with open(data_folder, "w", encoding="utf-8") as file_handler:
    # The same name can be collected more than once (the listings fetched for
    # successive letters presumably overlap), so de-duplicate before sorting.
    file_handler.writelines(f"{depute}\n" for depute in sorted(set(deputes)))