31 lines
882 B
Python
31 lines
882 B
Python
# encoding: utf-8
|
|
|
|
from string import ascii_uppercase
|
|
from time import sleep
|
|
import sys
|
|
|
|
from bs4 import BeautifulSoup
|
|
import requests
|
|
|
|
# Category listing on the La Quadrature du Net wiki; the value appended after
# ``pagefrom=`` is the string the listing starts from.
url = "https://wiki.laquadrature.net/index.php?title=Cat%C3%A9gorie:D%C3%A9put%C3%A9s&pagefrom="

# Seconds to wait for the wiki before giving up. ``requests`` applies no
# timeout by default, so without this a stalled connection blocks forever.
REQUEST_TIMEOUT = 10

# Every matching link text found, in page order; duplicates are removed only
# when the output file is written.
deputes = []

# Request the listing once per uppercase ASCII letter.
# NOTE(review): this assumes a single page holds every entry for a letter;
# confirm against the wiki's page size.
for letter in ascii_uppercase:
    # Be polite: pause between requests so we do not DDoS the lqdn wiki ;o)
    sleep(.2)

    response = requests.get(f"{url}{letter}", timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing the error page and
    # silently writing an incomplete list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")

    # Progress display: the letter, then one dot per name collected.
    print(letter, end="")
    for anchor in soup.find_all("a"):
        name = anchor.text
        # Keep links whose visible text equals their ``title`` attribute,
        # except those whose text starts with "Deputes".
        if name == anchor.get("title") and not name.startswith("Deputes"):
            deputes.append(name)
            print(".", end="")
    # One flush per page is enough to keep the progress line live.
    sys.stdout.flush()
    print()

# Write the unique names, sorted, one per line.
with open("../tmp/liste_deputes.txt", "w", encoding="utf-8") as file_handler:
    file_handler.writelines(f"{depute}\n" for depute in sorted(set(deputes)))