First commit
This commit is contained in:
commit
cc37b02aaf
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
tmp/
|
35
assemblee_nationale/load_datas.py
Normal file
35
assemblee_nationale/load_datas.py
Normal file
@ -0,0 +1,35 @@
|
||||
# encoding: utf-8
"""
Tool used to upload representatives from French National Assembly.

Downloads the open-data archive (actors / mandates / organs) and unpacks
it into ../tmp/json for the scrap_* scripts to consume.
"""

import os
import shutil

import requests

url = "https://data.assemblee-nationale.fr/static/openData/repository/15/amo/tous_acteurs_mandats_organes_xi_legislature/AMO30_tous_acteurs_tous_mandats_tous_organes_historique.json.zip"

# Make sure the working directory exists before cleaning / downloading
# (the script previously crashed with FileNotFoundError on a fresh clone
# where ../tmp had not been created yet).
os.makedirs("../tmp", exist_ok=True)

# Cleaning old data
try:
    os.remove("../tmp/assemblee_nationale.zip")
except FileNotFoundError:
    # No file to remove
    pass
try:
    shutil.rmtree("../tmp/json")
except FileNotFoundError:
    # No folder to remove
    pass

# Download and extract data; the archive is streamed to disk in 8 KiB
# chunks so the (large) zip is never held fully in memory.
print("Downloading archive")
with requests.get(url, stream=True) as result:
    result.raise_for_status()
    with open("../tmp/assemblee_nationale.zip", "wb") as f:
        for chunk in result.iter_content(chunk_size=8192):
            f.write(chunk)
print("Unpacking archive")
shutil.unpack_archive("../tmp/assemblee_nationale.zip", "../tmp/")

# The zip itself is no longer needed once extracted.
os.remove("../tmp/assemblee_nationale.zip")
|
1
assemblee_nationale/schemas.txt
Normal file
1
assemblee_nationale/schemas.txt
Normal file
@ -0,0 +1 @@
|
||||
https://www.assemblee-nationale.fr/opendata/Schemas_Entites/AMO/Schemas_Organes.html
|
36
assemblee_nationale/scrap_entities.py
Normal file
36
assemblee_nationale/scrap_entities.py
Normal file
@ -0,0 +1,36 @@
|
||||
# encoding: utf-8
"""
Tool used to upload representatives from French National Assembly.

Reads every "organe" JSON file extracted by load_datas.py and flattens it
into ../tmp/assemblee_nationale_entities.csv.
"""

import csv
from datetime import datetime
import json
import os

# Extract representatives
print("Scraping entities")
with open("../tmp/assemblee_nationale_entities.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["type_code", "country_iso2", "name", "code", "picture", "start", "end"])

    for filename in os.listdir("../tmp/json/organe"):
        # Loading informations
        with open(os.path.join("../tmp/json/organe", filename)) as file_handler:
            organe = json.load(file_handler)["organe"]
        type_raw = organe["codeType"]
        name = organe["libelle"]
        code = organe["uid"]
        # Start date falls back to the approval date ("dateAgrement") when
        # no explicit beginning is recorded; both may be absent.
        start = organe["viMoDe"].get("dateDebut", organe["viMoDe"].get("dateAgrement", None))
        end = organe["viMoDe"].get("dateFin", None)
        # CSV line (picture column is intentionally empty for entities).
        writer.writerow([
            type_raw,
            "FR",
            name,
            code,
            "",
            start,
            end,
        ])
|
43
assemblee_nationale/scrap_memberships.py
Normal file
43
assemblee_nationale/scrap_memberships.py
Normal file
@ -0,0 +1,43 @@
|
||||
# encoding: utf-8
"""
Tool used to upload representatives from French National Assembly.

Walks every "acteur" JSON file and writes one CSV row per
(mandate, organ) pair into ../tmp/assemblee_nationale_memberships.csv.
"""

import csv
from datetime import datetime
import json
import os

from slugify import slugify

# Extract representatives
print("Scraping memberships")
with open("../tmp/assemblee_nationale_memberships.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["representative_slug", "role_code", "entity_code", "start", "end"])

    acteur_dir = "../tmp/json/acteur"
    for filename in os.listdir(acteur_dir):
        # Loading informations
        with open(os.path.join(acteur_dir, filename)) as file_handler:
            acteur = json.load(file_handler)["acteur"]
        identity = acteur["etatCivil"]["ident"]
        representative_slug = slugify(f"{identity['prenom']} {identity['nom']}")
        # A single mandate is serialized as a bare dict; normalize to a list.
        mandats = acteur["mandats"]["mandat"]
        if isinstance(mandats, dict):
            mandats = [mandats]
        for mandat in mandats:
            role_code = mandat["infosQualite"].get("codeQualite", "")
            start = mandat.get("dateDebut", None)
            end = mandat.get("dateFin", None)
            # Same serialization quirk: a lone organ ref is a bare string.
            organes = mandat["organes"]["organeRef"]
            if isinstance(organes, str):
                organes = [organes]
            for entity_code in organes:
                # CSV line
                writer.writerow([
                    representative_slug,
                    role_code,
                    entity_code,
                    start,
                    end,
                ])
|
63
assemblee_nationale/scrap_representatives.py
Normal file
63
assemblee_nationale/scrap_representatives.py
Normal file
@ -0,0 +1,63 @@
|
||||
# encoding: utf-8
"""
Tool used to upload representatives from French National Assembly.

Flattens every "acteur" JSON file into
../tmp/assemblee_nationale_representatives.csv.
"""

import csv
import json
import os
import sys

# Extract representatives
print("Scraping representatives")
with open("../tmp/assemblee_nationale_representatives.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["code", "name", "picture", "nationality", "sex", "birth_date", "birth_place", "job"])

    for filename in os.listdir("../tmp/json/acteur"):
        print(".", end="")
        sys.stdout.flush()
        # Loading informations
        with open(os.path.join("../tmp/json/acteur", filename)) as file_handler:
            acteur = json.load(file_handler)["acteur"]
        # Source uid looks like "PAxxxx"; keep the digits under our own
        # "AN_" prefix so codes stay unique across data sources.
        uid = f"AN_{acteur['uid']['#text'][2:]}"
        # Identity
        identity = acteur["etatCivil"]["ident"]
        # Sex is inferred from the civility field ("M." = male).
        if identity["civ"] == "M.":
            sex = "M"
        else:
            sex = "F"
        fullname = f"{identity['prenom']} {identity['nom']}"
        birth = acteur["etatCivil"]["infoNaissance"]
        birth_date = birth["dateNais"]
        # A dict value here presumably means an empty XML element in the
        # source dump — treated as "unknown". TODO confirm against the data.
        if isinstance(birth["villeNais"], dict):
            birth_city = None
        else:
            birth_city = birth["villeNais"]
        if isinstance(acteur["profession"]["libelleCourant"], dict):
            job = None
        else:
            job = acteur["profession"]["libelleCourant"]
        # Picture
        legislatures = []
        mandats = acteur["mandats"]["mandat"]
        if isinstance(mandats, dict):
            mandats = [mandats]
        for mandat in mandats:
            if mandat["legislature"] is not None:
                legislatures.append(int(mandat["legislature"]))
        part_1 = "https://www2.assemblee-nationale.fr/static/tribun/"
        if not legislatures:
            # No mandate with a known legislature: no official photo URL.
            picture = ""
        else:
            # Photos are published per legislature; use the most recent
            # one. uid[3:] strips the "AN_" prefix back off.
            picture = f"{part_1}{max(legislatures)}/photos/{uid[3:]}.jpg"
        # CSV line
        writer.writerow([
            uid,
            fullname,
            picture,
            "FR",
            sex,
            birth_date,
            birth_city,
            job,
        ])
|
36
assemblee_nationale/scrap_roles.py
Normal file
36
assemblee_nationale/scrap_roles.py
Normal file
@ -0,0 +1,36 @@
|
||||
# encoding: utf-8
"""
Tool used to upload representatives from French National Assembly.

Collects the distinct mandate role codes from every "acteur" JSON file
into ../tmp/assemblee_nationale_roles.csv.
"""

import csv
import json
import os
import sys

# Extract roles
print("Scraping roles")
with open("../tmp/assemblee_nationale_roles.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["code", "name"])

    # Insertion-ordered accumulator of distinct role codes.
    roles = []
    for filename in os.listdir("../tmp/json/acteur"):
        print(".", end="")
        sys.stdout.flush()
        # Loading informations
        with open(os.path.join("../tmp/json/acteur", filename)) as handler:
            acteur = json.load(handler)["acteur"]
        # A single mandate is serialized as a bare dict; normalize.
        mandats = acteur["mandats"]["mandat"]
        if isinstance(mandats, dict):
            mandats = [mandats]
        for mandat in mandats:
            role = mandat["infosQualite"].get("codeQualite", "")
            if role is not None and role not in roles:
                roles.append(role)
    # One row per distinct role; code and display name are identical.
    for role in roles:
        # CSV line
        writer.writerow([role, role])
|
66
assemblee_nationale/scrap_types.py
Normal file
66
assemblee_nationale/scrap_types.py
Normal file
@ -0,0 +1,66 @@
|
||||
# encoding: utf-8
"""
Tool used to upload representatives from French National Assembly.

Collects the distinct organ type codes into
../tmp/assemblee_nationale_types.csv, mapping each to a human-readable
label when one is known.
"""

import csv
import json
import os
import sys

# Human-readable labels for the organ type codes (see
# assemblee_nationale/schemas.txt for the upstream schema). Codes missing
# from this table fall back to the raw upper-cased code.
TYPES = {
    "API": "Assemblée parlementaire internationale",
    "ASSEMBLEE": "Assemblée Nationale",
    "CJR": "Cour de Justice de la République",
    "CMP": "Commission mixte paritaire",
    "CNPE": "Commission d’enquête",
    "CNPS": "Commission spéciale",
    "COMNL": "Autre commission permanente",
    "COMPER": "Commission permanente législative",
    "COMSENAT": "Commission sénatoriale",
    "COMSPSENAT": "Commission spéciale sénatoriale",
    "CONFPT": "Conférence des Présidents",
    "CONSTITU": "Conseil constitutionnel",
    "DELEG": "Délégation parlementaire",
    "DELEGBUREAU": "Délégation du Bureau de l'Assemblée Nationale",
    "DELEGSENAT": "Délégation sénatoriale",
    "GA": "Groupe d'amitié",
    "GE": "Groupe d'études",
    "GEVI": "Groupe d’études à vocation internationale",
    "GOUVERNEMENT": "Gouvernement",
    "GP": "Groupe politique",
    "GROUPESENAT": "Groupe sénatorial",
    "HCJ": "Haut Cour de Justice",
    "MINISTERE": "Ministère",
    "MISINFO": "Mission d’information",
    "MISINFOCOM": "Mission d’information commune",
    "MISINFOPRE": "Missions d’information de la conférence des Présidents",
    "OFFPAR": "Office parlementaire ou délégation mixte",
    "ORGEXTPARL": "Organisme extra parlementaire",
    "PARPOL": "Parti politique",
    "PRESREP": "Présidence de la République",
    "SENAT": "Sénat",
}


# Extract types
print("Scraping types")
with open("../tmp/assemblee_nationale_types.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["code", "name"])

    types = {}
    for filename in os.listdir("../tmp/json/organe"):
        print(".", end="")
        sys.stdout.flush()
        # Loading informations
        with open(os.path.join("../tmp/json/organe", filename)) as file_handler:
            organe = json.load(file_handler)["organe"]
        # Hoisted: the upper-cased code was previously recomputed four
        # times per file.
        code = organe["codeType"].upper()
        if code not in types:
            types[code] = TYPES.get(code, code)
    for type_code, type_name in types.items():
        # CSV line
        writer.writerow([
            type_code,
            type_name,
        ])
|
30
lqdn/list_deputes.py
Normal file
30
lqdn/list_deputes.py
Normal file
@ -0,0 +1,30 @@
|
||||
# encoding: utf-8
"""List deputy page names from La Quadrature du Net's wiki.

Crawls the "Députés" category page letter by letter and saves the
de-duplicated, sorted names to ../tmp/liste_deputes.txt.
"""

from string import ascii_uppercase
from time import sleep
import sys

from bs4 import BeautifulSoup
import requests

url = "https://wiki.laquadrature.net/index.php?title=Cat%C3%A9gorie:D%C3%A9put%C3%A9s&pagefrom="

deputes = []
for letter in ascii_uppercase:
    # Do not DDOS lqdn wiki ;o)
    sleep(.2)
    content = requests.get(f"{url}{letter}").text
    soup = BeautifulSoup(content, features="lxml")

    anchors = soup.find_all("a")
    print(letter, end="")
    for anchor in anchors:
        # Deputy page links have text == title attribute; skip the
        # "Deputes..." index/maintenance pages.
        if anchor.text == anchor.get("title") and not anchor.text.startswith("Deputes"):
            deputes.append(anchor.text)
            print(".", end="")
    sys.stdout.flush()
    print()

# De-duplicate (a page can appear under several letters) and sort before
# persisting. (sorted() accepts the set directly — no intermediate list.)
with open("../tmp/liste_deputes.txt", "w", encoding="utf-8") as file_handler:
    for depute in sorted(set(deputes)):
        file_handler.write(f"{depute}\n")
|
61
lqdn/scrap_representatives.py
Normal file
61
lqdn/scrap_representatives.py
Normal file
@ -0,0 +1,61 @@
|
||||
# encoding: utf-8
"""Scrape deputy identity data from La Quadrature du Net's wiki.

Reads the page names produced by list_deputes.py and writes
../tmp/lqdn_representatives.csv.
"""

import csv
from datetime import datetime
import locale
import re
from string import ascii_uppercase
from time import sleep
import sys

from bs4 import BeautifulSoup
import requests
from slugify import slugify

url = "https://wiki.laquadrature.net/index.php?title=Cat%C3%A9gorie:D%C3%A9put%C3%A9s&pagefrom="
# French locale so strptime("%B") can parse month names like "janvier".
# NOTE(review): "FR" is a Windows-style locale name; POSIX systems usually
# need "fr_FR.UTF-8" — confirm on the target machine.
locale.setlocale(locale.LC_ALL, "FR")

deputes = []
with open("../tmp/liste_deputes.txt", encoding="utf-8") as file_handler:
    deputes = file_handler.read().splitlines()

# Extract representatives
with open("../tmp/lqdn_representatives.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["name", "picture", "nationality", "sex", "birth_date", "birth_place", "job"])

    for depute in deputes:
        print(".", end="")
        sys.stdout.flush()
        # Loading informations
        content = requests.get(f"https://wiki.laquadrature.net/{depute}").text
        soup = BeautifulSoup(content, features="lxml")
        deputy = soup.find("span", attrs={"class": "mw-headline"})
        # Identity — headline looks like "Label : Full Name, extra".
        fullname = deputy.text.split(":")[1].split(",")[0].strip()
        # "Né le" (male) vs "Née le" (female) on the wiki page.
        if "Né le" in content:
            sex = "M"
        else:
            sex = "F"
        birth = soup.find(text=re.compile("Née? le")).parent.parent
        birth_date = birth.contents[1].strip()
        birth_date = datetime.strptime(birth_date, "%d %B %Y").strftime("%Y-%m-%d")
        birth_city = birth.contents[3].strip("(").split()[0].strip()
        try:
            job = soup.find(text=re.compile("Profession")).parent.parent
            job_name = job.contents[1].split(":")[1].strip()
        except (AttributeError, IndexError):
            # No "Profession" line on the page (find() returned None) or
            # the line is not in the expected "label : value" shape.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            job_name = ""
        # Picture
        picture = soup.find("img", attrs={"alt": fullname})["src"]
        picture = f"https://wiki.laquadrature.net{picture}"
        # CSV line
        writer.writerow([
            fullname,
            picture,
            "FR",
            sex,
            birth_date,
            birth_city,
            job_name,
        ])
|
62
lqdn/scrap_stances.py
Normal file
62
lqdn/scrap_stances.py
Normal file
@ -0,0 +1,62 @@
|
||||
# encoding: utf-8
"""Scrape deputies' public stances (quotes) from LQDN's wiki.

Each <h5> headline on a deputy page introduces a stance, optionally
followed by <blockquote> extracts. Output: ../tmp/lqdn_stances.csv.
"""

from datetime import datetime
from string import ascii_uppercase
from time import sleep
import sys

from bs4 import BeautifulSoup
import csv
import requests
from slugify import slugify

with open("../tmp/liste_deputes.txt") as file_handler:
    deputes = file_handler.read().splitlines()

with open("../tmp/lqdn_stances.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["name", "slug", "matter", "subject", "date", "extract", "source_url"])

    for depute in deputes:
        # Do not DDOS lqdn wiki ;o)
        sleep(.2)
        content = requests.get(f"https://wiki.laquadrature.net/{depute}").text
        soup = BeautifulSoup(content, features="lxml")
        deputy = soup.find("span", attrs={"class": "mw-headline"})
        if deputy is not None:
            stance_author = deputy.text.split(",")[0].split(":")[1].strip()
        else:
            stance_author = depute
        # Renamed from `quotes`: the old code rebound the same name inside
        # the loop, shadowing the list being iterated.
        headlines = soup.find_all("h5")
        for quote in headlines:
            # Headlines normally read "DD/MM/YYYY Subject…"; fall back to
            # an undated stance when the prefix is not a date. strptime
            # raises ValueError, split()[0] raises IndexError on an empty
            # headline. (Was a bare `except:`.)
            try:
                stance_date = datetime.strptime(quote.text.split()[0], "%d/%m/%Y")
                stance_subject = " ".join(quote.text.split()[1:])
            except (ValueError, IndexError):
                stance_date = None
                stance_subject = quote.text
            # TODO: Set the matter accordingly to the subject
            stance_matter = stance_subject.split(":")[0]
            if quote.find("a"):
                stance_link = quote.find("a").get("href")
            else:
                stance_link = None
            # Gather the <blockquote> siblings that follow the headline.
            extracts = []
            block = quote.find_next_sibling()
            while block is not None and block.name == "blockquote":
                extracts.append(block.text)
                block = block.find_next_sibling()
            stance_quote = "\n".join(extracts)
            writer.writerow([
                stance_author,
                slugify(stance_author),
                stance_matter,
                stance_subject,
                datetime.strftime(stance_date, "%Y-%m-%d") if stance_date is not None else None,
                stance_quote,
                stance_link,
            ])
        print(".", end="")
        sys.stdout.flush()
|
46
lqdn/scrap_votes.py
Normal file
46
lqdn/scrap_votes.py
Normal file
@ -0,0 +1,46 @@
|
||||
# encoding: utf-8
"""Scrape deputies' votes from LQDN's wiki (work in progress).

Writes lqdn_votes.csv; the per-vote fields are not parsed yet and are
emitted empty.
"""

from datetime import datetime
from string import ascii_uppercase
from time import sleep
import sys

from bs4 import BeautifulSoup
import csv
import requests
from slugify import slugify

with open("lqdn_representatives.txt") as file_handler:
    deputes = file_handler.read().splitlines()

with open("lqdn_votes.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["name", "slug", "matter", "subject", "date", "extract", "source_url"])

    for depute in deputes:
        # Do not DDOS lqdn wiki ;o)
        sleep(.2)
        content = requests.get(f"https://wiki.laquadrature.net/{depute}").text
        soup = BeautifulSoup(content, features="lxml")
        deputy = soup.find("span", attrs={"class": "mw-headline"})
        if deputy is not None:
            stance_author = deputy.text.split(",")[0].split(":")[1].strip()
        else:
            stance_author = depute
        print(stance_author)
        # Vote parsing is not implemented yet: default every vote field to
        # an empty value so a row can still be written. (The original
        # referenced stance_* names that were never assigned, which raised
        # NameError on the first representative.)
        stance_matter = ""
        stance_subject = ""
        stance_date = ""
        stance_quote = ""
        stance_link = ""
        # NOTE(review): .parent on find() will raise AttributeError when a
        # page has no "Votes" section — confirm all pages have one.
        votes = soup.find("span", attrs={"id": "Votes"}).parent.find_next_sibling("ul")
        if votes is not None:
            for vote in votes.find_all("li"):
                # TODO: parse the individual vote entries
                pass  # print(f"  {vote}")
        writer.writerow([
            stance_author,
            slugify(stance_author),
            stance_matter,
            stance_subject,
            stance_date,
            stance_quote,
            stance_link,
        ])
        print(".", end="")
        sys.stdout.flush()
    print()
|
33
parltrack/json_reader.py
Normal file
33
parltrack/json_reader.py
Normal file
@ -0,0 +1,33 @@
|
||||
# encoding: utf-8

import json

# Default block size
SIZE_BLOCK = 32768


def json_reader(filepath, size_block=SIZE_BLOCK):
    """Yield each top-level JSON object found in *filepath*.

    The file is read in blocks of *size_block* characters so files much
    larger than memory can be processed. Objects are detected by tracking
    brace depth; braces inside string literals (including after escaped
    quotes such as \\") are ignored. Anything between top-level objects
    (whitespace, commas, array brackets) is skipped.

    Raises json.JSONDecodeError if a captured span is not valid JSON.
    """
    # Initiate reading
    level = 0          # current {...} nesting depth
    in_string = False  # inside a JSON string literal?
    escaped = False    # previous char was a backslash inside a string
    element = ""       # accumulated text of the object being captured
    with open(filepath) as f:
        content = f.read(size_block)
        while content:
            for char in content:
                # A quote toggles string mode unless it is escaped.
                # (BUG FIX: the original line was `in_string != in_string`,
                # a no-op comparison — in_string never changed, so braces
                # inside strings corrupted the depth counter.)
                if escaped:
                    escaped = False
                elif in_string and char == "\\":
                    escaped = True
                elif char == '"':
                    in_string = not in_string
                if not in_string and char == "{":
                    level += 1
                if level > 0:
                    element += char
                if not in_string and char == "}":
                    level -= 1
                    if level == 0:
                        yield json.loads(element)
                        element = ""
            # Reading next block
            content = f.read(size_block)
|
286
parltrack/parltrack.py
Normal file
286
parltrack/parltrack.py
Normal file
@ -0,0 +1,286 @@
|
||||
# encoding: utf-8
|
||||
|
||||
from datetime import datetime
|
||||
from io import StringIO, BytesIO
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
|
||||
import click
|
||||
import requests
|
||||
|
||||
from app.model.address import AddressModel
|
||||
from app.model.contact import ContactModel
|
||||
from app.model.entity import EntityModel
|
||||
from app.model.membership import MembershipModel
|
||||
from app.model.representative import RepresentativeModel
|
||||
from app.model.type import TypeModel
|
||||
from command.json_reader import json_reader
|
||||
|
||||
import json
|
||||
|
||||
|
||||
def import_representatives(filepath):
    """Load the parltrack MEP dump at *filepath* and echo one dot per entry.

    NOTE(review): the loop does not persist anything yet — the actual
    import logic is still to be written. A streaming json_reader()
    variant is sketched below for when the dump no longer fits in memory.
    """
    click.echo("Importing representatives from parltrack")
    click.echo(" Reading file")
    with open(filepath) as dump:
        meps = json.load(dump)
    for representative in meps:
        click.echo(".", nl=False)

    # for representative in json_reader(filepath):
    #     click.echo(".", nl=False)
|
||||
|
||||
|
||||
|
||||
|
||||
def toto():
    """One-shot import of Assemblée Nationale open data into the app models.

    NOTE(review): clearly work-in-progress —
      * ``country`` is referenced throughout but never defined in this
        file (NameError at runtime); presumably a country model instance
        looked up elsewhere — TODO confirm.
      * the download/extract branch is disabled behind ``if False:`` and
        a hard-coded Windows path is used instead.
      * the local ``type`` shadows the builtin.
    """
    # Delete only things related to "Assemblée Nationale" !
    MembershipModel.query.delete() #filter_by(source="Assemblée Nationale").delete()
    RepresentativeModel.query.filter_by(source="Assemblée Nationale").delete()
    AddressModel.query.filter_by(source="Assemblée Nationale").delete()
    ContactModel.query.filter_by(source="Assemblée Nationale").delete()
    EntityModel.query.filter_by(source="Assemblée Nationale").delete()
    TypeModel.query.filter_by(source="Assemblée Nationale").delete()

    url = "https://data.assemblee-nationale.fr/static/openData/repository/15/amo/tous_acteurs_mandats_organes_xi_legislature/AMO30_tous_acteurs_tous_mandats_tous_organes_historique.json.zip"
    # Dead code: disabled download + extraction of the archive into a
    # temporary directory (kept for when the hard-coded path below goes).
    if False:
        datas = BytesIO()
        result = requests.get(url, stream=True)
        datas.write(result.content)
        datas.seek(0)
        with tempfile.TemporaryDirectory() as tmpdir:
            with zipfile.ZipFile(datas, "r") as zip_ref:
                zip_ref.extractall(tmpdir)
            print(tmpdir)
            for root, dirs, files in os.walk(tmpdir):
                if root.endswith("acteur"):
                    for filename in files:
                        print(os.path.join(root, filename))
                    for filename in files[:1]:
                        with open(os.path.join(root, filename)) as filehandler:
                            data = json.load(filehandler)
                            print(json.dumps(data, indent=2))
    # Testing
    # NOTE(review): hard-coded developer path — replace with the real
    # extraction directory before running anywhere else.
    tmpdir = "C:\\Users\\tbouchet\\Downloads\\json"
    click.echo(" ", nl=False)
    # First pass: organ files -> TypeModel / EntityModel rows.
    for root, dirs, files in os.walk(tmpdir):
        if root.endswith("organe"):
            with click.progressbar(files, label="Entities") as progress_files:
                entities = []
                for filename in progress_files:
                    #print(filename)
                    with open(os.path.join(root, filename)) as filehandler:
                        data = json.load(filehandler)["organe"]

                    # Type
                    # file:///C:/Users/tbouchet/Downloads/html/Schemas_Entites/AMO/Schemas_Organes.html
                    type_types = {
                        "API": "Assemblée parlementaire internationale",
                        "ASSEMBLEE": "Assemblée nationale",
                        "ASSEXT": "Autres conseils",
                        "ASSTOM": "Assemblée territoriale d’Outre-Mer",
                        "CES": "Conseil économique, social et environnemental",
                        "CJR": "Cour de justice de la République",
                        "CMP": "Commissions mixtes paritaires",
                        "CNPE": "Commissions d’enquêtes",
                        "CNPS": "Commissions spéciales",
                        "COMMUNE": "Conseil Municipal",
                        "COMNL": "Autres commissions permanentes",
                        "COMPER": "Commissions permanentes législatives",
                        "COMSENAT": "Commissions du Sénat",
                        "COMSPSENAT": "Commissions spéciales du Sénat",
                        "CONFPT": "CONFPT",
                        "CONSTITU": "Conseil constitutionnel",
                        "DELEG": "Délégation parlementaire",
                        "DELEGBUREAU": "Délégation du Bureau (de l’AN)",
                        "DELEGSENAT": "Délégation du Sénat",
                        "DEPARTEMENT": "Conseil général ou départemental",
                        "EUROPE": "Mandat européen",
                        "GA": "Groupe d’amitié",
                        "GE": "Groupe d’études",
                        "GEVI": "Groupe d’études à vocation internationale",
                        "GOUVERNEMENT": "Gouvernement",
                        "GP": "Groupe politique",
                        "GROUPESENAT": "Groupe Sénat",
                        "HCJ": "Haute Cour de justice",
                        "INTCO": "Intercommunalité",
                        "MINISTERE": "Ministère",
                        "MISINFO": "Missions d’informations",
                        "MISINFOCOM": "Missions d’information communes",
                        "MISINFOPRE": "Missions d’information de la conférence des Présidents",
                        "OFFPAR": "Office parlementaire ou délégation mixte",
                        "ORGAINT": "Organisme international",
                        "ORGEXTPARL": "Organisme extra parlementaire",
                        "PARPOL": "Parti Politique",
                        "PRESREP": "Présidence de la République",
                        "REGION": "Conseil régional",
                        "SENAT": "Mandat de sénateur",
                    }
                    # Get-or-create the type by its label.
                    # NOTE(review): `type` shadows the builtin.
                    type = TypeModel.query.filter_by(name = type_types[data["codeType"]]).first()
                    if type is None:
                        type = TypeModel()
                        type.source = "Assemblée Nationale"
                        type.source_uid = data["codeType"]
                        type.name = type_types[data["codeType"]]
                        type.save()

                    # Entity
                    # NOTE(review): `country` is undefined in this file.
                    entity = EntityModel(
                        source = "Assemblée Nationale",
                        source_uid = data["uid"],
                        type_id = type.id,
                        name = data["libelle"],
                        code = data["libelleAbrev"],
                        country_id = country.id,
                    )
                    # Link to the parent organ when already imported;
                    # otherwise just log the dangling reference.
                    if data["organeParent"] is not None:
                        parent = EntityModel.query.filter_by(source_uid=data["organeParent"]).first()
                        if parent is not None:
                            entity.parent_id = parent.id
                        else:
                            print(data["uid"], data["organeParent"])
                    entity.save()

    # Second pass: actor files -> representative, address, contact and
    # membership rows.
    for root, dirs, files in os.walk(tmpdir):
        if root.endswith("acteur"):
            with click.progressbar(files, label="Representatives") as progress_files:
                for filename in progress_files:
                    with open(os.path.join(root, filename)) as filehandler:
                        data = json.load(filehandler)["acteur"]

                    # Representative
                    representative = RepresentativeModel()
                    representative.source = "Assemblée Nationale"
                    representative.source_uid = data["uid"]["#text"]
                    nom = data["etatCivil"]["ident"]["nom"]
                    prenom = data["etatCivil"]["ident"]["prenom"]
                    representative.name = f"{prenom} {nom}"
                    representative.nationality_id = country.id
                    representative.birth_date = datetime.strptime(
                        data["etatCivil"]["infoNaissance"]["dateNais"], "%Y-%m-%d"
                    )
                    # A non-str value presumably encodes an empty element
                    # in the source dump — skipped. TODO confirm.
                    if isinstance(data["etatCivil"]["infoNaissance"]["villeNais"], str):
                        representative.birth_place = data["etatCivil"]["infoNaissance"][
                            "villeNais"
                        ]
                    if isinstance(data["profession"]["libelleCourant"], str):
                        representative.profession = data["profession"]["libelleCourant"]
                    representative.save()

                    # Address
                    if data["adresses"].get("adresse", "") != "":
                        # Postal address type codes handled here; all other
                        # codes are treated as contacts below.
                        address_types = {
                            "0": "Parliament address",
                            "1": "Address",
                            "2": "Constituency address",
                        }

                        def manage_address(data_address):
                            # Persist one postal address of a known type.
                            if data_address["type"] in address_types:
                                address = AddressModel()
                                address.representative_id = representative.id
                                address.source = "Assemblée Nationale"
                                address.source_uid = data_address["uid"]
                                address.name = address_types[data_address["type"]]
                                address.country_id = country.id
                                address.number = data_address["numeroRue"]
                                address.street = data_address["nomRue"]
                                address.miscellaneous = data_address[
                                    "complementAdresse"
                                ]
                                address.city = data_address["ville"]
                                address.zipcode = data_address["codePostal"]
                                address.save()

                        # A lone address is serialized as a bare dict.
                        if isinstance(data["adresses"]["adresse"], list):
                            for data_address in data["adresses"]["adresse"]:
                                manage_address(data_address)
                        elif isinstance(data["adresses"]["adresse"], dict):
                            manage_address(data["adresses"]["adresse"])

                        # Contact
                        contact_types = {
                            "3": "Phone (Press contact)",
                            "11": "Phone",
                            "12": "Fax",
                            "15": "Email",
                            "22": "Website",
                            "23": "Senate URL",
                            "24": "Twitter",
                            "25": "Facebook",
                        }

                        def manage_contact(data_contact):
                            # Persist one contact entry of a known type,
                            # linked to its postal address when present.
                            if data_contact["type"] in contact_types:
                                contact = ContactModel()
                                contact.representative_id = representative.id
                                contact.source = "Assemblée Nationale"
                                contact.source_uid = data_contact["uid"]
                                if data_contact["adresseDeRattachement"] is not None:
                                    address = AddressModel.query.filter_by(
                                        source_uid=data_contact["adresseDeRattachement"]
                                    ).first()
                                    if address is not None:
                                        contact.address_id = address.id
                                contact.name = contact_types[data_contact["type"]]
                                contact.value = data_contact["valElec"]
                                contact.save()

                        if isinstance(data["adresses"]["adresse"], list):
                            for data_contact in data["adresses"]["adresse"]:
                                manage_contact(data_contact)
                        elif isinstance(data["adresses"]["adresse"], dict):
                            manage_contact(data["adresses"]["adresse"])

                        # Unknown addresses ?
                        if isinstance(data["adresses"]["adresse"], list):
                            for data_address in data["adresses"]["adresse"]:
                                if data_address["type"] not in dict(
                                    address_types, **contact_types
                                ):
                                    print(
                                        f" => Unkown address type : {data_address['type']} in file (unknown) : {data_address['typeLibelle']}"
                                    )
                        elif isinstance(data["adresses"]["adresse"], dict):
                            data_address = data["adresses"]["adresse"]
                            if data_address["type"] not in dict(
                                address_types, **contact_types
                            ):
                                print(
                                    f" => Unkown address type : {data_address['type']} in file (unknown) : {data_address['typeLibelle']}"
                                )

                    if data["mandats"].get("mandat", "") != "":
                        # Membership
                        membership_types = {
                            "Membre": "Member",
                        }

                        def manage_membership(data_membership):
                            # Persist one membership for a known role,
                            # provided the target organ was imported.
                            if data_membership["infosQualite"]["codeQualite"] in membership_types:
                                entity = EntityModel.query.filter_by(source_uid=data_membership["organes"]["organeRef"]).first()
                                if entity is None:
                                    print("Organe inconnu", data_membership["organes"]["organeRef"])
                                    return
                                membership = MembershipModel()
                                membership.representative_id = representative.id
                                membership.role = membership_types[data_membership["infosQualite"]["codeQualite"]]
                                membership.country_id = country.id
                                if data_membership["dateDebut"] is not None:
                                    membership.start = datetime.strptime(
                                        data_membership["dateDebut"], "%Y-%m-%d"
                                    )
                                if data_membership["dateFin"] is not None:
                                    membership.end = datetime.strptime(
                                        data_membership["dateFin"], "%Y-%m-%d"
                                    )
                                membership.entity_id = entity.id
                                membership.save()

                        if isinstance(data["mandats"]["mandat"], list):
                            for data_membership in data["mandats"]["mandat"]:
                                manage_membership(data_membership)
                        elif isinstance(data["mandats"]["mandat"], dict):
                            manage_membership(data["mandats"]["mandat"])
|
Loading…
Reference in New Issue
Block a user