2024-10-17 18:19:34 +02:00
|
|
|
import json
|
|
|
|
import geopandas as gpd
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from shapely.geometry import Point
|
|
|
|
|
|
|
|
# Load the saved HTML listing page. The encoding is passed explicitly so the
# accented characters are decoded the same way on every platform instead of
# depending on the locale default.
# NOTE(review): assumes liste.html was saved as UTF-8 -- confirm.
with open("liste.html", "r", encoding="utf-8") as file:
    html = file.read()

# Parse the HTML with BeautifulSoup
soup = BeautifulSoup(html, "html.parser")

# Find every <article> tag that holds the information of one antenna
articles = soup.find_all("article", class_="node node--type-hp-antenne node--view-mode-map-result")

# One dict per antenna; each dict becomes one row of the GeoDataFrame
antennes = []

# Walk through each article and extract its information
for article in articles:
    # Name, address and link are read unconditionally: an article lacking one
    # of them raises AttributeError, exactly as before.
    titre = article.find("button", class_="nsb action-title").text.strip()
    adresse = article.find("p", class_="address").text.strip()
    website = article.find("a", class_="icon arrow").get("href").strip()

    # Service markers: only the presence of the <li> matters, not its content
    violences = article.find("li", class_="icon violences")
    sexualities = article.find("li", class_="icon sexualities")
    detection = article.find("li", class_="icon detection")
    contraception = article.find("li", class_="icon contraception")
    abortion = article.find("li", class_="icon abortion")

    # Some articles have no phone number: fall back to an empty string
    tel_tag = article.find("p", class_="tel")
    telephone = tel_tag.text.strip() if tel_tag is not None else ""

    # data-latlng is split on the comma into latitude then longitude; the
    # surrounding brackets and double quotes are stripped before conversion.
    latlng_parts = article["data-latlng"].split(",")
    latitude = float(latlng_parts[0].replace("[", "").replace('"', ""))
    longitude = float(latlng_parts[1].replace("]", "").replace('"', ""))

    # shapely points are (x, y), i.e. (longitude, latitude)
    geometry = Point(longitude, latitude)

    antenne = {
        # Kept for output compatibility; in the GeoDataFrame this is an
        # ordinary column named "type", not GeoJSON structure.
        "type": "Feature",
        "geometry": geometry,
        "nom": titre,
        "adresse": adresse,
        "contact:phone": telephone,
        "contact:website": 'https://www.planning-familial.org/fr/'+website,
        "family_planning:handles:violences": ('yes' if violences else 'no'),
        "family_planning:handles:sexualities": ('yes' if sexualities else 'no'),
        "family_planning:handles:detection": ('yes' if detection else 'no'),
        "family_planning:handles:abortion": ('yes' if abortion else 'no'),
        "family_planning:handles:contraception": ('yes' if contraception else 'no'),
    }

    antennes.append(antenne)

# Stop with a clear message when nothing matched (for example if the page
# markup changed): an empty list gives a frame without a geometry column,
# which cannot be exported.
if not antennes:
    raise SystemExit("No antenna <article> elements found in liste.html; nothing to export.")

# Convert the list of antennas into a GeoDataFrame. The coordinates are
# latitude/longitude pairs, so the CRS is declared as WGS 84.
# NOTE(review): assumes data-latlng is WGS 84 (EPSG:4326) -- confirm.
gdf = gpd.GeoDataFrame(antennes, crs="EPSG:4326")

# Export the GeoDataFrame as GeoJSON
gdf.to_file("antennes_planning_familial.json", driver="GeoJSON")
|