mapping-geojson-osm/etalab_data/planing_familial/scrap.py

64 lines
2.4 KiB
Python
Raw Normal View History

2024-10-17 18:19:34 +02:00
import json
from urllib.parse import urljoin

import geopandas as gpd
from bs4 import BeautifulSoup
from shapely.geometry import Point
# Load the saved HTML listing. The encoding is given explicitly so that
# accented characters are decoded the same way on every platform instead of
# depending on the locale default.
# NOTE(review): assumes liste.html was saved as UTF-8 — confirm.
with open("liste.html", "r", encoding="utf-8") as file:
    html = file.read()

# Parse the HTML with BeautifulSoup.
soup = BeautifulSoup(html, "html.parser")

# Find every <article> tag carrying exactly this class string; each one
# holds the information of a single antenna.
articles = soup.find_all(
    "article",
    class_="node node--type-hp-antenne node--view-mode-map-result",
)

# One record (dict) per antenna is appended here by the extraction loop.
antennes = []
# Walk through each article and extract the antenna's information.
for article in articles:
    titre = article.find("button", class_="nsb action-title").text.strip()
    adresse = article.find("p", class_="address").text.strip()

    # Resolve the link against the site root. urljoin yields the same URL as
    # plain concatenation for "/path" hrefs, but also keeps an already
    # absolute href intact and inserts the missing "/" for "path" hrefs.
    href = article.find("a", class_="icon arrow").get("href").strip()
    website = urljoin("https://www.planning-familial.org", href)

    # Some articles have no phone number: fall back to an empty string.
    tel_tag = article.find("p", class_="tel")
    telephone = tel_tag.text.strip() if tel_tag is not None else ""

    # data-latlng is read as a bracketed "lat,lng" pair whose values may be
    # wrapped in double quotes; strip the decoration once, then split.
    raw_lat, raw_lng = (
        article["data-latlng"].strip("[]").replace('"', "").split(",")[:2]
    )
    latitude = float(raw_lat)
    longitude = float(raw_lng)

    # A service is flagged 'yes' when the matching <li class="icon ..."> is
    # present in the article. The tuple order fixes the key order of the
    # resulting record.
    services = ("violences", "sexualities", "detection", "abortion", "contraception")
    handles = {
        f"family_planning:handles:{service}": (
            "yes" if article.find("li", class_=f"icon {service}") is not None else "no"
        )
        for service in services
    }

    antenne = {
        # NOTE(review): this record becomes a GeoDataFrame row, so "type" is
        # stored as an ordinary property — confirm it is wanted in the output.
        "type": "Feature",
        # shapely points are (x, y), i.e. (longitude, latitude).
        "geometry": Point(longitude, latitude),
        "nom": titre,
        "adresse": adresse,
        "contact:phone": telephone,
        "contact:website": website,
        **handles,
    }
    antennes.append(antenne)
# Convert the list of antenna records into a GeoDataFrame. The geometries
# are longitude/latitude points, so the frame is tagged with a geographic
# CRS rather than being left without one.
# NOTE(review): assumes the scraped coordinates are WGS84 (EPSG:4326) — confirm.
gdf = gpd.GeoDataFrame(antennes, crs="EPSG:4326")

# Export the GeoDataFrame in GeoJSON format.
gdf.to_file("antennes_planning_familial.json", driver="GeoJSON")