scripts/pirate_program/index.py

22 lines
562 B
Python
Raw Normal View History

2024-06-10 15:42:10 +02:00
import requests
from bs4 import BeautifulSoup
import json
# Scrape the Parti Pirate "programTags" page and save every link found in the
# figure captions of its "#main" element to a JSON file.

# URL of the page to scrape.
url = "https://partipirate.org/programTags"

# requests applies no timeout by default; without one, a stalled server would
# make this script hang indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Fetch the HTML content of the page.
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on an HTTP error status instead of parsing an error page and
# overwriting the output file with an empty or misleading list.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Collect the "href" attribute of every anchor matching
# "#main figcaption a", skipping anchors whose href is missing or empty.
hrefs = (a.get("href") for a in soup.select("#main figcaption a"))
links = [href for href in hrefs if href]

# Write the links to a JSON file; the encoding is given explicitly so the
# result does not depend on the platform's default.
with open("partipirate_links.json", "w", encoding="utf-8") as f:
    json.dump(links, f, indent=4)