Ajout (temporaire) d'exports pour traquer les problèmes d'import des pages

Voir #244
This commit is contained in:
Jean-Marie Favreau 2024-12-05 18:58:53 +01:00
parent 3d70de9c1b
commit bda14c6ccb

View File

@ -3,6 +3,12 @@ from ..extractor_facebook import FacebookEvent
import json5 import json5
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import json import json
import os
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
# A class dedicated to get events from a facebook events page # A class dedicated to get events from a facebook events page
@ -13,10 +19,24 @@ class CExtractor(TwoStepsExtractor):
def build_event_url_list(self, content):
    """Register every Facebook event URL linked from an events page.

    Each ``<a>`` element whose ``href`` starts with
    ``https://www.facebook.com/events/`` is passed to
    ``self.add_event_url`` with its query string removed.

    When no such link is found, the page content is saved to a
    timestamped ``.html`` file under ``errors/`` and a warning is logged,
    so that the failing page can be inspected afterwards.

    Args:
        content: HTML source of the events page, as text.
    """
    soup = BeautifulSoup(content, "html.parser")

    found = False
    for link in soup.find_all("a"):
        href = link.get("href")
        # Anchors without an href attribute yield None; skip them instead
        # of failing on None.startswith.
        if not href or not href.startswith('https://www.facebook.com/events/'):
            continue
        self.add_event_url(href.split('?')[0])
        found = True

    if not found:
        directory = "errors/"
        # exist_ok avoids the check-then-create race between concurrent runs.
        os.makedirs(directory, exist_ok=True)
        now = datetime.now()
        filename = directory + now.strftime("%Y%m%d_%H%M%S") + ".html"
        logger.warning("cannot find any event link in events page. Save content page in " + filename)
        # Explicit encoding: the page may contain characters that the
        # platform's default encoding cannot represent.
        with open(filename, "w", encoding="utf-8") as text_file:
            text_file.write(content)
def add_event_from_content( def add_event_from_content(
@ -42,4 +62,7 @@ class CExtractor(TwoStepsExtractor):
event["published"] = published event["published"] = published
self.add_event(default_values, **event) self.add_event(default_values, **event)
else:
logger.warning("cannot find any event in page")