Ajout (temporaire) d'exports pour traquer les problèmes d'import des pages
Voir #244
This commit is contained in:
parent
3d70de9c1b
commit
bda14c6ccb
@ -3,6 +3,12 @@ from ..extractor_facebook import FacebookEvent
|
||||
import json5
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# A class dedicated to getting events from a Facebook events page
|
||||
@ -13,10 +19,24 @@ class CExtractor(TwoStepsExtractor):
|
||||
def build_event_url_list(self, content):
    """Collect Facebook event URLs found in an events page.

    Every ``<a>`` element whose ``href`` starts with
    ``https://www.facebook.com/events/`` is registered through
    ``self.add_event_url`` with its query string removed.

    When no such link is found, the page content is written to a
    timestamped ``.html`` file under ``errors/`` and a warning is logged,
    so that the failing page can be inspected afterwards.

    Args:
        content: HTML source of the events page, as text.
    """
    soup = BeautifulSoup(content, "html.parser")

    found = False
    for link in soup.find_all("a"):
        href = link.get("href")
        # Anchors without an href attribute yield None: skip them instead
        # of crashing on ``None.startswith``.
        if href is None:
            continue
        if href.startswith("https://www.facebook.com/events/"):
            self.add_event_url(href.split("?")[0])
            found = True

    if not found:
        directory = "errors/"
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists test.
        os.makedirs(directory, exist_ok=True)
        filename = directory + datetime.now().strftime("%Y%m%d_%H%M%S") + ".html"
        logger.warning(
            "cannot find any event link in events page. Save content page in %s",
            filename,
        )
        # Explicit encoding so pages with non-ASCII characters can be
        # dumped regardless of the locale's default encoding.
        with open(filename, "w", encoding="utf-8") as text_file:
            text_file.write(content)
|
||||
|
||||
|
||||
|
||||
|
||||
def add_event_from_content(
|
||||
@ -42,4 +62,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
event["published"] = published
|
||||
|
||||
self.add_event(default_values, **event)
|
||||
else:
|
||||
logger.warning("cannot find any event in page")
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user