Ajout (temporaire) d'exports pour traquer les problèmes d'import des pages
Voir #244
This commit is contained in:
parent
3d70de9c1b
commit
bda14c6ccb
@ -3,6 +3,12 @@ from ..extractor_facebook import FacebookEvent
|
|||||||
import json5
|
import json5
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# A class dedicated to get events from a facebook events page
|
# A class dedicated to get events from a facebook events page
|
||||||
@ -13,10 +19,24 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
def build_event_url_list(self, content):
    """Register every Facebook event link found in an events page.

    Parses ``content`` as HTML, and for each anchor whose ``href`` starts
    with ``https://www.facebook.com/events/`` passes the URL (query string
    removed) to ``self.add_event_url``.

    When no such link is found, the raw page is saved under ``errors/`` in
    a timestamped ``.html`` file and a warning is logged, so that the
    failing page can be inspected afterwards (temporary debugging aid,
    see issue #244).

    Args:
        content: HTML text of the events page.
    """
    soup = BeautifulSoup(content, "html.parser")

    found = False
    for link in soup.find_all("a"):
        href = link.get("href")
        # Anchors without an href attribute yield None; skip them instead
        # of raising AttributeError on .startswith().
        if href and href.startswith("https://www.facebook.com/events/"):
            # Keep only the part before the query string.
            self.add_event_url(href.split("?")[0])
            found = True

    if not found:
        directory = "errors/"
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(directory, exist_ok=True)
        filename = directory + datetime.now().strftime("%Y%m%d_%H%M%S") + ".html"
        logger.warning(
            "cannot find any event link in events page. Save content page in %s",
            filename,
        )
        # Explicit encoding: the page may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to handle them.
        with open(filename, "w", encoding="utf-8") as text_file:
            text_file.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def add_event_from_content(
|
def add_event_from_content(
|
||||||
@ -42,4 +62,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
event["published"] = published
|
event["published"] = published
|
||||||
|
|
||||||
self.add_event(default_values, **event)
|
self.add_event(default_values, **event)
|
||||||
|
else:
|
||||||
|
logger.warning("cannot find any event in page")
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user