From 6ca4fe79647b6129ee3cf7e13fbb05a1f9de4a61 Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Thu, 29 Aug 2024 01:11:26 +0200 Subject: [PATCH] =?UTF-8?q?Un=20=C3=A9v=C3=A9nement=20fb=20peut=20correspo?= =?UTF-8?q?ndre=20=C3=A0=20plusieurs=20=C3=A9v=C3=A9nements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- experimentations/get_facebook_events.py | 4 +- src/agenda_culturel/celery.py | 2 - .../custom_extractors/fbevents.py | 11 ++-- .../import_tasks/extractor_facebook.py | 55 ++++++++++++++----- 4 files changed, 48 insertions(+), 24 deletions(-) diff --git a/experimentations/get_facebook_events.py b/experimentations/get_facebook_events.py index 9d016aa..573147b 100755 --- a/experimentations/get_facebook_events.py +++ b/experimentations/get_facebook_events.py @@ -29,8 +29,8 @@ from src.agenda_culturel.import_tasks.custom_extractors import * if __name__ == "__main__": u2e = URL2Events(ChromiumHeadlessDownloader(), fbevents.CExtractor()) - url = "https://www.facebook.com/laJeteeClermont/events" - url_human = "https://www.facebook.com/laJeteeClermont/events" + url = "https://www.facebook.com/laJeteeClermont/upcoming_hosted_events" + url_human = "https://www.facebook.com/laJeteeClermont/upcoming_hosted_events" try: events = u2e.process(url, url_human, cache = "cache-lajetee-fb.html", default_values = {"location": "La Jetée"}, published = True) diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py index 1ed40ca..d4d4304 100644 --- a/src/agenda_culturel/celery.py +++ b/src/agenda_culturel/celery.py @@ -239,8 +239,6 @@ def import_events_from_url(self, url, cat): url, published=False, default_values=values ) - # TODO: use celery to import the associated events - # convert it to json json_events = json.dumps(events, default=str) diff --git a/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py b/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py index 6a4cbaf..38f7a88 100644 
--- a/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py +++ b/src/agenda_culturel/import_tasks/custom_extractors/fbevents.py @@ -38,11 +38,10 @@ class CExtractor(TwoStepsExtractor): ) if fevent is not None: - event = fevent.build_event(event_url) - event["published"] = published - print([e.elements for e in fevent.neighbor_events]) + for event in fevent.build_events(event_url): + event["published"] = published - if "category" in default_values: - event["category"] = default_values["category"] - self.add_event(**event) + if "category" in default_values: + event["category"] = default_values["category"] + self.add_event(**event) diff --git a/src/agenda_culturel/import_tasks/extractor_facebook.py b/src/agenda_culturel/import_tasks/extractor_facebook.py index 7f5f374..2069479 100644 --- a/src/agenda_culturel/import_tasks/extractor_facebook.py +++ b/src/agenda_culturel/import_tasks/extractor_facebook.py @@ -32,6 +32,7 @@ class FacebookEvent: "price_info", "cover_media_renderer", "id", + "parent_if_exists_or_self", "day_time_sentence", "event_place", "comet_neighboring_siblings", @@ -52,12 +53,20 @@ class FacebookEvent: "event_place": {"event_place_name": ["name"]}, } - def __init__(self, i, event): - self.fragments = {} - self.elements = {} - self.neighbor_events = None - self.possible_end_timestamp = [] - self.add_fragment(i, event) + def __init__(self, *args): + if len(args) == 1: + other = args[0] + self.fragments = other.fragments + self.elements = other.elements + self.neighbor_events = None + else: + i = args[0] + event = args[1] + self.fragments = {} + self.elements = {} + self.neighbor_events = None + self.possible_end_timestamp = [] + self.add_fragment(i, event) def get_element(self, key): return self.elements[key] if key in self.elements else None @@ -177,8 +186,6 @@ class FacebookEvent: return event def build_event(self, url): - self.get_element("image") - return { "title": self.get_element("name"), "category": None, @@ -195,6 +202,26 @@ class 
FacebookEvent: "image_alt": self.get_element("image"), } + def get_parent_id(self): + return self.get_element("parent_if_exists_or_self")["id"] + + + def build_events(self, url): + + if self.neighbor_events is None or len(self.neighbor_events) == 0: + return [self.build_event(url)] + else: + url_base = "https://www.facebook.com/events/" + self.get_parent_id() + "/" + result = [] + for nb_e in self.neighbor_events: + # we create a copy of the event + clone = FacebookEvent(self) + # we set start and end timestamp according to the neighbor + clone.elements["start_timestamp"] = nb_e.elements["start_timestamp"] + clone.elements["end_timestamp"] = nb_e.elements["end_timestamp"] + ## we generate the event + result.append(clone.build_event(url_base + nb_e.elements["id"] + "/")) + return result class FacebookEventExtractor(Extractor): @@ -230,13 +257,13 @@ class FacebookEventExtractor(Extractor): if fevent is not None: self.set_header(url) - event = fevent.build_event(url) - logger.warning("published: " + str(published)) - event["published"] = published + for event in fevent.build_events(url): + logger.warning("published: " + str(published)) + event["published"] = published - if "category" in default_values: - event["category"] = default_values["category"] - self.add_event(**event) + if "category" in default_values: + event["category"] = default_values["category"] + self.add_event(**event) return self.get_structure()