Un événement fb peut correspondre à plusieurs événements

This commit is contained in:
Jean-Marie Favreau 2024-08-29 01:11:26 +02:00
parent 81601ec5da
commit 6ca4fe7964
4 changed files with 48 additions and 24 deletions

View File

@ -29,8 +29,8 @@ from src.agenda_culturel.import_tasks.custom_extractors import *
if __name__ == "__main__":
u2e = URL2Events(ChromiumHeadlessDownloader(), fbevents.CExtractor())
url = "https://www.facebook.com/laJeteeClermont/events"
url_human = "https://www.facebook.com/laJeteeClermont/events"
url = "https://www.facebook.com/laJeteeClermont/upcoming_hosted_events"
url_human = "https://www.facebook.com/laJeteeClermont/upcoming_hosted_events"
try:
events = u2e.process(url, url_human, cache = "cache-lajetee-fb.html", default_values = {"location": "La Jetée"}, published = True)

View File

@ -239,8 +239,6 @@ def import_events_from_url(self, url, cat):
url, published=False, default_values=values
)
# TODO: use celery to import the associated events
# convert it to json
json_events = json.dumps(events, default=str)

View File

@ -38,11 +38,10 @@ class CExtractor(TwoStepsExtractor):
)
if fevent is not None:
event = fevent.build_event(event_url)
event["published"] = published
print([e.elements for e in fevent.neighbor_events])
for event in fevent.build_events(event_url):
event["published"] = published
if "category" in default_values:
event["category"] = default_values["category"]
self.add_event(**event)
if "category" in default_values:
event["category"] = default_values["category"]
self.add_event(**event)

View File

@ -32,6 +32,7 @@ class FacebookEvent:
"price_info",
"cover_media_renderer",
"id",
"parent_if_exists_or_self",
"day_time_sentence",
"event_place",
"comet_neighboring_siblings",
@ -52,12 +53,20 @@ class FacebookEvent:
"event_place": {"event_place_name": ["name"]},
}
def __init__(self, i, event):
self.fragments = {}
self.elements = {}
self.neighbor_events = None
self.possible_end_timestamp = []
self.add_fragment(i, event)
def __init__(self, *args):
    """Create an event from a fragment, or as a copy of another event.

    Two call forms are supported:

    * ``FacebookEvent(other)`` -- copy form. The new instance starts with
      its own shallow copies of ``other.fragments`` and ``other.elements``
      so that later changes to the copy never leak back into ``other``.
      ``neighbor_events`` is reset to ``None``.
    * ``FacebookEvent(i, event)`` -- fragment form. The instance starts
      empty and ``add_fragment(i, event)`` is called to populate it.
    """
    if len(args) == 1:
        other = args[0]
        # Copy the containers instead of aliasing them: the copy is meant
        # to be edited (e.g. its timestamps) independently of its source.
        self.fragments = dict(other.fragments)
        self.elements = dict(other.elements)
        self.neighbor_events = None
        # Keep the instance state consistent with the fragment form, which
        # always defines this attribute.
        self.possible_end_timestamp = list(
            getattr(other, "possible_end_timestamp", [])
        )
    else:
        i = args[0]
        event = args[1]
        self.fragments = {}
        self.elements = {}
        self.neighbor_events = None
        self.possible_end_timestamp = []
        self.add_fragment(i, event)
def get_element(self, key):
    """Return the element stored under ``key``, or ``None`` if absent."""
    if key in self.elements:
        return self.elements[key]
    return None
@ -177,8 +186,6 @@ class FacebookEvent:
return event
def build_event(self, url):
self.get_element("image")
return {
"title": self.get_element("name"),
"category": None,
@ -195,6 +202,26 @@ class FacebookEvent:
"image_alt": self.get_element("image"),
}
def get_parent_id(self):
    """Return the ``id`` entry of the ``parent_if_exists_or_self`` element."""
    parent = self.get_element("parent_if_exists_or_self")
    return parent["id"]
def build_events(self, url):
    """Build one event dict per occurrence of this Facebook event.

    When no neighbor events are known, the result is a single-element list
    holding ``self.build_event(url)``. Otherwise one event is generated per
    neighbor: it reuses this event's elements, takes the start and end
    timestamps of the neighbor, and is given the URL
    ``https://www.facebook.com/events/<parent id>/<neighbor id>/``.

    Args:
        url: URL used for the event when there is no neighbor event.

    Returns:
        A list of event dicts as produced by ``build_event``.
    """
    if not self.neighbor_events:
        return [self.build_event(url)]

    url_base = "https://www.facebook.com/events/" + self.get_parent_id() + "/"
    result = []
    for nb_e in self.neighbor_events:
        clone = FacebookEvent(self)
        # Work on a private copy of the elements so that overriding the
        # timestamps never alters this event (or the other clones).
        clone.elements = dict(self.elements)
        # Set start and end timestamps according to the neighbor.
        clone.elements["start_timestamp"] = nb_e.elements["start_timestamp"]
        clone.elements["end_timestamp"] = nb_e.elements["end_timestamp"]
        result.append(clone.build_event(url_base + nb_e.elements["id"] + "/"))
    return result
class FacebookEventExtractor(Extractor):
@ -230,13 +257,13 @@ class FacebookEventExtractor(Extractor):
if fevent is not None:
self.set_header(url)
event = fevent.build_event(url)
logger.warning("published: " + str(published))
event["published"] = published
for event in fevent.build_events(url):
logger.warning("published: " + str(published))
event["published"] = published
if "category" in default_values:
event["category"] = default_values["category"]
self.add_event(**event)
if "category" in default_values:
event["category"] = default_values["category"]
self.add_event(**event)
return self.get_structure()