diff --git a/src/agenda_culturel/import_tasks/extractor_facebook.py b/src/agenda_culturel/import_tasks/extractor_facebook.py index 22dbb3d..d74c1c2 100644 --- a/src/agenda_culturel/import_tasks/extractor_facebook.py +++ b/src/agenda_culturel/import_tasks/extractor_facebook.py @@ -23,6 +23,7 @@ class SimpleFacebookEvent: class FacebookEvent: name = "event" + # keys correspond to JavaScript elements that contain interesting data keys = [ [ "start_time_formatted", @@ -40,17 +41,22 @@ class FacebookEvent: ["event_description"], ["start_timestamp", "end_timestamp"], ] + # rules are indexed by a sub-key within the interesting data where elements will be found; + # each pair in the associated list is a key of our model and a path within FB data to + # get the corresponding field rules = { - "event_description": {"description": ["text"]}, - "cover_media_renderer": { - "image_alt": ["cover_photo", "photo", "accessibility_caption"], - "image": ["cover_photo", "photo", "full_image", "uri"], - }, - "event_creator": { - "event_creator_name": ["name"], - "event_creator_url": ["url"], - }, - "event_place": {"event_place_name": ["name"]}, + "event_description": [("description", ["text"])], + "cover_media_renderer": [ + ("image_alt", ["cover_photo", "photo", "accessibility_caption"]), + ("image", ["cover_photo", "photo", "full_image", "uri"]), + ("image", ["cover_media", 0, "full_image", "uri"]), + ("image_alt", ["cover_media", 0, "accessibility_caption"]), + ], + "event_creator": + [("event_creator_name", ["name"]), + ("event_creator_url", ["url"]), + ], + "event_place": [("event_place_name", ["name"])], } def __init__(self, *args): @@ -98,16 +104,15 @@ class FacebookEvent: if k == "comet_neighboring_siblings": self.get_neighbor_events(event[k]) elif k in FacebookEvent.rules: - for nk, rule in FacebookEvent.rules[ - k - ].items(): + for nk, rule in FacebookEvent.rules[k]: error = False c = event[k] for ki in rule: - if c is not None: + if c is not None and ki in c or 
(isinstance(c, list) and ki < len(c)): c = c[ki] else: error = True + break if not error: self.elements[nk] = c else: