des expérimentations pour avoir un meilleur import des événements facebook

This commit is contained in:
Jean-Marie Favreau 2023-11-11 16:55:04 +01:00
parent c1c4eac147
commit b57928664c

View File

@ -8,37 +8,100 @@ from selenium import webdriver
from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import json import json
class SimpleEvent:
def __init__(self, data):
self.elements = {}
for key in ["id", "start_timestamp", "end_timestamp"]:
self.elements[key] = data[key] if key in data else None
if "parent_event" in data:
self.parent = SimpleEvent(data["parent_event"])
class Event: class Event:
name = "event" name = "event"
keys = ["start_time_formatted", 'start_timestamp', 'is_past', "name", "price_info", "cover_media_renderer", "event_creator", "id", "day_time_sentence", "event_place", "comet_neighboring_siblings"] keys = [
["start_time_formatted", 'start_timestamp',
'is_past',
"name",
"price_info",
"cover_media_renderer",
"event_creator",
"id",
"day_time_sentence",
"event_place",
"comet_neighboring_siblings"],
["event_description"]
]
rules = {
"event_description": { "description": ["text"]},
"cover_media_renderer": {"image_alt": ["cover_photo", "photo", "accessibility_caption"], "image": ["cover_photo", "photo", "full_image", "uri"]},
"event_creator": { "event_creator_name": ["name"], "event_creator_url": ["url"] },
"event_place": {"event_place_name": ["name"] }
}
def __init__(self, event): def __init__(self, i, event):
self.data = event self.fragments = {}
self.elements = {}
self.neighbor_events = None
self.add_fragment(i, event)
def add_fragment(self, i, event):
self.fragments[i] = event
for k in Event.keys[i]:
if k == "comet_neighboring_siblings":
self.get_neighbor_events(event[k])
elif k in Event.rules:
for nk, rule in Event.rules[k].items():
c = event[k]
for ki in rule:
c = c[ki]
self.elements[nk] = c
else:
self.elements[k] = event[k]
def get_neighbor_events(self, data):
self.neighbor_events = [SimpleEvent(d) for d in data]
def __str__(self): def __str__(self):
return self.data["name"] return str(self.elements) + "\n Neighbors: " + ", ".join([ne.elements["id"] for ne in self.neighbor_events])
def find_event_in_array(array): def consolidate_current_event(self):
if self.neighbor_events is not None and "id" in self.elements:
id = self.elements["id"]
for ne in self.neighbor_events:
if ne.elements["id"] == id:
self.elements["end_timestamp"] = ne.elements["end_timestamp"]
def find_event_fragment_in_array(array, event):
if isinstance(array, dict): if isinstance(array, dict):
#print([k for k in array])
if len(Event.keys) == len([k for k in Event.keys if k in array]): for i, ks in enumerate(Event.keys):
return Event(array) if len(ks) == len([k for k in ks if k in array]):
else: if event is None:
for k in array: event = Event(i, array,)
v = Event.find_event_in_array(array[k]) else:
if v != None: event.add_fragment(i, array)
return v else:
for k in array:
event = Event.find_event_fragment_in_array(array[k], event)
elif isinstance(array, list): elif isinstance(array, list):
for e in array: for e in array:
v = Event.find_event_in_array(e) event = Event.find_event_fragment_in_array(e, event)
if v != None:
return v if event is not None:
return None event.consolidate_current_event()
return event
#url="https://www.facebook.com/events/ical/export/?eid=2294200007432315" #url="https://www.facebook.com/events/ical/export/?eid=2294200007432315"
@ -53,7 +116,7 @@ hash = result.hexdigest()
filename = os.path.join(cachedir, hash + ".html") filename = os.path.join(cachedir, hash + ".html")
if os.path.isfile(filename): if os.path.isfile(filename):
#print("Use cache") # print("Use cache")
with open(filename) as f: with open(filename) as f:
doc = "\n".join(f.readlines()) doc = "\n".join(f.readlines())
else: else:
@ -77,9 +140,12 @@ else:
soup = BeautifulSoup(doc) soup = BeautifulSoup(doc)
event = None
for json_script in soup.find_all('script', type="application/json"): for json_script in soup.find_all('script', type="application/json"):
json_txt = json_script.get_text() json_txt = json_script.get_text()
json_struct = json.loads(json_txt) json_struct = json.loads(json_txt)
event = Event.find_event_in_array(json_struct)
if event != None: event = Event.find_event_fragment_in_array(json_struct, event)
print(event)
print(event)