des expérimentations pour avoir un meilleur import des événements facebook
This commit is contained in:
parent
c1c4eac147
commit
b57928664c
@ -8,37 +8,100 @@ from selenium import webdriver
|
|||||||
from selenium.webdriver.chrome.service import Service
|
from selenium.webdriver.chrome.service import Service
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
class SimpleEvent:
    """Lightweight event record: an id plus start and end timestamps.

    Instances are built from a raw dict. The three keys listed below are
    copied into ``self.elements``; a key absent from the dict is stored as
    ``None``. When the dict carries a ``"parent_event"`` dict, it is wrapped
    recursively and exposed as ``self.parent``.
    """

    def __init__(self, data):
        """Build the record from *data*, a dict taken from the JSON payload."""
        self.elements = {
            key: data.get(key)
            for key in ["id", "start_timestamp", "end_timestamp"]
        }

        # Always define the attribute so callers can test `parent is None`
        # instead of hitting AttributeError on events without a parent.
        parent_data = data.get("parent_event")
        # A JSON null (or any non-dict value) cannot be wrapped: treat it
        # as "no parent" rather than crashing in the nested constructor.
        self.parent = (
            SimpleEvent(parent_data) if isinstance(parent_data, dict) else None
        )
|
||||||
|
|
||||||
|
|
||||||
class Event:
    """Event assembled from fragments scattered through nested JSON data.

    A *fragment* is a dict that contains every key of one of the groups
    listed in ``Event.keys``. Fragments are merged into ``self.elements``;
    values whose key appears in ``Event.rules`` are flattened by following
    a path of nested keys.
    """

    name = "event"

    # keys[i] is the set of keys a dict must ALL contain to be recognised
    # as fragment number i.
    keys = [
        [
            "start_time_formatted",
            "start_timestamp",
            "is_past",
            "name",
            "price_info",
            "cover_media_renderer",
            "event_creator",
            "id",
            "day_time_sentence",
            "event_place",
            "comet_neighboring_siblings",
        ],
        ["event_description"],
    ]

    # For a fragment key, maps each output element name to the path of
    # nested keys to follow inside that fragment value.
    rules = {
        "event_description": {"description": ["text"]},
        "cover_media_renderer": {
            "image_alt": ["cover_photo", "photo", "accessibility_caption"],
            "image": ["cover_photo", "photo", "full_image", "uri"],
        },
        "event_creator": {
            "event_creator_name": ["name"],
            "event_creator_url": ["url"],
        },
        "event_place": {"event_place_name": ["name"]},
    }

    def __init__(self, i, event):
        """Create the event from its first fragment.

        *i* is the index of the matching group in ``Event.keys`` and
        *event* is the dict that matched it.
        """
        self.fragments = {}
        self.elements = {}
        # Stays None until a fragment carrying
        # "comet_neighboring_siblings" has been added.
        self.neighbor_events = None
        self.add_fragment(i, event)

    def add_fragment(self, i, event):
        """Merge fragment *event* (matching ``Event.keys[i]``) into the event.

        The raw fragment is kept in ``self.fragments[i]``; a later fragment
        with the same index replaces the earlier one and overwrites the
        elements it provides.
        """
        self.fragments[i] = event

        for k in Event.keys[i]:
            if k == "comet_neighboring_siblings":
                self.get_neighbor_events(event[k])
            elif k in Event.rules:
                for nk, rule in Event.rules[k].items():
                    self.elements[nk] = Event._follow_path(event[k], rule)
            else:
                self.elements[k] = event[k]

    @staticmethod
    def _follow_path(value, path):
        """Return the value reached by indexing *value* with each key of *path*.

        Returns None when a step cannot be followed (missing key, or a
        null/non-container intermediate value) instead of raising.
        """
        for key in path:
            try:
                value = value[key]
            except (KeyError, IndexError, TypeError):
                return None
        return value

    def get_neighbor_events(self, data):
        """Store the neighbouring events described by the list *data*.

        Despite its name this method sets ``self.neighbor_events`` and
        returns nothing. A null/empty *data* yields an empty list.
        """
        self.neighbor_events = [SimpleEvent(d) for d in data or []]

    def __str__(self):
        """Return the elements dict followed by the ids of the neighbours."""
        # neighbor_events is None until fragment 0 has been seen, and ids
        # may not be strings: guard both so printing never raises.
        neighbors = self.neighbor_events or []
        return (
            str(self.elements)
            + "\n Neighbors: "
            + ", ".join(str(ne.elements["id"]) for ne in neighbors)
        )

    def consolidate_current_event(self):
        """Copy ``end_timestamp`` from the neighbour that shares this event's id.

        Does nothing when no neighbours are known or the event has no id.
        If several neighbours share the id, the last one wins.
        """
        if self.neighbor_events is None or "id" not in self.elements:
            return
        current_id = self.elements["id"]
        for ne in self.neighbor_events:
            if ne.elements["id"] == current_id:
                self.elements["end_timestamp"] = ne.elements["end_timestamp"]

    @staticmethod
    def find_event_fragment_in_array(array, event):
        """Search *array* for event fragments and merge them into *event*.

        *array* is any JSON-like value (dict, list or scalar). *event* is
        the Event built so far, or None. Returns the (possibly newly
        created) Event, or None when no fragment has been found yet.
        """
        event = Event._collect_fragments(array, event)
        # Consolidate once per top-level call rather than at every
        # recursion level.
        if event is not None:
            event.consolidate_current_event()
        return event

    @staticmethod
    def _collect_fragments(node, event):
        """Recursive worker for ``find_event_fragment_in_array``."""
        if isinstance(node, dict):
            matched = False
            for i, ks in enumerate(Event.keys):
                if all(k in node for k in ks):
                    matched = True
                    if event is None:
                        event = Event(i, node)
                    else:
                        event.add_fragment(i, node)
            # Descend exactly once, and only when the dict itself is not a
            # fragment; descending once per non-matching key group makes
            # the traversal exponential in the nesting depth.
            if not matched:
                for child in node.values():
                    event = Event._collect_fragments(child, event)
        elif isinstance(node, list):
            for item in node:
                event = Event._collect_fragments(item, event)
        return event
|
||||||
|
|
||||||
|
|
||||||
#url="https://www.facebook.com/events/ical/export/?eid=2294200007432315"
|
#url="https://www.facebook.com/events/ical/export/?eid=2294200007432315"
|
||||||
@ -53,7 +116,7 @@ hash = result.hexdigest()
|
|||||||
filename = os.path.join(cachedir, hash + ".html")
|
filename = os.path.join(cachedir, hash + ".html")
|
||||||
|
|
||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
#print("Use cache")
|
# print("Use cache")
|
||||||
with open(filename) as f:
|
with open(filename) as f:
|
||||||
doc = "\n".join(f.readlines())
|
doc = "\n".join(f.readlines())
|
||||||
else:
|
else:
|
||||||
@ -77,9 +140,12 @@ else:
|
|||||||
|
|
||||||
soup = BeautifulSoup(doc)
|
soup = BeautifulSoup(doc)
|
||||||
|
|
||||||
|
event = None
|
||||||
for json_script in soup.find_all('script', type="application/json"):
|
for json_script in soup.find_all('script', type="application/json"):
|
||||||
json_txt = json_script.get_text()
|
json_txt = json_script.get_text()
|
||||||
json_struct = json.loads(json_txt)
|
json_struct = json.loads(json_txt)
|
||||||
event = Event.find_event_in_array(json_struct)
|
|
||||||
if event != None:
|
event = Event.find_event_fragment_in_array(json_struct, event)
|
||||||
print(event)
|
|
||||||
|
print(event)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user