Add a script that manages to retrieve an event's info from its Facebook URL (and it was not easy)
This commit is contained in:
parent
380271b48d
commit
a21b9d030e
85
experimentations/get_facebook_event.py
Executable file
@@ -0,0 +1,85 @@
#!/usr/bin/python3
# coding: utf-8

import requests
import hashlib
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

from bs4 import BeautifulSoup

import json
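
# Wrapper around the JSON dictionary that Facebook embeds in an event page;
# `keys` lists the fields that dictionary is expected to contain.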
class Event:

    name = "event"
    keys = ["start_time_formatted", 'start_timestamp', 'is_past', "name", "price_info", "cover_media_renderer", "event_creator", "id", "day_time_sentence", "event_place", "comet_neighboring_siblings"]

    def __init__(self, event):
        self.data = event

    def __str__(self):
        return self.data["name"]
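
    # Recursively walk a nested dict/list structure and return an Event built
    # from the first dict that contains every key listed in Event.keys.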
    def find_event_in_array(array):
        if isinstance(array, dict):
            #print([k for k in array])
            if len(Event.keys) == len([k for k in Event.keys if k in array]):
                return Event(array)
            else:
                for k in array:
                    v = Event.find_event_in_array(array[k])
                    if v is not None:
                        return v
        elif isinstance(array, list):
            for e in array:
                v = Event.find_event_in_array(e)
                if v is not None:
                    return v
        return None
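

# Event page to scrape; the commented-out lines are alternative URLs
# (ical exports and a non-Facebook page).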
#url="https://www.facebook.com/events/ical/export/?eid=2294200007432315"
url="https://www.facebook.com/events/2294199997432316/2294200007432315/"
#url_cal = "https://www.facebook.com/events/ical/export/?eid=993406668581410"
#url="https://jmtrivial.info"

cachedir = "cache"
result = hashlib.md5(url.encode())
hash = result.hexdigest()

filename = os.path.join(cachedir, hash + ".html")
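
# The page is cached on disk under an MD5 hash of the URL, so repeated
# runs do not have to download it again.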
if os.path.isfile(filename):
    #print("Use cache")
    with open(filename) as f:
        doc = f.read()
else:
    print("Download page")
    options = Options()
    options.add_argument("--headless=new")
    service = Service("/usr/bin/chromedriver")

    driver = webdriver.Chrome(service=service, options=options)
    driver.get(url)
    doc = driver.page_source
    driver.quit()

    dir = os.path.dirname(filename)
    if not os.path.exists(dir):
        os.makedirs(dir)
    with open(filename, "w") as text_file:
        text_file.write(doc)
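

# Look for the <script type="application/json"> blocks embedded in the page
# and search their JSON payloads for the event dictionary.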
soup = BeautifulSoup(doc, "html.parser")

for json_script in soup.find_all('script', type="application/json"):
    json_txt = json_script.get_text()
    json_struct = json.loads(json_txt)
    event = Event.find_event_in_array(json_struct)
    if event is not None:
        print(event)