Ajout d'un script qui arrive à récupérer les infos d'un événement depuis son URL facebook (et c'était chaud)
This commit is contained in:
parent
380271b48d
commit
a21b9d030e
85
experimentations/get_facebook_event.py
Executable file
85
experimentations/get_facebook_event.py
Executable file
@ -0,0 +1,85 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.service import Service
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
class Event:
|
||||||
|
|
||||||
|
name = "event"
|
||||||
|
keys = ["start_time_formatted", 'start_timestamp', 'is_past', "name", "price_info", "cover_media_renderer", "event_creator", "id", "day_time_sentence", "event_place", "comet_neighboring_siblings"]
|
||||||
|
|
||||||
|
def __init__(self, event):
|
||||||
|
self.data = event
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.data["name"]
|
||||||
|
|
||||||
|
def find_event_in_array(array):
|
||||||
|
if isinstance(array, dict):
|
||||||
|
#print([k for k in array])
|
||||||
|
if len(Event.keys) == len([k for k in Event.keys if k in array]):
|
||||||
|
return Event(array)
|
||||||
|
else:
|
||||||
|
for k in array:
|
||||||
|
v = Event.find_event_in_array(array[k])
|
||||||
|
if v != None:
|
||||||
|
return v
|
||||||
|
elif isinstance(array, list):
|
||||||
|
for e in array:
|
||||||
|
v = Event.find_event_in_array(e)
|
||||||
|
if v != None:
|
||||||
|
return v
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
#url="https://www.facebook.com/events/ical/export/?eid=2294200007432315"
|
||||||
|
url="https://www.facebook.com/events/2294199997432316/2294200007432315/"
|
||||||
|
#url_cal = "https://www.facebook.com/events/ical/export/?eid=993406668581410"
|
||||||
|
#url="https://jmtrivial.info"
|
||||||
|
|
||||||
|
cachedir = "cache"
|
||||||
|
result = hashlib.md5(url.encode())
|
||||||
|
hash = result.hexdigest()
|
||||||
|
|
||||||
|
filename = os.path.join(cachedir, hash + ".html")
|
||||||
|
|
||||||
|
if os.path.isfile(filename):
|
||||||
|
#print("Use cache")
|
||||||
|
with open(filename) as f:
|
||||||
|
doc = "\n".join(f.readlines())
|
||||||
|
else:
|
||||||
|
print("Download page")
|
||||||
|
|
||||||
|
options = Options()
|
||||||
|
options.add_argument("--headless=new")
|
||||||
|
service = Service("/usr/bin/chromedriver")
|
||||||
|
|
||||||
|
driver = webdriver.Chrome(service=service, options=options)
|
||||||
|
driver.get(url)
|
||||||
|
doc = driver.page_source
|
||||||
|
driver.quit()
|
||||||
|
|
||||||
|
dir = os.path.dirname(filename)
|
||||||
|
if not os.path.exists(dir):
|
||||||
|
os.makedirs(dir)
|
||||||
|
with open(filename, "w") as text_file:
|
||||||
|
text_file.write(doc)
|
||||||
|
|
||||||
|
|
||||||
|
soup = BeautifulSoup(doc)
|
||||||
|
|
||||||
|
for json_script in soup.find_all('script', type="application/json"):
|
||||||
|
json_txt = json_script.get_text()
|
||||||
|
json_struct = json.loads(json_txt)
|
||||||
|
event = Event.find_event_in_array(json_struct)
|
||||||
|
if event != None:
|
||||||
|
print(event)
|
Loading…
Reference in New Issue
Block a user