Extraction des informations depuis le ical
This commit is contained in:
parent
7a0cd494a2
commit
fed61a8589
@ -10,6 +10,7 @@ from selenium import webdriver
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import icalendar
|
||||
from datetime import datetime, date
|
||||
|
||||
|
||||
|
||||
@ -61,27 +62,127 @@ class ChromiumHeadlessDownloader(Downloader):
|
||||
class Extractor(ABC):
    """Base class for extractors that turn downloaded content into events.

    An extractor accumulates its result in two attributes:

    * ``header``: a dict describing the extraction itself (source URL and
      extraction timestamp, see :meth:`set_header`);
    * ``events``: a list of event dicts built by :meth:`add_event`.

    :meth:`get_structure` bundles both into a single dict.
    """

    def __init__(self):
        self.header = {}
        self.events = []

    @abstractmethod
    def extract(self, content, url, url_human=None, default_values=None):
        """Extract events from ``content`` and return the resulting structure.

        Args:
            content: the raw downloaded content to parse.
            url: the URL the content was downloaded from.
            url_human: optional human-friendly URL associated with the source.
            default_values: optional dict of fallback values, looked up with
                :meth:`default_value_if_exists`.
        """

    def set_header(self, url):
        """Record the source URL and the current time in the header."""
        self.header["url"] = url
        self.header["date"] = datetime.now()

    def clear_events(self):
        """Forget every event collected so far."""
        self.events = []

    def add_event(self, title, category, start_day, location, description, tags, url=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None):
        """Append an event to ``self.events``.

        ``title`` and ``start_day`` are mandatory: when either is ``None`` an
        error message is printed and the event is not added. The optional
        keyword arguments are stored only when they are not ``None``.
        """
        if title is None:
            print("ERROR: cannot import an event without name")
            return
        if start_day is None:
            print("ERROR: cannot import an event without start day")
            return

        event = {
            "title": title,
            "category": category,
            "start_day": start_day,
            "location": location,
            "description": description,
            "tags": tags,
        }

        # Optional fields are omitted from the event when no value is known.
        optional_fields = {
            "url": url,
            "url_human": url_human,
            "start_time": start_time,
            "end_day": end_day,
            "end_time": end_time,
            "last_modified": last_modified,
        }
        for key, value in optional_fields.items():
            if value is not None:
                event[key] = value

        self.events.append(event)

    def default_value_if_exists(self, default_values, key):
        """Return ``default_values[key]``, or ``None`` when unavailable.

        ``None`` is returned both when ``default_values`` itself is ``None``
        and when it does not contain ``key``.
        """
        if default_values is not None and key in default_values:
            return default_values[key]
        return None

    def get_structure(self):
        """Return the header and the collected events as a single dict."""
        return {"header": self.header, "events": self.events}
|
||||
|
||||
class ICALExtractor(Extractor):
    """Extractor that builds events from the VEVENT components of an iCalendar."""

    def __init__(self):
        super().__init__()

    def get_item_from_vevent(self, event, name, raw=False):
        """Return the value of property ``name`` of a VEVENT, or ``None``.

        The value is obtained with ``event.decoded(name)``. When ``raw`` is
        true it is returned as is; otherwise its ``decode()`` method is called
        (presumably turning bytes into text -- confirm against the icalendar
        version in use). Any failure, such as a missing property or a value
        that cannot be decoded, yields ``None``.
        """
        try:
            value = event.decoded(name)
            return value if raw else value.decode()
        except Exception:
            # Best effort: an absent or malformed property is reported as
            # "no value" rather than aborting the whole extraction.
            return None

    def get_dt_item_from_vevent(self, event, name):
        """Return property ``name`` of a VEVENT as a ``(day, time)`` pair.

        A ``datetime`` value is split into its date and time parts; a plain
        ``date`` value gives ``(date, None)``; anything else, including a
        missing property, gives ``(None, None)``.
        """
        item = self.get_item_from_vevent(event, name, raw=True)

        # datetime must be tested first because it is a subclass of date.
        if isinstance(item, datetime):
            return item.date(), item.time()
        if isinstance(item, date):
            return item, None
        return None, None

    def extract(self, content, url, url_human=None, default_values=None):
        """Parse iCalendar ``content`` and return the extracted structure.

        The header and the event list are reset, then one event is added per
        VEVENT component. Category, location and tags come from
        ``default_values``; title, description, dates, last modification and
        the per-event URL come from the VEVENT itself.

        Args:
            content: the iCalendar data to parse.
            url: the URL of the calendar; each event URL is built from it.
            url_human: optional human-friendly URL stored on each event.
            default_values: optional dict that may provide "category",
                "location" and "tags".

        Returns:
            The dict produced by ``get_structure()``.
        """
        print("Extracting ical events from {}".format(url))
        self.set_header(url)
        self.clear_events()

        calendar = icalendar.Calendar.from_ical(content)

        for event in calendar.walk('VEVENT'):
            print(event)
            title = self.get_item_from_vevent(event, "SUMMARY")
            category = self.default_value_if_exists(default_values, "category")

            start_day, start_time = self.get_dt_item_from_vevent(event, "DTSTART")
            end_day, end_time = self.get_dt_item_from_vevent(event, "DTEND")

            location = self.default_value_if_exists(default_values, "location")
            description = self.get_item_from_vevent(event, "DESCRIPTION")

            # Without a UID no per-event URL can be built; None makes
            # add_event leave the "url" field out of the event.
            uuid = self.get_item_from_vevent(event, "UID")
            event_url = url + "#" + uuid if uuid is not None else None

            tags = self.default_value_if_exists(default_values, "tags")

            last_modified = self.get_item_from_vevent(event, "LAST-MODIFIED", raw=True)

            rrule = self.get_item_from_vevent(event, "RRULE", raw=True)
            if rrule is not None:
                # The event is still added below, as a single occurrence.
                print("Recurrent event not yet supported", rrule)

            self.add_event(title, category, start_day, location, description, tags, url=event_url, url_human=url_human, start_time=start_time, end_day=end_day, end_time=end_time, last_modified=last_modified)

        return self.get_structure()
|
||||
|
||||
|
||||
|
||||
@ -92,8 +193,7 @@ class URL2Events:
|
||||
self.downloader = downloader
|
||||
self.extractor = extractor
|
||||
|
||||
|
||||
def process(self, url, url_human = None, cache = None):
|
||||
def process(self, url, url_human = None, cache = None, default_values = None):
|
||||
|
||||
if cache and os.path.exists(cache):
|
||||
print("Loading cache ({})".format(cache))
|
||||
@ -110,7 +210,7 @@ class URL2Events:
|
||||
with open(cache, "w") as text_file:
|
||||
text_file.write(content)
|
||||
|
||||
return self.extractor.extract(content, url, url_human)
|
||||
return self.extractor.extract(content, url, url_human, default_values)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -119,6 +219,6 @@ if __name__ == "__main__":
|
||||
url = "https://calendar.google.com/calendar/ical/programmation.lesaugustes%40gmail.com/public/basic.ics"
|
||||
url_human = "https://www.cafelesaugustes.fr/la-programmation/"
|
||||
|
||||
events = u2e.process(url, url_human, cache = "cache-augustes.ical")
|
||||
events = u2e.process(url, url_human, cache = "cache-augustes.ical", default_values = {"category": "Autre", "location": "Café lecture les Augustes"})
|
||||
|
||||
print(events)
|
||||
#print(events)
|
Loading…
Reference in New Issue
Block a user