124 lines
2.9 KiB
Python
124 lines
2.9 KiB
Python
|
#!/usr/bin/python3
|
||
|
# coding: utf-8
|
||
|
|
||
|
|
||
|
from abc import ABC, abstractmethod
|
||
|
from urllib.parse import urlparse
|
||
|
import urllib.request
|
||
|
import os
|
||
|
from selenium import webdriver
|
||
|
from selenium.webdriver.chrome.service import Service
|
||
|
from selenium.webdriver.chrome.options import Options
|
||
|
import icalendar
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
class Downloader(ABC):
    """Abstract interface for objects that fetch the content behind a URL."""

    def __init__(self):
        """Do nothing; present so subclasses can chain ``super().__init__()``."""

    @abstractmethod
    def download(self, url):
        """Fetch ``url`` and return what was retrieved.

        Concrete subclasses must override this method; the base
        implementation does nothing and returns ``None``.
        """
|
||
|
|
||
|
class SimpleDownloader(Downloader):
    """Downloader that fetches a URL with ``urllib`` and returns decoded text."""

    def __init__(self):
        super().__init__()

    def download(self, url):
        """Fetch ``url`` and return its body as text.

        The body is decoded with the charset announced in the response
        headers; when the response announces none, UTF-8 is assumed instead
        of failing.

        Args:
            url: Address of the resource to fetch.

        Returns:
            The decoded body, or ``None`` if fetching or decoding failed.
        """
        print("Downloading {}".format(url))

        try:
            # The context manager closes the response even if read() fails.
            with urllib.request.urlopen(url) as resource:
                # get_content_charset() returns None when no charset is
                # declared; decode(None) would raise, so fall back to UTF-8.
                charset = resource.headers.get_content_charset() or "utf-8"
                return resource.read().decode(charset)
        except Exception as error:
            # Best effort: callers get None on failure. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate
            # and reports why the download failed.
            print("Download of {} failed: {}".format(url, error))
            return None
|
||
|
|
||
|
|
||
|
|
||
|
class ChromiumHeadlessDownloader(Downloader):
    """Downloader that loads pages in a headless Chrome driven by Selenium."""

    def __init__(self):
        """Start a headless Chrome session using ``/usr/bin/chromedriver``."""
        super().__init__()
        options = Options()
        options.add_argument("--headless=new")
        service = Service("/usr/bin/chromedriver")
        # NOTE(review): the driver is never quit in this class; the browser
        # process presumably lives until the interpreter exits — confirm.
        self.driver = webdriver.Chrome(service=service, options=options)

    def download(self, url):
        """Navigate the browser to ``url`` and return the page source.

        Args:
            url: Address of the page to load.

        Returns:
            The ``page_source`` reported by the driver after loading ``url``.
        """
        print("Download {}".format(url))

        self.driver.get(url)
        # Must go through self: a bare ``driver`` is not defined in this
        # scope and raised NameError on every call.
        return self.driver.page_source
|
||
|
|
||
|
|
||
|
class Extractor(ABC):
    """Abstract interface for objects that turn downloaded content into events."""

    def __init__(self):
        """Do nothing; present so subclasses can chain ``super().__init__()``."""

    @abstractmethod
    def extract(self, content, url, url_human=None):
        """Extract events from ``content``.

        Concrete subclasses must override this method; the base
        implementation does nothing and returns ``None``.

        Args:
            content: The downloaded content to analyse.
            url: Address the content was obtained from.
            url_human: Optional second URL passed along by the caller.
        """
|
||
|
|
||
|
|
||
|
class ICALExtractor(Extractor):
    """Extractor for iCalendar content.

    For now it only prints each ``VEVENT`` component it finds and always
    returns an empty list.
    """

    def __init__(self):
        super().__init__()

    def extract(self, content, url, url_human=None):
        """Parse ``content`` as iCalendar data and print every ``VEVENT``.

        Args:
            content: iCalendar text handed to ``icalendar.Calendar.from_ical``.
            url: Address the content came from (only used in the log line).
            url_human: Accepted for interface compatibility; unused here.

        Returns:
            An empty list.
        """
        print(f"Extracting ical events from {url}")

        parsed = icalendar.Calendar.from_ical(content)
        for vevent in parsed.walk('VEVENT'):
            print(vevent)

        return []
|
||
|
|
||
|
|
||
|
|
||
|
class URL2Events:
    """Download the content behind a URL and hand it to an extractor.

    The downloaded text can optionally be cached in a file so later runs
    skip the download.
    """

    def __init__(self, downloader, extractor):
        """Store the collaborators.

        Args:
            downloader: Object with a ``download(url)`` method.
            extractor: Object with an ``extract(content, url, url_human)``
                method.
        """
        self.downloader = downloader
        self.extractor = extractor

    def process(self, url, url_human=None, cache=None):
        """Return the events extracted from ``url``.

        Args:
            url: Address to download when no usable cache exists.
            url_human: Optional second URL forwarded to the extractor.
            cache: Optional path of a cache file. If it exists its content
                is used instead of downloading; otherwise the downloaded
                content is written to it (parent directories are created).

        Returns:
            Whatever the extractor returns, or ``None`` when the downloader
            produced no content (returned ``None``).
        """
        if cache and os.path.exists(cache):
            print("Loading cache ({})".format(cache))
            # read() returns the text as stored; joining readlines() with
            # "\n" doubled every line break. newline="" disables newline
            # translation so the text round-trips unchanged.
            with open(cache, encoding="utf-8", newline="") as cache_file:
                content = cache_file.read()
        else:
            content = self.downloader.download(url)

            if content is None:
                # A failed download must not create an empty cache file
                # that would be mistaken for valid content on the next run.
                print("No content downloaded from {}".format(url))
                return None

            if cache:
                print("Saving cache ({})".format(cache))
                cache_dir = os.path.dirname(cache)
                if cache_dir:
                    os.makedirs(cache_dir, exist_ok=True)
                with open(cache, "w", encoding="utf-8", newline="") as cache_file:
                    cache_file.write(content)

        return self.extractor.extract(content, url, url_human)
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Demo run: fetch one public iCal feed (or reuse its cache file) and
    # print whatever the extractor returns.
    url = "https://calendar.google.com/calendar/ical/programmation.lesaugustes%40gmail.com/public/basic.ics"
    url_human = "https://www.cafelesaugustes.fr/la-programmation/"

    u2e = URL2Events(SimpleDownloader(), ICALExtractor())
    events = u2e.process(url, url_human, cache="cache-augustes.ical")

    print(events)
|