On introduit un outil pour importer les événements depuis une page

qui contient un lien Google Agenda (ou équivalent)
This commit is contained in:
Jean-Marie Favreau 2024-09-01 19:00:29 +02:00
parent 2dde114ad8
commit 9b429f6951
4 changed files with 111 additions and 6 deletions

View File

@ -199,7 +199,10 @@ class Extractor(ABC):
)
def get_structure(self):
    """Return the extracted data, or ``None`` when no event was collected.

    Returns:
        A dict with the keys ``"header"`` (``self.header``) and ``"events"``
        (``self.events``), or ``None`` if ``self.events`` is empty.
    """
    # Report "nothing extracted" explicitly instead of handing back a
    # structure whose event list is empty.
    if not self.events:
        return None
    return {"header": self.header, "events": self.events}
def clean_url(url):
from .extractor_ical import ICALExtractor
@ -213,8 +216,9 @@ class Extractor(ABC):
def get_default_extractors(single_event=False):
    """Build the default list of extractors to try on a page.

    Args:
        single_event: when true, the iCal extractor is left out and only
            the Facebook and calendar-link extractors are returned.

    Returns:
        A list of freshly created extractor instances.
    """
    # Imported inside the function, as in the original code (presumably to
    # avoid circular imports between the extractor modules -- TODO confirm).
    from .extractor_ical import ICALExtractor
    from .extractor_facebook import FacebookEventExtractor
    from .extractor_ggcal_link import GoogleCalendarLinkEventExtractor

    # FacebookEventExtractor's constructor takes no argument.
    page_extractors = [FacebookEventExtractor(), GoogleCalendarLinkEventExtractor()]
    if single_event:
        return page_extractors
    return [ICALExtractor(), *page_extractors]

View File

@ -225,8 +225,7 @@ class FacebookEvent:
class FacebookEventExtractor(Extractor):
def __init__(self):
    """Initialise the extractor through the base-class constructor."""
    super().__init__()
def clean_url(url):

View File

@ -0,0 +1,65 @@
from datetime import datetime
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from .extractor import *
from .generic_extractors import *
import json
import logging
logger = logging.getLogger(__name__)
class GoogleCalendarLinkEventExtractor(Extractor):
    """Extract one event from a page that links to an "add to calendar" URL.

    The page is searched for an ``<a>`` element whose ``href`` starts with one
    of ``self.possible_urls``; the event fields are then read from that URL
    through ``GGCalendar``. At most one event is registered per page.
    """

    def __init__(self):
        super().__init__()
        # URL prefixes recognised as "add to calendar" links.
        self.possible_urls = [
            "https://calendar.google.com/calendar/",
            "https://addtocalendar.com/",
        ]

    def extract(
        self, content, url, url_human=None, default_values=None, published=False
    ):
        """Look for a calendar link in ``content`` and register its event.

        Args:
            content: HTML source of the page.
            url: address of the page; used as header, as the event's only
                uuid and as its human-readable URL.
            url_human: ignored, the page ``url`` is always used instead.
            default_values: not used.
            published: forwarded to ``add_event``.

        Returns:
            Whatever ``self.get_structure()`` returns.
        """
        soup = BeautifulSoup(content, "html.parser")

        for prefix in self.possible_urls:
            links = soup.select(f'a[href^="{prefix}"]')
            if not links:
                continue

            # Only the first matching link of the page is considered.
            gg_cal = GGCalendar(links[0]["href"])
            if not gg_cal.is_valid_event():
                continue

            # The description is optional in the link: guard against None
            # before cleaning it up.
            description = gg_cal.description
            if description is not None:
                # The original literal renders as a single blank character;
                # it is treated here as a non-breaking space (U+00A0).
                # NOTE(review): confirm -- stripping ordinary spaces would
                # glue the words of the description together.
                description = description.replace("\xa0", "")

            self.set_header(url)
            self.add_event(
                title=gg_cal.title,
                category=None,
                start_day=gg_cal.start_day,
                location=gg_cal.location,
                description=description,
                tags=None,
                uuids=[url],
                recurrences=None,
                url_human=url,
                start_time=gg_cal.start_time,
                end_day=gg_cal.end_day,
                end_time=gg_cal.end_time,
                published=published,
                image=None,
            )
            # Stop at the first valid event.
            break

        return self.get_structure()

View File

@ -18,12 +18,32 @@ class GGCalendar:
self.url = url
self.extract_info()
def filter_keys(params):
    """Return a copy of ``params`` with ``e[0][name]``-style keys renamed.

    Keys starting with ``e[0]`` lose every ``e[0][`` occurrence and their
    last character; all other keys are copied unchanged. Values are kept
    as they are.
    """

    def short_name(key):
        if not key.startswith("e[0]"):
            return key
        # Strip the "e[0][" wrapper, then the trailing character (the
        # closing "]" for keys of the form "e[0][name]").
        return key.replace("e[0][", "")[:-1]

    return {short_name(key): value for key, value in params.items()}
def is_valid_event(self):
    """Tell whether both ``start_day`` and ``title`` are set (not ``None``)."""
    something_missing = self.start_day is None or self.title is None
    return not something_missing
def extract_info(self):
parsed_url = urlparse(self.url.replace("#", "%23"))
params = parse_qs(parsed_url.query)
params = GGCalendar.filter_keys(params)
logger.warning(self.url)
logger.warning(params)
self.location = params["location"][0] if "location" in params else None
self.title = params["text"][0] if "text" in params else params["title"][0] if "title" in params else None
self.description = params["description"][0] if "description" in params else None
self.location = params["location"][0] if "location" in params else None
self.title = params["text"][0] if "text" in params else None
if "dates" in params:
dates = [x.replace(" ", "+") for x in params["dates"][0].split("/")]
if len(dates) > 0:
@ -37,7 +57,24 @@ class GGCalendar:
else:
self.end_day = None
self.end_time = None
elif "date_start" in params:
date = parser.parse(params["date_start"][0])
self.start_day = date.date()
self.start_time = date.time()
if "date_end" in params:
dateend = parser.parse(params["date_end"][0])
if dateend != date:
self.end_day = dateend.date()
self.end_time = dateend.time()
else:
self.end_day = None
self.end_time = None
if self.start_time == datetime.time(0):
self.start_time = None
else:
self.end_day = None
self.end_time = None
else:
raise Exception("Unable to find a date in google calendar URL")
self.start_day = None