162 lines
6.1 KiB
Python
162 lines
6.1 KiB
Python
import icalendar
|
|
import warnings
|
|
|
|
from icalendar import vDatetime
|
|
import bbcode
|
|
|
|
from datetime import datetime, date
|
|
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
|
|
|
|
from .extractor import *
|
|
|
|
from celery.utils.log import get_task_logger
|
|
|
|
logger = get_task_logger(__name__)
|
|
|
|
|
|
|
|
class ICALExtractor(Extractor):
    """Extract events from an iCalendar (ICS) document.

    Every VEVENT found in the parsed calendar is converted into a call to
    ``add_event``.  The helpers ``set_header``, ``clear_events``,
    ``add_event``, ``get_structure`` and ``default_value_if_exists`` are not
    defined in this class; they are presumably inherited from ``Extractor``.
    """

    def __init__(self):
        super().__init__()

    def get_item_from_vevent(self, event, name, raw=False):
        """Return the decoded property *name* of *event*, or None.

        Args:
            event: calendar component exposing a ``decoded(name)`` method.
            name: name of the property to read (e.g. ``"SUMMARY"``).
            raw: when true, return the value given by ``event.decoded``
                unchanged; otherwise call ``.decode()`` on it first.

        Returns:
            The property value, or None when the lookup or the decoding
            fails for any reason (best-effort read).
        """
        try:
            value = event.decoded(name)
            return value if raw else value.decode()
        except Exception:
            # Deliberately best-effort: an absent or undecodable property is
            # reported as None.  Unlike a bare ``except``, this does not
            # swallow KeyboardInterrupt / SystemExit.
            return None

    def get_dt_item_from_vevent(self, event, name):
        """Split the date/time property *name* of *event* into (day, time).

        Returns:
            ``(date, time)`` for a datetime value, ``(date, None)`` for a
            plain date, and ``(None, None)`` when the property is missing
            or has another type.
        """
        item = self.get_item_from_vevent(event, name, raw=True)

        # datetime is a subclass of date, so it has to be tested first.
        if isinstance(item, datetime):
            return item.date(), item.time()
        if isinstance(item, date):
            return item, None
        return None, None

    @staticmethod
    def clean_url(url):
        """Return *url* unchanged.

        Declared as a static method because the function takes no ``self``:
        it can be called both on the class and on an instance.
        """
        return url

    def extract(self, content, url, url_human=None, default_values=None, published=False):
        """Parse *content* as iCalendar data and register one event per VEVENT.

        Args:
            content: iCalendar data handed to ``icalendar.Calendar.from_ical``.
            url: source URL; used for the header and as the prefix of the
                per-event identifiers (``url + "#" + UID``).
            url_human: forwarded unchanged to ``add_event``.
            default_values: fallback values looked up through
                ``default_value_if_exists`` for category, location and tags.
            published: forwarded unchanged to ``add_event``.

        Returns:
            The value of ``self.get_structure()``.
        """
        warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)

        print("Extracting ical events from {}".format(url))
        self.set_header(url)
        self.clear_events()
        # Number of times each UID has been met in this calendar.
        self.uuids = {}

        calendar = icalendar.Calendar.from_ical(content)

        for event in calendar.walk('VEVENT'):
            title = self.get_item_from_vevent(event, "SUMMARY")
            category = self.default_value_if_exists(default_values, "category")

            start_day, start_time = self.get_dt_item_from_vevent(event, "DTSTART")

            end_day, end_time = self.get_dt_item_from_vevent(event, "DTEND")

            location = self.get_item_from_vevent(event, "LOCATION")
            if location is None:
                location = self.default_value_if_exists(default_values, "location")

            description = self.get_item_from_vevent(event, "DESCRIPTION")
            if description is not None:
                # The description may contain HTML: turn <br> tags into
                # newlines, then keep only the text.
                soup = BeautifulSoup(description, features="lxml")
                for line_break in soup.find_all('br'):
                    line_break.replace_with('\n')
                description = soup.get_text()

            # Reset for every event so that an event without UID can neither
            # reuse the URL of the previous event nor hit an unbound name.
            event_url = None
            uuid = self.get_item_from_vevent(event, "UID")

            if uuid is not None:
                already_seen = self.uuids.get(uuid, 0)
                self.uuids[uuid] = already_seen + 1
                if already_seen:
                    # Repeated UID: append a 4-digit suffix holding the
                    # number of earlier occurrences.
                    uuid += ":{:04}".format(already_seen)
                event_url = url + "#" + uuid

            uuidrel = None
            related_to = self.get_item_from_vevent(event, "RELATED-TO")
            if related_to is not None and related_to in self.uuids:
                self.uuids[related_to] += 1
                uuidrel = url + "#" + related_to + ":{:04}".format(self.uuids[related_to] - 1)
                # Possible limitation: the relation is only recorded when the
                # related UID has already been met, so the result depends on
                # the ordering of the events in the calendar.

            tags = self.default_value_if_exists(default_values, "tags")

            last_modified = self.get_item_from_vevent(event, "LAST-MODIFIED", raw=True)

            recurrence_entries = {}
            for prop in ("RRULE", "EXRULE", "EXDATE", "RDATE"):
                value = self.get_item_from_vevent(event, prop, raw=True)
                if value is not None:
                    recurrence_entries[prop] = value

            recurrences = None
            if start_day is not None and recurrence_entries:
                # One "NAME:value" line per recurrence value, each one
                # terminated by a newline.
                chunks = []
                for prop, value in recurrence_entries.items():
                    if isinstance(value, list):
                        chunks.append("\n".join(prop + ":" + v.to_ical().decode() for v in value) + "\n")
                    else:
                        chunks.append(prop + ":" + value.to_ical().decode() + "\n")
                recurrences = "".join(chunks)

            if title is not None:
                # Keep only the identifiers that could actually be built.
                # NOTE(review): an event without UID and without relation is
                # now added with an empty list - confirm add_event accepts it.
                luuids = [u for u in (event_url, uuidrel) if u is not None]
                self.add_event(
                    title,
                    category,
                    start_day,
                    location,
                    description,
                    tags,
                    recurrences=recurrences,
                    uuids=luuids,
                    url_human=url_human,
                    start_time=start_time,
                    end_day=end_day,
                    end_time=end_time,
                    last_modified=last_modified,
                    published=published,
                )

        return self.get_structure()
|
|
|
|
|
|
# A variation on ICAL extractor that removes any event named "Busy"
|
|
class ICALNoBusyExtractor(ICALExtractor):
    """ICAL extractor that ignores every event whose title is exactly 'Busy'."""

    def add_event(self, title, category, start_day, location, description, tags, uuids, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False, image=None, image_alt=None):
        """Forward the event to the parent class unless its title is 'Busy'.

        All arguments are passed through positionally and unchanged; nothing
        is returned.
        """
        if title == 'Busy':
            # Filtered out: such events are never registered.
            return

        super().add_event(
            title, category, start_day, location, description, tags, uuids,
            recurrences, url_human, start_time, end_day, end_time,
            last_modified, published, image, image_alt,
        )
|
|
|
|
|
|
# A variation on ICAL extractor that removes any visual composer anchors
|
|
class ICALNoVCExtractor(ICALExtractor):
    """ICAL extractor that runs event descriptions through a bbcode parser.

    The parser is configured for the ``vc_*`` tags only: row, column and
    column-text tags are replaced by their content, ``vc_raw_html`` blocks
    are replaced by an empty string.
    """

    def __init__(self):
        vc_parser = bbcode.Parser(newline="\n", drop_unrecognized=True, install_defaults=False)
        # Layout tags: keep the enclosed value, drop the tag itself.
        for tag_name in ("vc_row", "vc_column", "vc_column_text"):
            vc_parser.add_simple_formatter(tag_name, "%(value)s")
        # Raw HTML blocks are rendered as nothing.
        vc_parser.add_simple_formatter("vc_raw_html", "")
        self.parser = vc_parser
        super().__init__()

    def clean_vc(self, text):
        """Return *text* formatted by the parser; None is returned as is."""
        if text is None:
            return None
        return self.parser.format(text)

    def add_event(self, title, category, start_day, location, description, tags, uuids, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False, image=None, image_alt=None):
        """Register the event after cleaning its description with ``clean_vc``.

        Every other argument is forwarded positionally and unchanged to the
        parent implementation; nothing is returned.
        """
        cleaned_description = self.clean_vc(description)
        super().add_event(
            title, category, start_day, location, cleaned_description, tags,
            uuids, recurrences, url_human, start_time, end_day, end_time,
            last_modified, published, image, image_alt,
        )