diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py
index 75d7aa8..09b136c 100644
--- a/src/agenda_culturel/celery.py
+++ b/src/agenda_culturel/celery.py
@@ -98,6 +98,8 @@ def run_recurrent_import(self, pk):
         extractor = ICALExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.ICALNOBUSY:
         extractor = ICALNoBusyExtractor()
+    elif rimport.processor == RecurrentImport.PROCESSOR.ICALNOVC:
+        extractor = ICALNoVCExtractor()
     else:
         extractor = None
 
diff --git a/src/agenda_culturel/import_tasks/extractor_ical.py b/src/agenda_culturel/import_tasks/extractor_ical.py
index d7eb14b..8f825dd 100644
--- a/src/agenda_culturel/import_tasks/extractor_ical.py
+++ b/src/agenda_culturel/import_tasks/extractor_ical.py
@@ -2,11 +2,18 @@ import icalendar
 import warnings
 
 from icalendar import vDatetime
+import bbcode
+
 from datetime import datetime, date
 from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
 
 from .extractor import *
 
+from celery.utils.log import get_task_logger
+
+logger = get_task_logger(__name__)
+
+
 
 class ICALExtractor(Extractor):
 
@@ -112,9 +119,39 @@ class ICALExtractor(Extractor):
         return self.get_structure()
 
 
+# A variation on ICAL extractor that removes any event named "Busy"
 class ICALNoBusyExtractor(ICALExtractor):
 
     def add_event(self, title, category, start_day, location, description, tags, uuid, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False, image=None, image_alt=None):
         if title != 'Busy':
             super().add_event(title, category, start_day, location, description, tags, uuid, recurrences, url_human, start_time, end_day, end_time, last_modified, published, image, image_alt)
 
+
+# A variation on ICAL extractor that removes any Visual Composer tags
+class ICALNoVCExtractor(ICALExtractor):
+    """ICAL extractor that strips Visual Composer (vc_*) tags from descriptions."""
+
+    def __init__(self):
+        # Only the tags registered below are interpreted (install_defaults=False);
+        # drop_unrecognized=True asks bbcode to discard any other valid tag.
+        self.parser = bbcode.Parser(newline="\n", drop_unrecognized=True, install_defaults=False)
+        # Layout wrappers: keep the enclosed content, drop the tags themselves.
+        self.parser.add_simple_formatter("vc_row", "%(value)s")
+        self.parser.add_simple_formatter("vc_column", "%(value)s")
+        self.parser.add_simple_formatter("vc_column_text", "%(value)s")
+        # Raw HTML blocks are removed together with their content.
+        self.parser.add_simple_formatter("vc_raw_html", "")
+        super().__init__()
+
+    def clean_vc(self, text):
+        """Return text with the vc_* tags removed; None is returned unchanged."""
+        if text is None:
+            return None
+        result = self.parser.format(text)
+        # Debug level only: this runs for every event added.
+        logger.debug("Description after VC cleanup: %s", result)
+        return result
+
+    def add_event(self, title, category, start_day, location, description, tags, uuid, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False, image=None, image_alt=None):
+        """Add the event with its description passed through clean_vc()."""
+        super().add_event(title, category, start_day, location, self.clean_vc(description), tags, uuid, recurrences, url_human, start_time, end_day, end_time, last_modified, published, image, image_alt)
diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py
index b2be653..ec48de1 100644
--- a/src/agenda_culturel/models.py
+++ b/src/agenda_culturel/models.py
@@ -717,6 +717,7 @@ class RecurrentImport(models.Model):
     class PROCESSOR(models.TextChoices):
         ICAL = "ical", _("ical")
         ICALNOBUSY = "icalnobusy", _("ical no busy")
+        ICALNOVC = "icalnovc", _("ical no VC")
 
     class DOWNLOADER(models.TextChoices):
         SIMPLE = "simple", _("simple")
diff --git a/src/requirements.txt b/src/requirements.txt
index 62a4233..bf62e49 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -35,4 +35,5 @@ django-ckeditor==6.7.0
 django-recurrence==1.11.1
 icalendar==5.0.11
 lxml==5.1.0
+bbcode==1.1.0
 