diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py index 6c83ebc..dcbd4a1 100644 --- a/src/agenda_culturel/celery.py +++ b/src/agenda_culturel/celery.py @@ -130,6 +130,8 @@ def run_recurrent_import(self, pk): extractor = wordpress_mec.CExtractor() elif rimport.processor == RecurrentImport.PROCESSOR.FBEVENTS: extractor = fbevents.CExtractor() + elif rimport.processor == RecurrentImport.PROCESSOR.C3C: + extractor = c3c.CExtractor() else: extractor = None diff --git a/src/agenda_culturel/import_tasks/custom_extractors/c3c.py b/src/agenda_culturel/import_tasks/custom_extractors/c3c.py index 4281767..08c5048 100644 --- a/src/agenda_culturel/import_tasks/custom_extractors/c3c.py +++ b/src/agenda_culturel/import_tasks/custom_extractors/c3c.py @@ -1,5 +1,6 @@ from ..generic_extractors import * from bs4 import BeautifulSoup +from datetime import timedelta # A class dedicated to get events from La Cour des 3 Coquins # URL: https://billetterie-c3c.clermont-ferrand.fr// @@ -46,56 +47,87 @@ class CExtractor(TwoStepsExtractor): else: image = None - description = soup.select_one(".presentation") - duree = soup.select_one("#criteres .DUREE-V .valeur-critere li") - if duree is not None: - duree = self.parse_french_time(duree.text) + description = soup.select_one(".presentation").get_text() + duration = soup.select_one("#criteres .DUREE-V .valeur-critere li") + if duration is not None: + duration = self.parse_french_time(duration.text) location = self.nom_lieu - tags = [] + categories = [] for t in soup.select(".sous-titre span"): classes = t.get("class") if classes and len(classes) > 0: if classes[0].startswith("LIEU-"): location = t.text elif classes[0].startswith("THEMATIQUE-"): - tag = self.category_c3c2agenda(t.text) - if tag is not None: - tags.append(tag) + cat = self.category_c3c2agenda(t.text) + if cat is not None: + categories.append(cat) # TODO: parser les dates, récupérer les heures () - dates = [self.parse_french_date(o.get("value")) for o in soup.select("select.datedleb_resa option")] - - - - print("EVENT ", event_url) - print("- ", title) - print("- ", image) - print("- ", len(description)) - print("- ", duree) - print("- ", location) - print("- ", tags) - print("- ", dates) - - return + dates = [o.get("value") for o in soup.select("select.datedleb_resa option")] + + patternCodeSite = re.compile(r'.*gsw_vars\["CODEPRESTATAIRE"\] = "(.*?)";.*', flags=re.DOTALL) + patternCodeObject = re.compile(r'.*gsw_vars\["CODEPRESTATION"\] = "(.*?)";.*', flags=re.DOTALL) + scripts = soup.find_all('script') + codeSite = "" + idObject = "" + for script in scripts: + if(patternCodeSite.match(str(script.string))): + data = patternCodeSite.match(script.string) + codeSite = data.groups()[0] + if(patternCodeObject.match(str(script.string))): + data = patternCodeObject.match(script.string) + idObject = data.groups()[0] - url_human = event_url + pause = self.downloader.pause + self.downloader.pause = False + # get exact schedule need two supplementary requests + datetimes = [] + if codeSite != "" and idObject != "": + for date in dates: + # the first page is required such that the server knows the selected date + page1 = self.downloader.get_content("https://billetterie-c3c.clermont-ferrand.fr/booking?action=searchAjax&cid=2&afficheDirectDispo=" + date + "&type_prestataire=V&cle_fiche=PRESTATION-V-" + codeSite + "-" + idObject + "&datedeb=" + date) + # then we get the form with hours + page2 = self.downloader.get_content("https://billetterie-c3c.clermont-ferrand.fr/booking?action=detailTarifsPrestationAjax&prestation=V-" + codeSite + "-" + idObject) + soup2 = BeautifulSoup(page2, "html.parser") + times = [o.text for o in soup2.select("#quart_en_cours_spec option")] + for t in times: + startdate = self.parse_french_date(date) + starttime = self.parse_french_time(t) + start = datetime.datetime.combine(startdate, starttime) + enddate = None + endtime = None + if duration is not None: + end = start + timedelta(hours=duration.hour, minutes=duration.minute, seconds=duration.second) + enddate = end.date() + endtime = end.time() + datetimes.append((startdate, starttime, enddate, endtime)) + self.downloader.pause = pause - self.add_event_with_props( - event_url, - None, - None, - start_day, - location, - description, - tags, - recurrences=None, - uuids=[event_url], - url_human=url_human, - start_time=start_time, - end_day=end_day, - end_time=end_time, - published=published, - image=image, - ) + category = None + if "category" in default_values: + category = default_values["category"] + if len(categories) > 0: + category = categories[0] + + for dt in datetimes: + + self.add_event_with_props( + event_url, + title, + category, + dt[0], + location, + description, + [], + recurrences=None, + uuids=[event_url], + url_human=url_human, + start_time=dt[1], + end_day=dt[2], + end_time=dt[3], + published=published, + image=image, + ) diff --git a/src/agenda_culturel/import_tasks/downloader.py b/src/agenda_culturel/import_tasks/downloader.py index 67e86a1..c964fae 100644 --- a/src/agenda_culturel/import_tasks/downloader.py +++ b/src/agenda_culturel/import_tasks/downloader.py @@ -83,6 +83,7 @@ class ChromiumHeadlessDownloader(Downloader): self.service = Service("/usr/bin/chromedriver") self.driver = webdriver.Chrome(service=self.service, options=self.options) + def download(self, url, referer=None, post=None): if post: raise Exception("POST method with Chromium headless not yet implemented") diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py index e4ec159..9fce25c 100644 --- a/src/agenda_culturel/import_tasks/extractor.py +++ b/src/agenda_culturel/import_tasks/extractor.py @@ -104,7 +104,7 @@ class Extractor(ABC): s = "0" else: # format heures - m = re.search("([0-9]+) [Hh:.]", text) + m = re.search("([0-9]+)[ ]*[Hh:.]", text) if m: h = m.group(1) m = "0" diff --git a/src/agenda_culturel/migrations/0072_alter_recurrentimport_processor.py b/src/agenda_culturel/migrations/0072_alter_recurrentimport_processor.py new file mode 100644 index 0000000..8f80e6b --- /dev/null +++ b/src/agenda_culturel/migrations/0072_alter_recurrentimport_processor.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.9 on 2024-09-04 21:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('agenda_culturel', '0071_alter_contactmessage_message_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='recurrentimport', + name='processor', + field=models.CharField(choices=[('ical', 'ical'), ('icalnobusy', 'ical no busy'), ('icalnovc', 'ical no VC'), ('lacoope', 'lacoope.org'), ('lacomedie', 'la comédie'), ('lefotomat', 'le fotomat'), ('lapucealoreille', "la puce à l'oreille"), ('Plugin wordpress MEC', 'Plugin wordpress MEC'), ('Facebook events', "Événements d'une page"), ('cour3coquins', 'la cour des 3 coquins')], default='ical', max_length=20, verbose_name='Processor'), + ), + ] diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py index b4c858e..db980a3 100644 --- a/src/agenda_culturel/models.py +++ b/src/agenda_culturel/models.py @@ -1241,6 +1241,7 @@ class RecurrentImport(models.Model): LAPUCEALOREILLE = "lapucealoreille", _("la puce à l'oreille") MECWORDPRESS = "Plugin wordpress MEC", _("Plugin wordpress MEC") FBEVENTS = "Facebook events", _("Événements d'une page") + C3C = "cour3coquins", _("la cour des 3 coquins") class DOWNLOADER(models.TextChoices): SIMPLE = "simple", _("simple") diff --git a/src/agenda_culturel/templates/agenda_culturel/cancel_import_confirm.html b/src/agenda_culturel/templates/agenda_culturel/cancel_import_confirm.html index c6c728e..5a55936 100644 --- a/src/agenda_culturel/templates/agenda_culturel/cancel_import_confirm.html +++ b/src/agenda_culturel/templates/agenda_culturel/cancel_import_confirm.html @@ -14,7 +14,7 @@ {{ form }}