diff --git a/experimentations/get_la_raymonde.py b/experimentations/get_la_raymonde.py
new file mode 100755
index 0000000..b5b91f6
--- /dev/null
+++ b/experimentations/get_la_raymonde.py
@@ -0,0 +1,56 @@
+#!/usr/bin/python3
+# coding: utf-8
+"""Experiment script: download the La Raymonde agenda and dump it as JSON."""
+
+import json
+import os
+import sys
+import traceback
+
+# Directory that contains this script.
+current = os.path.dirname(os.path.realpath(__file__))
+
+# Its parent directory, which also holds the "src" tree.
+parent = os.path.dirname(current)
+
+# Make both the parent directory and its "src" subdirectory importable so
+# that the "src.agenda_culturel" imports below resolve when the script is
+# run directly.
+sys.path.append(parent)
+sys.path.append(parent + "/src")
+
+from src.agenda_culturel.import_tasks.downloader import *
+from src.agenda_culturel.import_tasks.extractor import *
+from src.agenda_culturel.import_tasks.importer import *
+from src.agenda_culturel.import_tasks.custom_extractors import *
+
+
+if __name__ == "__main__":
+
+    u2e = URL2Events(SimpleDownloader(), laraymonde.CExtractor())
+    url = "https://www.raymondbar.net/"
+    url_human = "https://www.raymondbar.net/"
+
+    try:
+        events = u2e.process(
+            url,
+            url_human,
+            cache="cache-la-raymonde.html",
+            default_values={
+                "location": "La Raymonde",
+                "category": "Fêtes & Concerts",
+            },
+            published=True,
+        )
+
+        exportfile = "events-la-raymonde.json"
+        print("Saving events to file {}".format(exportfile))
+        with open(exportfile, "w", encoding="utf-8") as f:
+            json.dump(events, f, indent=4, default=str)
+    except Exception as e:
+        # Last-resort handler for this command-line experiment: keep the
+        # short message, but also show the traceback and exit non-zero so
+        # a failed run cannot be mistaken for a successful one.
+        print("Exception: " + str(e))
+        traceback.print_exc()
+        sys.exit(1)
diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py
index 26c5e6d..cf7d753 100644
--- a/src/agenda_culturel/celery.py
+++ b/src/agenda_culturel/celery.py
@@ -150,6 +150,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
         extractor = arachnee.CExtractor()
     elif rimport.processor == RecurrentImport.PROCESSOR.LERIO:
         extractor = lerio.CExtractor()
+    elif rimport.processor == RecurrentImport.PROCESSOR.LARAYMONDE:
+        extractor = laraymonde.CExtractor()
     else:
         extractor = None
 
diff 
--git a/src/agenda_culturel/import_tasks/custom_extractors/laraymonde.py b/src/agenda_culturel/import_tasks/custom_extractors/laraymonde.py
new file mode 100644
index 0000000..4249bba
--- /dev/null
+++ b/src/agenda_culturel/import_tasks/custom_extractors/laraymonde.py
@@ -0,0 +1,101 @@
+from ..generic_extractors import *
+from bs4 import BeautifulSoup
+from datetime import datetime
+
+
+# A class dedicated to getting events from Raymond Bar
+# URL: https://www.raymondbar.net/
+class CExtractor(TwoStepsExtractorNoPause):
+    """Two-step extractor for the shows listed on raymondbar.net.
+
+    The listing page is scanned for links to individual show pages, then
+    each show page is parsed into a single event.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def build_event_url_list(self, content, infuture_days=180):
+        """Register the URL of every show linked from the listing page.
+
+        Args:
+            content: HTML of the listing page.
+            infuture_days: Not used by this extractor.
+        """
+        soup = BeautifulSoup(content, "html.parser")
+
+        for link in soup.select(".showsList .showMore"):
+            # Skip anchors without a target instead of raising KeyError.
+            href = link.get("href")
+            if href:
+                self.add_event_url(href)
+
+    def add_event_from_content(
+        self,
+        event_content,
+        event_url,
+        url_human=None,
+        default_values=None,
+        published=False,
+    ):
+        """Parse one show page and register the event it describes.
+
+        Nothing is registered when the page has no title or no date.
+
+        Args:
+            event_content: HTML of the show page.
+            event_url: URL of the show page; also used as the event's
+                unique identifier and as its human-readable URL.
+            url_human: Not used; ``event_url`` is registered instead.
+            default_values: Forwarded to ``add_event_with_props``.
+            published: Forwarded to ``add_event_with_props``.
+        """
+        soup = BeautifulSoup(event_content, "html.parser")
+
+        title_tag = soup.select_one(".showDesc h4 a.summary")
+        if title_tag is None:
+            print("impossible de récupérer le titre")
+            return
+        title = title_tag.text
+
+        # The day is the part of the marker's "title" attribute that
+        # precedes the "T" separator.
+        date_tag = soup.select_one(".showDate .value-title")
+        start_day = date_tag.get("title") if date_tag is not None else None
+        if start_day:
+            start_day = start_day.split("T")[0]
+        if not start_day:
+            print("impossible de récupérer la date")
+            return
+
+        description_tag = soup.select_one(".showDetails.description")
+        description = "" if description_tag is None else description_tag.text
+
+        # select() returns a possibly empty list, never None: only read the
+        # last image when there is one, and tolerate missing attributes.
+        image = None
+        image_alt = None
+        images = soup.select(".showDetails.description img")
+        if images:
+            image = images[-1].get("src")
+            image_alt = images[-1].get("alt")
+
+        self.add_event_with_props(
+            default_values,
+            event_url,
+            title,
+            None,
+            start_day,
+            None,
+            description,
+            [],
+            recurrences=None,
+            uuids=[event_url],
+            url_human=event_url,
+            start_time=None,
+            end_day=None,
+            end_time=None,
+            published=published,
+            image=image,
+            image_alt=image_alt,
+        )
diff --git a/src/agenda_culturel/import_tasks/custom_extractors/lerio.py b/src/agenda_culturel/import_tasks/custom_extractors/lerio.py
index 
117693c..0d859f2 100644 --- a/src/agenda_culturel/import_tasks/custom_extractors/lerio.py +++ b/src/agenda_culturel/import_tasks/custom_extractors/lerio.py @@ -18,7 +18,6 @@ class CExtractor(TwoStepsExtractorNoPause): links = soup.select("td.seance_link a") if links: for l in links: - print(l["href"]) self.add_event_url(l["href"]) def to_text_select_one(soup, filter): diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py index fa164bc..1f40866 100644 --- a/src/agenda_culturel/models.py +++ b/src/agenda_culturel/models.py @@ -2009,6 +2009,7 @@ class RecurrentImport(models.Model): C3C = "cour3coquins", _("la cour des 3 coquins") ARACHNEE = "arachnee", _("Arachnée concert") LERIO = "rio", _('Le Rio') + LARAYMONDE = "raymonde", _('La Raymonde') class DOWNLOADER(models.TextChoices): SIMPLE = "simple", _("simple")