Ajout source médiathèques

2025-01-19 13:50:41 +01:00 · 2025-01-19 13:50:41 +01:00 · 87a1168aae
commit 87a1168aae
parent 9cceba7dc0
4 changed files with 158 additions and 0 deletions
--- a/experimentations/get_meditheques_clermont.py
+++ b/experimentations/get_meditheques_clermont.py
@ -0,0 +1,44 @@
 #!/usr/bin/python3
 # coding: utf-8
 import os
 import json
 import sys
 # Get the name of the directory
 # where this file is located.
 current = os.path.dirname(os.path.realpath(__file__))
 # Getting the parent directory name
 # where the current directory is present.
 parent = os.path.dirname(current)
 # adding the parent directory to 
 # the sys.path.
 sys.path.append(parent)
 sys.path.append(parent + "/src")
 from src.agenda_culturel.import_tasks.downloader import *
 from src.agenda_culturel.import_tasks.extractor import *
 from src.agenda_culturel.import_tasks.importer import *
 from src.agenda_culturel.import_tasks.custom_extractors import *
 if __name__ == "__main__":
    # Machine-readable listing endpoint that gets downloaded, and the
    # matching page meant for human visitors.
    url = "https://bibliotheques-clermontmetropole.eu/iguana/Service.PubContainer.cls?uuid=a4a1f992-06da-4ff4-9176-4af0a095c7d1"
    url_human = "https://bibliotheques-clermontmetropole.eu/iguana/www.main.cls?surl=AGENDA_Tout%20lagenda"
    exportfile = "events-mediatheques.json"

    pipeline = URL2Events(SimpleDownloader(), iguana_agenda.CExtractor())

    # Best-effort run: any failure during extraction or export is reported
    # on stdout instead of being raised.
    try:
        extracted = pipeline.process(
            url,
            url_human,
            cache="cache-mediatheques.html",
            default_values={},
            published=True,
        )
        print(f"Saving events to file {exportfile}")
        with open(exportfile, "w") as out:
            # default=str stringifies values json cannot serialize natively.
            json.dump(extracted, out, indent=4, default=str)
    except Exception as err:
        print(f"Exception: {err}")
--- a/src/agenda_culturel/celery.py
+++ b/src/agenda_culturel/celery.py
@ -154,6 +154,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
        extractor = laraymonde.CExtractor()
    elif rimport.processor == RecurrentImport.PROCESSOR.APIDAE:
        extractor = apidae_tourisme.CExtractor()
    elif rimport.processor == RecurrentImport.PROCESSOR.IGUANA:
        extractor = iguana_agenda.CExtractor()
    else:
        extractor = None
--- a/src/agenda_culturel/import_tasks/custom_extractors/iguana_agenda.py
+++ b/src/agenda_culturel/import_tasks/custom_extractors/iguana_agenda.py
@ -0,0 +1,111 @@
 from ..generic_extractors import *
 from bs4 import BeautifulSoup
 from datetime import datetime
 # A class dedicated to getting events from an Iguana agenda (médiathèques).
 # Example listing page:
 # https://bibliotheques-clermontmetropole.eu/iguana/www.main.cls?surl=AGENDA_Tout%20lagenda
 class CExtractor(TwoStepsExtractorNoPause):
    """Two-step extractor for an Iguana agenda.

    Step one (``build_event_url_list``) reads the listing and registers one
    URL per single-date event. Step two (``add_event_from_content``) parses
    the document downloaded from each registered URL and records the event.
    """

    def __init__(self):
        super().__init__()

    @staticmethod
    def _optional_text(soup, selector):
        """Return the text of the first node matching *selector*, or ``""``."""
        node = soup.select_one(selector)
        return node.text if node is not None else ""

    def guess_category(self, category):
        """Map the category label found on the page to an agenda category.

        Rules are tried in order and the first match wins; labels matching
        no rule yield ``"Sans catégorie"``.
        """
        if "Cinéma" in category:
            return "Cinéma"
        if "Conférence" in category or "Rencontres" in category:
            return "Rencontres & débats"
        if "Lecture" in category or "Conte" in category:
            return "Spectacles"
        if "Atelier" in category or "Jeux" in category:
            return "Animations & Ateliers"
        if "Numérique" in category:
            return "Rendez-vous locaux"
        return "Sans catégorie"

    def guess_tags_from_category(self, category):
        """Return the list of tags implied by the category label (may be empty)."""
        tags = []
        if "Lecture" in category:
            tags.append("📖 lecture")
        if "Jeux" in category:
            tags.append("🎲 jeux")
        return tags

    def build_event_url_list(self, content, infuture_days=180):
        """Register the URL of every single-date event found in the listing.

        Args:
            content: HTML of the listing; events are its ``li.listItem`` nodes.
            infuture_days: not used by this extractor.

        Items whose ``onclick`` attribute is missing or does not have the
        expected shape are skipped rather than aborting the whole listing.
        """
        soup = BeautifulSoup(content, "html.parser")

        root_address_human = self.url_human.split('?')[0]
        root_address = self.url.split('Service')[0]

        for item in soup.select("li.listItem"):
            onclick = item.get("onclick")
            if not onclick:
                continue

            # The handler carries its arguments as double-quoted strings:
            # the first one is the content item uuid, the second one is a
            # '^'-separated value whose second field is used as "v".
            elems = onclick.split('"')
            try:
                content_item = elems[1]
                v = elems[3].split('^')[1]
            except IndexError:
                continue

            # A non-empty "until" date marks a multi-date event; those are
            # not imported. A missing node is treated like an empty one.
            until = item.select_one('.until.maindate')
            if until is not None and until.text != '':
                continue

            url_human = root_address_human + '?p=*&v=' + v + "#contentitem=" + content_item
            url = root_address + 'Service.PubItem.cls?action=get&instance=*&uuid=' + content_item
            self.add_event_url(url)
            self.add_event_url_human(url, url_human)

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one event document and record the event it describes.

        Args:
            event_content: XML document with a ``Title`` element and a
                ``Content`` element whose text is an HTML fragment.
            event_url: URL the document was downloaded from; also used as
                the event uuid.
            url_human: accepted for interface compatibility; this method
                forwards ``event_url`` as ``url_human`` instead.
            default_values: forwarded unchanged to ``add_event_with_props``.
            published: forwarded unchanged to ``add_event_with_props``.

        Title, description, location, date, time, category and the
        ``.infos`` block are required: a document lacking one of them still
        raises. The image, audience (``.public``) and access (``.acces``)
        nodes are optional.
        """
        envelope = BeautifulSoup(event_content, "xml")
        title = envelope.select_one("Title").text
        content = envelope.select_one("Content").text

        soup = BeautifulSoup(content, "html.parser")

        # NOTE(review): a missing image is passed on as None; confirm that
        # add_event_with_props accepts image=None.
        image_node = soup.select_one(".image img")
        image = image_node.get("src") if image_node is not None else None

        description = soup.select_one(".rightcolumn .content").text
        location = soup.select_one(".infos .location").text
        public = self._optional_text(soup, ".infos .public")
        start_day = Extractor.parse_french_date(soup.select_one(".infos .date .from").text)
        start_time = Extractor.parse_french_time(soup.select_one(".infos .date .time").text)
        acces = self._optional_text(soup, ".infos .acces")
        category = soup.select_one(".rightcolumn .category").text
        infos = soup.select_one('.infos').text

        # The practical information block is appended to the description.
        description = description + "\n" + infos

        # Tags are derived from the raw label, before it is mapped to an
        # agenda category.
        tags = self.guess_tags_from_category(category)
        category = self.guess_category(category)

        if "Tout-petits" in public or "Jeunesse" in public:
            tags.append("🎈 jeune public")
        if "Accès libre" in acces:
            tags.append("💶 gratuit")

        # NOTE(review): event_url is passed as url_human; presumably the
        # base class substitutes the human URL registered through
        # add_event_url_human during listing -- confirm.
        self.add_event_with_props(
            default_values,
            event_url,
            title,
            category,
            start_day,
            location,
            description,
            tags,
            recurrences=None,
            uuids=[event_url],
            url_human=event_url,
            start_time=start_time,
            end_day=None,
            end_time=None,
            published=published,
            image=image,
            image_alt="",
        )
--- a/src/agenda_culturel/models.py
+++ b/src/agenda_culturel/models.py
@ -2011,6 +2011,7 @@ class RecurrentImport(models.Model):
        LERIO = "rio", _('Le Rio')
        LARAYMONDE = "raymonde", _('La Raymonde')
        APIDAE = 'apidae', _('Agenda apidae tourisme')
        IGUANA = 'iguana', _('Agenda iguana (médiathèques)')
    class DOWNLOADER(models.TextChoices):
        SIMPLE = "simple", _("simple")