Ajout source médiathèques
This commit is contained in:
parent
9cceba7dc0
commit
87a1168aae
44
experimentations/get_meditheques_clermont.py
Executable file
44
experimentations/get_meditheques_clermont.py
Executable file
@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/python3
# coding: utf-8
"""Experiment: fetch the Clermont Métropole libraries agenda and dump it as JSON.

The script downloads the Iguana agenda feed, converts it to events with the
project's import pipeline and writes the result to ``events-mediatheques.json``
in the current working directory.
"""

import os
import json
import sys

# Directory that contains this script, and the directory just above it.
script_dir = os.path.dirname(os.path.realpath(__file__))
project_root = os.path.dirname(script_dir)

# Make both the parent directory and its "src" folder importable, so the
# project packages below resolve when the script is run directly.
for extra_path in (project_root, project_root + "/src"):
    sys.path.append(extra_path)

from src.agenda_culturel.import_tasks.downloader import *
from src.agenda_culturel.import_tasks.extractor import *
from src.agenda_culturel.import_tasks.importer import *
from src.agenda_culturel.import_tasks.custom_extractors import *


if __name__ == "__main__":

    # Machine-readable feed, and the page a human visitor would open.
    source_url = "https://bibliotheques-clermontmetropole.eu/iguana/Service.PubContainer.cls?uuid=a4a1f992-06da-4ff4-9176-4af0a095c7d1"
    human_url = "https://bibliotheques-clermontmetropole.eu/iguana/www.main.cls?surl=AGENDA_Tout%20lagenda"

    converter = URL2Events(SimpleDownloader(), iguana_agenda.CExtractor())

    try:
        events = converter.process(
            source_url,
            human_url,
            cache="cache-mediatheques.html",
            default_values={},
            published=True,
        )

        exportfile = "events-mediatheques.json"
        print(f"Saving events to file {exportfile}")
        with open(exportfile, "w") as out:
            # default=str stringifies any value json cannot serialize natively.
            json.dump(events, out, indent=4, default=str)
    except Exception as err:
        # Experimental script: report the failure instead of a traceback.
        print("Exception: " + str(err))
|
@ -154,6 +154,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
|
|||||||
extractor = laraymonde.CExtractor()
|
extractor = laraymonde.CExtractor()
|
||||||
elif rimport.processor == RecurrentImport.PROCESSOR.APIDAE:
|
elif rimport.processor == RecurrentImport.PROCESSOR.APIDAE:
|
||||||
extractor = apidae_tourisme.CExtractor()
|
extractor = apidae_tourisme.CExtractor()
|
||||||
|
elif rimport.processor == RecurrentImport.PROCESSOR.IGUANA:
|
||||||
|
extractor = iguana_agenda.CExtractor()
|
||||||
else:
|
else:
|
||||||
extractor = None
|
extractor = None
|
||||||
|
|
||||||
|
@ -0,0 +1,111 @@
|
|||||||
|
from ..generic_extractors import *
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# A class dedicated to get events from an Iguana agenda (médiathèques)
# URL: https://bibliotheques-clermontmetropole.eu/iguana/www.main.cls?surl=AGENDA_Tout%20lagenda
|
||||||
|
class CExtractor(TwoStepsExtractorNoPause):
    """Two-step extractor for an Iguana library agenda (médiathèques).

    Step one (`build_event_url_list`) scans the HTML listing for
    ``li.listItem`` entries and registers one item URL per single-date event.
    Step two (`add_event_from_content`) parses each item document (XML whose
    ``Content`` element embeds an HTML fragment) and records the event.
    """

    def __init__(self):
        super().__init__()

    def guess_category(self, category):
        """Map the source category label to an agenda category name.

        Args:
            category: category text found on the event page.

        Returns:
            The first matching agenda category, or "Sans catégorie" when no
            known keyword appears in ``category``.
        """
        if "Cinéma" in category:
            return "Cinéma"
        if "Conférence" in category or "Rencontres" in category:
            return "Rencontres & débats"
        if "Lecture" in category or "Conte" in category:
            return "Spectacles"
        # The original condition ended with a stray `or ""`, which is always
        # falsy and therefore had no effect; it has been dropped.
        if "Atelier" in category or "Jeux" in category:
            return "Animations & Ateliers"
        if "Numérique" in category:
            return "Rendez-vous locaux"

        return "Sans catégorie"

    def guess_tags_from_category(self, category):
        """Return the list of tags implied by the source category label."""
        tags = []
        if "Lecture" in category:
            tags.append("📖 lecture")
        if "Jeux" in category:
            tags.append("🎲 jeux")

        return tags

    def build_event_url_list(self, content, infuture_days=180):
        """Register the URL of every single-date event found in the listing.

        Args:
            content: HTML of the agenda listing.
            infuture_days: not used by this implementation.
        """
        soup = BeautifulSoup(content, "html.parser")

        root_address_human = self.url_human.split('?')[0]
        root_address = self.url.split('Service')[0]

        # select() returns an empty list when nothing matches, so no extra
        # emptiness test is needed before iterating.
        for item in soup.select("li.listItem"):
            # The identifiers are taken from the quoted arguments of the
            # item's onclick handler.
            elems = item["onclick"].split('"')
            v = elems[3].split('^')[1]
            content_item = elems[1]

            # An item is multi-date when its "until" element carries text;
            # a missing element is treated like an empty one.
            until = item.select_one('.until.maindate')
            multidate = until is not None and until.text != ''
            if multidate:
                continue

            url_human = root_address_human + '?p=*&v=' + v + "#contentitem=" + content_item
            url = root_address + 'Service.PubItem.cls?action=get&instance=*&uuid=' + content_item
            self.add_event_url(url)
            self.add_event_url_human(url, url_human)

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one item document and record the corresponding event.

        Args:
            event_content: XML document of the item; its ``Content`` element
                holds the HTML description of the event.
            event_url: URL the item was downloaded from (also used as uuid).
            url_human: human-facing URL of the event; falls back to
                ``event_url`` when not provided.
            default_values: forwarded to ``add_event_with_props``.
            published: forwarded to ``add_event_with_props``.
        """
        envelope = BeautifulSoup(event_content, "xml")

        title = envelope.select_one("Title").text
        content = envelope.select_one("Content").text

        soup = BeautifulSoup(content, "html.parser")

        def optional_text(selector):
            # select_one() returns None when nothing matches; these fields
            # only feed optional tags, so absence is read as empty text.
            node = soup.select_one(selector)
            return node.text if node is not None else ""

        # The illustration is optional: do not fail on events without one.
        image_node = soup.select_one(".image img")
        image = image_node["src"] if image_node is not None else None

        description = soup.select_one(".rightcolumn .content").text
        location = soup.select_one(".infos .location").text
        public = optional_text(".infos .public")
        start_day = Extractor.parse_french_date(soup.select_one(".infos .date .from").text)
        start_time = Extractor.parse_french_time(soup.select_one(".infos .date .time").text)
        acces = optional_text(".infos .acces")
        category = soup.select_one(".rightcolumn .category").text
        infos = soup.select_one('.infos').text

        # Keep the raw practical information at the end of the description.
        description = description + "\n" + infos

        # Tags must be derived before the label is mapped to a category.
        tags = self.guess_tags_from_category(category)
        category = self.guess_category(category)
        if "Tout-petits" in public or "Jeunesse" in public:
            tags.append("🎈 jeune public")
        if "Accès libre" in acces:
            tags.append("💶 gratuit")

        self.add_event_with_props(
            default_values,
            event_url,
            title,
            category,
            start_day,
            location,
            description,
            tags,
            recurrences=None,
            uuids=[event_url],
            # Previously event_url was always passed here, which discarded
            # the human URL supplied by the caller.
            url_human=url_human if url_human is not None else event_url,
            start_time=start_time,
            end_day=None,
            end_time=None,
            published=published,
            image=image,
            image_alt=""
        )
|
@ -2011,6 +2011,7 @@ class RecurrentImport(models.Model):
|
|||||||
LERIO = "rio", _('Le Rio')
|
LERIO = "rio", _('Le Rio')
|
||||||
LARAYMONDE = "raymonde", _('La Raymonde')
|
LARAYMONDE = "raymonde", _('La Raymonde')
|
||||||
APIDAE = 'apidae', _('Agenda apidae tourisme')
|
APIDAE = 'apidae', _('Agenda apidae tourisme')
|
||||||
|
IGUANA = 'iguana', _('Agenda iguana (médiathèques)')
|
||||||
|
|
||||||
class DOWNLOADER(models.TextChoices):
|
class DOWNLOADER(models.TextChoices):
|
||||||
SIMPLE = "simple", _("simple")
|
SIMPLE = "simple", _("simple")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user