Intégration Graine de spectacles

Fix #278
This commit is contained in:
Jean-Marie Favreau 2025-02-01 14:48:52 +01:00
parent 02014a243b
commit 9b898d26da
6 changed files with 85 additions and 140 deletions

View File

@ -29,7 +29,7 @@ from src.agenda_culturel.import_tasks.custom_extractors import *
if __name__ == "__main__":
u2e = URL2Events(ChromiumHeadlessDownloader(), c3c.CExtractor())
u2e = URL2Events(ChromiumHeadlessDownloader(), billetterie_cf.CExtractor())
url = "https://billetterie-c3c.clermont-ferrand.fr/"
url_human = "https://billetterie-c3c.clermont-ferrand.fr/"

View File

@ -0,0 +1,44 @@
#!/usr/bin/python3
# coding: utf-8
import os
import json
import sys
# Directory that contains this script (symlinks resolved).
current = os.path.dirname(os.path.realpath(__file__))
# Directory one level above the script's directory.
parent = os.path.dirname(current)
# Make both the parent directory and its "src" sub-directory importable,
# so that the "src.agenda_culturel..." imports below can be resolved.
sys.path.extend([parent, parent + "/src"])
from src.agenda_culturel.import_tasks.downloader import *
from src.agenda_culturel.import_tasks.extractor import *
from src.agenda_culturel.import_tasks.importer import *
from src.agenda_culturel.import_tasks.custom_extractors import *
if __name__ == "__main__":
    # Pipeline: headless-Chromium download, then the billetterie_cf extractor.
    downloader = ChromiumHeadlessDownloader()
    extractor = billetterie_cf.CExtractor()
    u2e = URL2Events(downloader, extractor)

    # The same address is used both as the crawled URL and as the
    # human-facing URL attached to the events.
    url = "https://billetterie-gds.clermont-ferrand.fr/"
    url_human = "https://billetterie-gds.clermont-ferrand.fr/"
    exportfile = "events-gds.json"

    try:
        events = u2e.process(
            url,
            url_human,
            cache="cache-gds.html",
            default_values={},
            published=True,
        )

        print("Saving events to file {}".format(exportfile))
        # default=str stringifies any value json cannot serialize natively.
        with open(exportfile, "w") as out:
            json.dump(events, out, indent=4, default=str)
    except Exception as err:
        # Top-level boundary of an experimentation script: report and exit.
        print("Exception: " + str(err))

View File

@ -144,8 +144,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
extractor = wordpress_mec.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.FBEVENTS:
extractor = fbevents.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.C3C:
extractor = c3c.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.BILLETTERIECF:
extractor = billetterie_cf.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.ARACHNEE:
extractor = arachnee.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.LERIO:

View File

@ -1,136 +0,0 @@
from ..generic_extractors import *
from bs4 import BeautifulSoup
from datetime import timedelta
# A class dedicated to get events from La Cour des 3 Coquins
# URL: https://billetterie-c3c.clermont-ferrand.fr//
class CExtractor(TwoStepsExtractor):
    """Two-step extractor for the La Cour des 3 Coquins ticketing site.

    Step one (``build_event_url_list``) collects the event page URLs found
    on the listing page. Step two (``add_event_from_content``) parses one
    event page and queries the booking endpoints to get the start times.
    """

    # Venue name used when the event page has no "LIEU-*" span.
    nom_lieu = "La Cour des 3 Coquins"

    def category_c3c2agenda(self, category):
        """Map a theme label of the site to an ``(category, tag)`` pair.

        Always returns a 2-tuple so that callers can unpack the result;
        ``(None, None)`` is returned for an empty or unknown label.
        """
        mapping = {"Théâtre": "Spectacles", "Concert": "Fêtes & Concerts", "Projection": "Cinéma"}
        mapping_tag = {"Théâtre": "🎭 théâtre", "Concert": "🎵 concert", "Projection": None}
        if not category or category not in mapping:
            return None, None
        return mapping[category], mapping_tag[category]

    def build_event_url_list(self, content):
        """Register the URL of every event card found in *content*.

        Cards without a "more info" link, or with an empty ``href``, are
        skipped.
        """
        soup = BeautifulSoup(content, "html.parser")
        for card in soup.select("div.fiche-info"):
            link = card.select_one("a.btn.lien_savoir_plus")
            href = link.get("href") if link is not None else None
            if not href:
                continue
            self.add_event_url(self.url + "/" + href)

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one event page and add one event per (date, time) found.

        Title, image, description, duration, location and theme are read
        from the page itself. The exact start times are not in the page:
        for each bookable date two extra requests are sent to the booking
        endpoints. If a duration is available, the end day/time is derived
        from the start; otherwise both stay ``None``.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        title = soup.select_one("h1")
        if title:
            title = title.text

        image = soup.select_one("#media .swiper-slide img")
        image = image["src"] if image else None

        description = soup.select_one(".presentation").get_text()

        duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
        if duration is not None:
            duration = Extractor.parse_french_time(duration.text)

        # The first class of each sub-title span tells what the span holds.
        location = self.nom_lieu
        categories = []
        tags = []
        for span in soup.select(".sous-titre span"):
            classes = span.get("class")
            if not classes:
                continue
            if classes[0].startswith("LIEU-"):
                location = span.text
            elif classes[0].startswith("THEMATIQUE-"):
                cat, tag = self.category_c3c2agenda(span.text)
                if cat:
                    categories.append(cat)
                if tag:
                    tags.append(tag)

        # TODO: parse the dates, retrieve the times
        # Options without a value cannot be used to build a booking request.
        dates = [
            value
            for value in (o.get("value") for o in soup.select("select.datedleb_resa option"))
            if value
        ]

        # The provider and object codes are set in inline JavaScript.
        pattern_code_site = re.compile(r'.*gsw_vars\["CODEPRESTATAIRE"\] = "(.*?)";.*', flags=re.DOTALL)
        pattern_code_object = re.compile(r'.*gsw_vars\["CODEPRESTATION"\] = "(.*?)";.*', flags=re.DOTALL)
        code_site = ""
        id_object = ""
        for script in soup.find_all("script"):
            script_text = str(script.string)
            site_match = pattern_code_site.match(script_text)
            if site_match:
                code_site = site_match.group(1)
            object_match = pattern_code_object.match(script_text)
            if object_match:
                id_object = object_match.group(1)

        booking_url = "https://billetterie-c3c.clermont-ferrand.fr/booking"

        # The downloader pause is switched off for the schedule requests;
        # try/finally restores the previous setting even if one of them fails.
        saved_pause = self.downloader.pause
        self.downloader.pause = False
        datetimes = []
        try:
            # get exact schedule need two supplementary requests
            if code_site != "" and id_object != "":
                for date in dates:
                    # the first page is required such that the server knows
                    # the selected date (its content is not used)
                    self.downloader.get_content(
                        booking_url
                        + "?action=searchAjax&cid=2&afficheDirectDispo=" + date
                        + "&type_prestataire=V&cle_fiche=PRESTATION-V-" + code_site + "-" + id_object
                        + "&datedeb=" + date
                    )
                    # then we get the form with hours
                    page2 = self.downloader.get_content(
                        booking_url
                        + "?action=detailTarifsPrestationAjax&prestation=V-" + code_site + "-" + id_object
                    )
                    soup2 = BeautifulSoup(page2, "html.parser")
                    times = [o.text for o in soup2.select("#quart_en_cours_spec option")]
                    for t in times:
                        startdate = Extractor.parse_french_date(date)
                        starttime = Extractor.parse_french_time(t)
                        start = datetime.datetime.combine(startdate, starttime)
                        enddate = None
                        endtime = None
                        if duration is not None:
                            end = start + timedelta(
                                hours=duration.hour,
                                minutes=duration.minute,
                                seconds=duration.second,
                            )
                            enddate = end.date()
                            endtime = end.time()
                        datetimes.append((startdate, starttime, enddate, endtime))
        finally:
            self.downloader.pause = saved_pause

        # Only the first recognised theme is used as the event category.
        category = categories[0] if categories else None

        for startdate, starttime, enddate, endtime in datetimes:
            self.add_event_with_props(
                default_values,
                event_url,
                title,
                category,
                startdate,
                location,
                description,
                tags,
                recurrences=None,
                uuids=[event_url],
                url_human=url_human,
                start_time=starttime,
                end_day=enddate,
                end_time=endtime,
                published=published,
                image=image,
            )

View File

@ -0,0 +1,37 @@
# Generated by Django 4.2.9 on 2025-02-01 14:32
from django.db import migrations, models
def rename_c3c(apps, schema_editor):
    """Forward data migration for the processor rename.

    Every RecurrentImport whose processor reads "cour3coquins" is switched
    to "Billetterie CF" and saved. ``schema_editor`` is not used.
    """
    model = apps.get_model("agenda_culturel", "RecurrentImport")
    outdated = [
        row for row in model.objects.all()
        if str(row.processor) == "cour3coquins"
    ]
    for row in outdated:
        row.processor = "Billetterie CF"
        row.save()
def rename_c3c_backward(apps, schema_editor):
    """Reverse data migration for the processor rename.

    Every RecurrentImport whose processor reads "Billetterie CF" is switched
    back to "cour3coquins" and saved. ``schema_editor`` is not used.
    """
    model = apps.get_model("agenda_culturel", "RecurrentImport")
    renamed = [
        row for row in model.objects.all()
        if str(row.processor) == "Billetterie CF"
    ]
    for row in renamed:
        row.processor = "cour3coquins"
        row.save()
class Migration(migrations.Migration):
    """Replace the "cour3coquins" processor choice by "Billetterie CF".

    The field choices are altered first, then the stored values are renamed
    by a reversible data migration.
    """

    dependencies = [
        ("agenda_culturel", "0140_alter_event_created_by_user_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                choices=[
                    ("ical", "ical"),
                    ("icalnobusy", "ical no busy"),
                    ("icalnovc", "ical no VC"),
                    ("lacoope", "lacoope.org"),
                    ("lacomedie", "la comédie"),
                    ("lefotomat", "le fotomat"),
                    ("lapucealoreille", "la puce à l'oreille"),
                    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
                    ("Facebook events", "Événements d'une page FB"),
                    ("Billetterie CF", "Billetterie Clermont-Ferrand"),
                    ("arachnee", "Arachnée concert"),
                    ("rio", "Le Rio"),
                    ("raymonde", "La Raymonde"),
                    ("apidae", "Agenda apidae tourisme"),
                    ("iguana", "Agenda iguana (médiathèques)"),
                ],
                default="ical",
                max_length=20,
                verbose_name="Processor",
            ),
        ),
        # Data migration: forward renames stored values, backward restores them.
        migrations.RunPython(
            rename_c3c,
            rename_c3c_backward,
        ),
    ]

View File

@ -2098,7 +2098,7 @@ class RecurrentImport(models.Model):
LAPUCEALOREILLE = "lapucealoreille", _("la puce à l'oreille")
MECWORDPRESS = "Plugin wordpress MEC", _("Plugin wordpress MEC")
FBEVENTS = "Facebook events", _("Événements d'une page FB")
C3C = "cour3coquins", _("la cour des 3 coquins")
BILLETTERIECF = "Billetterie CF", _("Billetterie Clermont-Ferrand")
ARACHNEE = "arachnee", _("Arachnée concert")
LERIO = "rio", _('Le Rio')
LARAYMONDE = "raymonde", _('La Raymonde')