Import cour des 3 coquins fonctionnel

This commit is contained in:
Jean-Marie Favreau 2024-09-04 23:56:51 +02:00
parent ff681ed93a
commit e6e19cbcba
7 changed files with 96 additions and 42 deletions

View File

@@ -130,6 +130,8 @@ def run_recurrent_import(self, pk):
extractor = wordpress_mec.CExtractor() extractor = wordpress_mec.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.FBEVENTS: elif rimport.processor == RecurrentImport.PROCESSOR.FBEVENTS:
extractor = fbevents.CExtractor() extractor = fbevents.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.C3C:
extractor = c3c.CExtractor()
else: else:
extractor = None extractor = None

View File

@@ -1,5 +1,6 @@
from ..generic_extractors import * from ..generic_extractors import *
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from datetime import timedelta
# A class dedicated to get events from La Cour des 3 Coquins # A class dedicated to get events from La Cour des 3 Coquins
# URL: https://billetterie-c3c.clermont-ferrand.fr// # URL: https://billetterie-c3c.clermont-ferrand.fr//
@@ -46,56 +47,87 @@ class CExtractor(TwoStepsExtractor):
else: else:
image = None image = None
description = soup.select_one(".presentation") description = soup.select_one(".presentation").get_text()
duree = soup.select_one("#criteres .DUREE-V .valeur-critere li") duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
if duree is not None: if duration is not None:
duree = self.parse_french_time(duree.text) duration = self.parse_french_time(duration.text)
location = self.nom_lieu location = self.nom_lieu
tags = [] categories = []
for t in soup.select(".sous-titre span"): for t in soup.select(".sous-titre span"):
classes = t.get("class") classes = t.get("class")
if classes and len(classes) > 0: if classes and len(classes) > 0:
if classes[0].startswith("LIEU-"): if classes[0].startswith("LIEU-"):
location = t.text location = t.text
elif classes[0].startswith("THEMATIQUE-"): elif classes[0].startswith("THEMATIQUE-"):
tag = self.category_c3c2agenda(t.text) cat = self.category_c3c2agenda(t.text)
if tag is not None: if cat is not None:
tags.append(tag) categories.append(cat)
# TODO: parser les dates, récupérer les heures () # TODO: parser les dates, récupérer les heures ()
dates = [self.parse_french_date(o.get("value")) for o in soup.select("select.datedleb_resa option")] dates = [o.get("value") for o in soup.select("select.datedleb_resa option")]
patternCodeSite = re.compile(r'.*gsw_vars\["CODEPRESTATAIRE"\] = "(.*?)";.*', flags=re.DOTALL)
patternCodeObject = re.compile(r'.*gsw_vars\["CODEPRESTATION"\] = "(.*?)";.*', flags=re.DOTALL)
print("EVENT ", event_url) scripts = soup.find_all('script')
print("- ", title) codeSite = ""
print("- ", image) idObject = ""
print("- ", len(description)) for script in scripts:
print("- ", duree) if(patternCodeSite.match(str(script.string))):
print("- ", location) data = patternCodeSite.match(script.string)
print("- ", tags) codeSite = data.groups()[0]
print("- ", dates) if(patternCodeObject.match(str(script.string))):
data = patternCodeObject.match(script.string)
return idObject = data.groups()[0]
url_human = event_url pause = self.downloader.pause
self.downloader.pause = False
# get exact schedule need two supplementary requests
datetimes = []
if codeSite != "" and idObject != "":
for date in dates:
# the first page is required such that the server knows the selected date
page1 = self.downloader.get_content("https://billetterie-c3c.clermont-ferrand.fr/booking?action=searchAjax&cid=2&afficheDirectDispo=" + date + "&type_prestataire=V&cle_fiche=PRESTATION-V-" + codeSite + "-" + idObject + "&datedeb=" + date)
# then we get the form with hours
page2 = self.downloader.get_content("https://billetterie-c3c.clermont-ferrand.fr/booking?action=detailTarifsPrestationAjax&prestation=V-" + codeSite + "-" + idObject)
soup2 = BeautifulSoup(page2, "html.parser")
times = [o.text for o in soup2.select("#quart_en_cours_spec option")]
for t in times:
startdate = self.parse_french_date(date)
starttime = self.parse_french_time(t)
start = datetime.datetime.combine(startdate, starttime)
enddate = None
endtime = None
if duration is not None:
end = start + timedelta(hours=duration.hour, minutes=duration.minute, seconds=duration.second)
enddate = end.date()
endtime = end.time()
datetimes.append((startdate, starttime, enddate, endtime))
self.downloader.pause = pause
self.add_event_with_props( category = None
event_url, if "category" in default_values:
None, category = default_values["category"]
None, if len(categories) > 0:
start_day, category = categories[0]
location,
description, for dt in datetimes:
tags,
recurrences=None, self.add_event_with_props(
uuids=[event_url], event_url,
url_human=url_human, title,
start_time=start_time, category,
end_day=end_day, dt[0],
end_time=end_time, location,
published=published, description,
image=image, [],
) recurrences=None,
uuids=[event_url],
url_human=url_human,
start_time=dt[1],
end_day=dt[2],
end_time=dt[3],
published=published,
image=image,
)

View File

@@ -83,6 +83,7 @@ class ChromiumHeadlessDownloader(Downloader):
self.service = Service("/usr/bin/chromedriver") self.service = Service("/usr/bin/chromedriver")
self.driver = webdriver.Chrome(service=self.service, options=self.options) self.driver = webdriver.Chrome(service=self.service, options=self.options)
def download(self, url, referer=None, post=None): def download(self, url, referer=None, post=None):
if post: if post:
raise Exception("POST method with Chromium headless not yet implemented") raise Exception("POST method with Chromium headless not yet implemented")

View File

@@ -104,7 +104,7 @@ class Extractor(ABC):
s = "0" s = "0"
else: else:
# format heures # format heures
m = re.search("([0-9]+) [Hh:.]", text) m = re.search("([0-9]+)[ ]*[Hh:.]", text)
if m: if m:
h = m.group(1) h = m.group(1)
m = "0" m = "0"

View File

@@ -0,0 +1,18 @@
# Generated by Django 4.2.9 on 2024-09-04 21:39
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter the ``processor`` field of the ``recurrentimport`` model.

    The field becomes a ``CharField`` (default ``'ical'``, at most 20
    characters) restricted to the choices listed below, which include
    the ``'cour3coquins'`` entry.
    """

    # (stored value, human-readable label) pairs accepted by the field.
    # Kept as a list of tuples, in the original order.
    _processor_choices = [
        ('ical', 'ical'),
        ('icalnobusy', 'ical no busy'),
        ('icalnovc', 'ical no VC'),
        ('lacoope', 'lacoope.org'),
        ('lacomedie', 'la comédie'),
        ('lefotomat', 'le fotomat'),
        ('lapucealoreille', "la puce à l'oreille"),
        ('Plugin wordpress MEC', 'Plugin wordpress MEC'),
        ('Facebook events', "Événements d'une page"),
        ('cour3coquins', 'la cour des 3 coquins'),
    ]

    # This migration is applied after migration 0071 of the same app.
    dependencies = [
        ('agenda_culturel', '0071_alter_contactmessage_message_and_more'),
    ]

    operations = [
        migrations.AlterField(
            model_name='recurrentimport',
            name='processor',
            field=models.CharField(
                choices=_processor_choices,
                default='ical',
                max_length=20,
                verbose_name='Processor',
            ),
        ),
    ]

View File

@@ -1241,6 +1241,7 @@ class RecurrentImport(models.Model):
LAPUCEALOREILLE = "lapucealoreille", _("la puce à l'oreille") LAPUCEALOREILLE = "lapucealoreille", _("la puce à l'oreille")
MECWORDPRESS = "Plugin wordpress MEC", _("Plugin wordpress MEC") MECWORDPRESS = "Plugin wordpress MEC", _("Plugin wordpress MEC")
FBEVENTS = "Facebook events", _("Événements d'une page") FBEVENTS = "Facebook events", _("Événements d'une page")
C3C = "cour3coquins", _("la cour des 3 coquins")
class DOWNLOADER(models.TextChoices): class DOWNLOADER(models.TextChoices):
SIMPLE = "simple", _("simple") SIMPLE = "simple", _("simple")

View File

@@ -14,7 +14,7 @@
{{ form }} {{ form }}
<footer> <footer>
<div class="grid"> <div class="grid buttons">
<a href="{{ cancel_url }}" role="button" class="secondary">Annuler</a> <a href="{{ cancel_url }}" role="button" class="secondary">Annuler</a>
<input type="submit" value="Confirmer"> <input type="submit" value="Confirmer">
</div> </div>