parent
bf773686f9
commit
ac3d6796cf
43
experimentations/get_le_rio.py
Executable file
43
experimentations/get_le_rio.py
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# getting the name of the directory
|
||||||
|
# where this file is present.
|
||||||
|
current = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
|
||||||
|
# Getting the parent directory name
|
||||||
|
# where the current directory is present.
|
||||||
|
parent = os.path.dirname(current)
|
||||||
|
|
||||||
|
# adding the parent directory to
|
||||||
|
# the sys.path.
|
||||||
|
sys.path.append(parent)
|
||||||
|
|
||||||
|
from src.agenda_culturel.import_tasks.downloader import *
|
||||||
|
from src.agenda_culturel.import_tasks.extractor import *
|
||||||
|
from src.agenda_culturel.import_tasks.importer import *
|
||||||
|
from src.agenda_culturel.import_tasks.custom_extractors import *
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the Le Rio extractor on the cinema's events page and save the
    # resulting events as JSON in the current working directory.
    u2e = URL2Events(SimpleDownloader(), lerio.CExtractor())
    url = "https://www.cinemalerio.com/evenements/"
    url_human = "https://www.cinemalerio.com/evenements/"

    try:
        # NOTE(review): `cache` is presumably a local file used to avoid
        # downloading the listing page again -- confirm in URL2Events.process.
        events = u2e.process(
            url,
            url_human,
            cache="cache-le-rio.html",
            default_values={"location": "Cinéma le Rio", "category": "Cinéma"},
            published=True,
        )

        # The output name is spelled "le-rio" so that it matches the cache
        # file and the cinema's name (it was previously misspelled "le-roi").
        exportfile = "events-le-rio.json"
        print("Saving events to file {}".format(exportfile))
        with open(exportfile, "w") as f:
            # default=str stringifies any value json cannot serialize natively.
            json.dump(events, f, indent=4, default=str)
    except Exception as e:
        # Best-effort experimentation script: report the problem and stop.
        print("Exception: " + str(e))
|
@ -147,6 +147,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
|
|||||||
extractor = c3c.CExtractor()
|
extractor = c3c.CExtractor()
|
||||||
elif rimport.processor == RecurrentImport.PROCESSOR.ARACHNEE:
|
elif rimport.processor == RecurrentImport.PROCESSOR.ARACHNEE:
|
||||||
extractor = arachnee.CExtractor()
|
extractor = arachnee.CExtractor()
|
||||||
|
elif rimport.processor == RecurrentImport.PROCESSOR.LERIO:
|
||||||
|
extractor = lerio.CExtractor()
|
||||||
else:
|
else:
|
||||||
extractor = None
|
extractor = None
|
||||||
|
|
||||||
|
91
src/agenda_culturel/import_tasks/custom_extractors/lerio.py
Normal file
91
src/agenda_culturel/import_tasks/custom_extractors/lerio.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
from ..generic_extractors import *
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# A class dedicated to get events from Cinéma Le Rio (Clermont-Ferrand)
|
||||||
|
# URL: https://www.cinemalerio.com/evenements/
|
||||||
|
class CExtractor(TwoStepsExtractorNoPause):
    """Extractor for the events of Cinéma Le Rio (Clermont-Ferrand).

    Listing page: https://www.cinemalerio.com/evenements/

    The extraction is done in two steps: ``build_event_url_list`` collects
    the links found in the listing page, then ``add_event_from_content``
    converts the page behind each link into an event.
    """

    def __init__(self):
        super().__init__()
        self.possible_dates = {}
        self.theater = None

    def build_event_url_list(self, content, infuture_days=180):
        """Register the URL of every screening linked from the listing page.

        Args:
            content: HTML of the listing page.
            infuture_days: not used by this extractor.
        """
        soup = BeautifulSoup(content, "html.parser")

        for link in soup.select("td.seance_link a"):
            href = link.get("href")
            # An anchor without a target cannot be followed: ignore it
            # instead of failing on the whole page.
            if href is None:
                continue
            print(href)
            self.add_event_url(href)

    @staticmethod
    def to_text_select_one(soup, filter):
        """Return the text of the first element matching a CSS selector.

        Args:
            soup: parsed document (or element) to search in.
            filter: CSS selector given to ``select_one``.

        Returns:
            The text of the first matching element, or None when nothing
            matches.
        """
        element = soup.select_one(filter)
        return None if element is None else element.text

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Build an event from a screening page and register it.

        The page is silently skipped (nothing is added) when it has no
        title, when its date block is missing or is not of the form
        "<date> à <time>", or when it is not a special event, i.e. when the
        ".alerte_text" block is missing or consists of a single line
        starting with "En partenariat".

        Args:
            event_content: HTML of the screening page.
            event_url: URL of the screening page; also used as the event's
                uuid and as its human-readable URL.
            url_human: accepted for interface compatibility, not used here.
            default_values: forwarded unchanged to ``add_event_with_props``.
            published: forwarded unchanged to ``add_event_with_props``.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        # A page without a heading has no usable title: skip it rather than
        # raising AttributeError on None.
        title_tag = soup.select_one("h1")
        if title_tag is None:
            return
        title = title_tag.text

        alerte_date = CExtractor.to_text_select_one(soup, ".alerte_date")
        if alerte_date is None:
            return
        dh = alerte_date.split("à")
        # if date is not found, we skip
        if len(dh) != 2:
            return

        # The page does not necessarily state the year, so the current one
        # is supplied as the default.
        date = Extractor.parse_french_date(dh[0], default_year=datetime.now().year)
        time = Extractor.parse_french_time(dh[1])

        synopsis = CExtractor.to_text_select_one(soup, ".synopsis_bloc")
        special_titre = CExtractor.to_text_select_one(soup, ".alerte_titre")
        special = CExtractor.to_text_select_one(soup, ".alerte_text")

        # it's not a specific event: we skip it
        if special is None:
            return
        # str.split always returns at least one item, so only the
        # single-line case needs to be examined.
        special_lines = special.split("\n")
        if len(special_lines) == 1 and special_lines[0].strip().startswith(
            "En partenariat"
        ):
            return

        description = "\n\n".join(
            [x for x in [synopsis, special_titre, special] if x is not None]
        )

        # The attributes are read with .get() so that an <img> lacking
        # "alt" or "src" yields None instead of raising KeyError.
        image = None
        image_alt = None
        image_tag = soup.select_one(".col1 img")
        if image_tag is not None:
            image_alt = image_tag.get("alt")
            image = image_tag.get("src")

        self.add_event_with_props(
            default_values,
            event_url,
            title,
            None,
            date,
            None,
            description,
            [],
            recurrences=None,
            uuids=[event_url],
            url_human=event_url,
            start_time=time,
            end_day=None,
            end_time=None,
            published=published,
            image=image,
            image_alt=image_alt,
        )
|
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 4.2.9 on 2024-11-29 13:44
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Alter ``recurrentimport.processor`` so its choices include ``rio`` ("Le Rio")."""

    dependencies = [
        ("agenda_culturel", "0121_contactmessage_related_event"),
    ]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                # (stored value, human-readable label) pairs
                choices=[
                    ("ical", "ical"),
                    ("icalnobusy", "ical no busy"),
                    ("icalnovc", "ical no VC"),
                    ("lacoope", "lacoope.org"),
                    ("lacomedie", "la comédie"),
                    ("lefotomat", "le fotomat"),
                    ("lapucealoreille", "la puce à l'oreille"),
                    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
                    ("Facebook events", "Événements d'une page FB"),
                    ("cour3coquins", "la cour des 3 coquins"),
                    ("arachnee", "Arachnée concert"),
                    ("rio", "Le Rio"),
                ],
                default="ical",
                max_length=20,
                verbose_name="Processor",
            ),
        ),
    ]
|
@ -1767,6 +1767,7 @@ class RecurrentImport(models.Model):
|
|||||||
FBEVENTS = "Facebook events", _("Événements d'une page FB")
|
FBEVENTS = "Facebook events", _("Événements d'une page FB")
|
||||||
C3C = "cour3coquins", _("la cour des 3 coquins")
|
C3C = "cour3coquins", _("la cour des 3 coquins")
|
||||||
ARACHNEE = "arachnee", _("Arachnée concert")
|
ARACHNEE = "arachnee", _("Arachnée concert")
|
||||||
|
LERIO = "rio", _('Le Rio')
|
||||||
|
|
||||||
class DOWNLOADER(models.TextChoices):
|
class DOWNLOADER(models.TextChoices):
|
||||||
SIMPLE = "simple", _("simple")
|
SIMPLE = "simple", _("simple")
|
||||||
|
Loading…
Reference in New Issue
Block a user