Ajout import la Raymonde

Fix #234
This commit is contained in:
Jean-Marie Favreau 2025-01-18 12:23:48 +01:00
parent 0ff5dbe917
commit 15aa712199
5 changed files with 114 additions and 1 deletions

View File

@ -0,0 +1,44 @@
#!/usr/bin/python3
# coding: utf-8
import os
import json
import sys
# Getting the name of the directory
# where this file is present.
current = os.path.dirname(os.path.realpath(__file__))
# Getting the parent directory name
# where the current directory is present.
parent = os.path.dirname(current)
# adding the parent directory to
# the sys.path.
sys.path.append(parent)
sys.path.append(parent + "/src")
from src.agenda_culturel.import_tasks.downloader import *
from src.agenda_culturel.import_tasks.extractor import *
from src.agenda_culturel.import_tasks.importer import *
from src.agenda_culturel.import_tasks.custom_extractors import *
if __name__ == "__main__":
    # Manual test harness: download the La Raymonde agenda, extract its
    # events and dump them to a JSON file in the working directory.
    downloader = SimpleDownloader()
    pipeline = URL2Events(downloader, laraymonde.CExtractor())

    source_url = "https://www.raymondbar.net/"
    human_url = "https://www.raymondbar.net/"
    # Values applied to every extracted event.
    defaults = {"location": "La Raymonde", "category": "Fêtes & Concerts"}

    try:
        extracted = pipeline.process(
            source_url,
            human_url,
            cache="cache-la-raymonde.html",
            default_values=defaults,
            published=True,
        )

        output_path = "events-la-raymonde.json"
        print("Saving events to file {}".format(output_path))
        with open(output_path, "w") as out:
            # default=str stringifies any value json cannot serialize natively.
            json.dump(extracted, out, indent=4, default=str)
    except Exception as err:
        # Top-level boundary of a throwaway script: report and exit normally.
        print("Exception: " + str(err))

View File

@ -150,6 +150,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
extractor = arachnee.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.LERIO:
extractor = lerio.CExtractor()
elif rimport.processor == RecurrentImport.PROCESSOR.LARAYMONDE:
extractor = laraymonde.CExtractor()
else:
extractor = None

View File

@ -0,0 +1,67 @@
from ..generic_extractors import *
from bs4 import BeautifulSoup
from datetime import datetime
# A class dedicated to get events from Raymond Bar
# URL: https://www.raymondbar.net/
class CExtractor(TwoStepsExtractorNoPause):
    """Two-step extractor for the events of La Raymonde (Raymond Bar).

    Step one collects the show links found on the agenda page
    (https://www.raymondbar.net/); step two parses each show page for its
    title, start day, description and illustration.
    """

    def __init__(self):
        super().__init__()

    def build_event_url_list(self, content, infuture_days=180):
        """Register the URL of every show linked from the agenda page.

        Args:
            content: HTML of the agenda page.
            infuture_days: not used by this extractor.
        """
        soup = BeautifulSoup(content, "html.parser")
        for link in soup.select(".showsList .showMore"):
            href = link.get("href")
            if href is None:
                # A matching element without a target cannot be followed.
                continue
            print(href)
            self.add_event_url(href)

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one show page and register the event it describes.

        The event is skipped (with a message on stdout) when its title or
        its date cannot be found. A missing description or image is not
        fatal: the corresponding fields are passed as None.

        Args:
            event_content: HTML of the show page.
            event_url: URL of the show page; also used as the event uuid
                and as its human-facing URL.
            url_human: ignored, ``event_url`` is used instead.
            default_values: default properties forwarded to
                ``add_event_with_props``.
            published: publication flag forwarded to
                ``add_event_with_props``.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        title_tag = soup.select_one(".showDesc h4 a.summary")
        if title_tag is None:
            print("impossible de récupérer le titre")
            return
        title = title_tag.text

        # The "title" attribute holds a "<day>T<time>" stamp; keep the day.
        date_tag = soup.select_one(".showDate .value-title")
        start_day = date_tag.get("title") if date_tag is not None else None
        if start_day is not None:
            start_day = start_day.split("T")[0]
        if start_day is None:
            print("impossible de récupérer la date")
            return

        description_tag = soup.select_one(".showDetails.description")
        description = description_tag.text if description_tag is not None else None

        # select() returns a (possibly empty) list, never None: test for
        # emptiness and keep both image fields defined in every case.
        image = None
        image_alt = None
        pictures = soup.select(".showDetails.description img")
        if pictures:
            # The last image of the description is used as illustration.
            illustration = pictures[-1]
            image = illustration.get("src")
            image_alt = illustration.get("alt")

        self.add_event_with_props(
            default_values,
            event_url,
            title,
            None,
            start_day,
            None,
            description,
            [],
            recurrences=None,
            uuids=[event_url],
            url_human=event_url,
            start_time=None,
            end_day=None,
            end_time=None,
            published=published,
            image=image,
            image_alt=image_alt,
        )

View File

@ -18,7 +18,6 @@ class CExtractor(TwoStepsExtractorNoPause):
links = soup.select("td.seance_link a")
if links:
for l in links:
print(l["href"])
self.add_event_url(l["href"])
def to_text_select_one(soup, filter):

View File

@ -2009,6 +2009,7 @@ class RecurrentImport(models.Model):
C3C = "cour3coquins", _("la cour des 3 coquins")
ARACHNEE = "arachnee", _("Arachnée concert")
LERIO = "rio", _('Le Rio')
LARAYMONDE = "raymonde", _('La Raymonde')
class DOWNLOADER(models.TextChoices):
SIMPLE = "simple", _("simple")