Ajout de l'import de la programmation de la Comédie
This commit is contained in:
parent
37350f9edc
commit
995aa4b2d3
43
experimentations/get_lacomedie_events.py
Executable file
43
experimentations/get_lacomedie_events.py
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/python3
# coding: utf-8
"""Experiment: download the La Comédie de Clermont programme and save it as JSON."""

import json
import os
import sys

# Directory that contains this script.
current = os.path.dirname(os.path.realpath(__file__))

# Its parent directory, which must be importable so that the `src` package
# used by the imports below can be resolved.
parent = os.path.dirname(current)
sys.path.append(parent)

from src.agenda_culturel.import_tasks.downloader import *
from src.agenda_culturel.import_tasks.extractor import *
from src.agenda_culturel.import_tasks.importer import *
from src.agenda_culturel.import_tasks.custom_extractors import *


if __name__ == "__main__":

    # Endpoint listing the dates that have events, and the page a human
    # visitor would open for the same season.
    url = "https://lacomediedeclermont.com/saison23-24/wp-admin/admin-ajax.php?action=load_dates_existantes"
    url_human = "https://lacomediedeclermont.com/saison23-24/"

    u2e = URL2Events(SimpleDownloader(), LaComedieExtractor())

    try:
        defaults = {"location": "La Comédie de Clermont"}
        events = u2e.process(
            url,
            url_human,
            cache="cache-lacomedie.html",
            default_values=defaults,
            published=True,
        )

        exportfile = "events-lacomedie.json"
        print(f"Saving events to file {exportfile}")
        with open(exportfile, "w") as out:
            # default=str stringifies any value json cannot serialise natively.
            json.dump(events, out, indent=4, default=str)
    except Exception as err:
        # Experimental script: report the failure and end normally.
        print(f"Exception: {err}")
|
@ -33,7 +33,7 @@ if __name__ == "__main__":
|
|||||||
url_human = "https://www.lacoope.org/concerts-calendrier/"
|
url_human = "https://www.lacoope.org/concerts-calendrier/"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
events = u2e.process(url, url_human, cache = "cache-lacoope.ical", default_values = {"category": "Concert", "location": "La Coopérative"}, published = True)
|
events = u2e.process(url, url_human, cache = "cache-lacoope.html", default_values = {"category": "Concert", "location": "La Coopérative"}, published = True)
|
||||||
|
|
||||||
exportfile = "events-lacoope.json"
|
exportfile = "events-lacoope.json"
|
||||||
print("Saving events to file {}".format(exportfile))
|
print("Saving events to file {}".format(exportfile))
|
||||||
|
@ -103,6 +103,8 @@ def run_recurrent_import(self, pk):
|
|||||||
extractor = ICALNoVCExtractor()
|
extractor = ICALNoVCExtractor()
|
||||||
elif rimport.processor == RecurrentImport.PROCESSOR.LACOOPE:
|
elif rimport.processor == RecurrentImport.PROCESSOR.LACOOPE:
|
||||||
extractor = LaCoopeExtractor()
|
extractor = LaCoopeExtractor()
|
||||||
|
elif rimport.processor == RecurrentImport.PROCESSOR.LACOMEDIE:
|
||||||
|
extractor = LaComedieExtractor()
|
||||||
else:
|
else:
|
||||||
extractor = None
|
extractor = None
|
||||||
|
|
||||||
|
@ -62,3 +62,68 @@ class LaCoopeExtractor(TwoStepsExtractor):
|
|||||||
url_human = event_url
|
url_human = event_url
|
||||||
|
|
||||||
self.add_event_with_props(event_url, title, category, start_day, location, description, tags, recurrences=None, uuid=event_url, url_human=url_human, start_time=start_time, end_day=end_day, end_time=end_time, published=published, image=image)
|
self.add_event_with_props(event_url, title, category, start_day, location, description, tags, recurrences=None, uuid=event_url, url_human=url_human, start_time=start_time, end_day=end_day, end_time=end_time, published=published, image=image)
|
||||||
|
|
||||||
|
|
||||||
|
# A class dedicated to getting events from La Comédie de Clermont:
|
||||||
|
# URL: https://lacomediedeclermont.com/saison23-24/wp-admin/admin-ajax.php?action=load_dates_existantes
|
||||||
|
# URL pour les humains: https://lacomediedeclermont.com/saison23-24/
|
||||||
|
class LaComedieExtractor(TwoStepsExtractor):
    """Two-step extractor for the programme of La Comédie de Clermont.

    ``build_event_url_list`` reads the JSON list of dates returned by the
    source URL, then POSTs one ``load_evenements_jour`` request per date and
    scrapes the returned HTML fragment for each show's link, start time,
    title, category and location.  ``add_event_from_content`` then parses a
    show page for its image and description.
    """

    nom_lieu = "La Comédie de Clermont"

    # Venue category label -> agenda category.  Labels missing from this
    # table leave the event without an explicitly recorded category.
    _CATEGORY_MAPPING = {
        "Théâtre": "Théâtre",
        "Danse": "Danse",
        "Rencontre": "Autre",
        "Sortie de résidence": "Autre",
        "PopCorn Live": "Autre",
    }

    def category_comedie2agenda(self, category):
        """Return the agenda category for a venue label, or None if unmapped."""
        return self._CATEGORY_MAPPING.get(category)

    def build_event_url_list(self, content):
        """Fill ``self.event_urls`` and the per-event properties from *content*.

        *content* is the JSON text returned by the source URL; the list of
        ISO dates is read from ``["data"][0]``.  Dates earlier than
        ``self.now`` are skipped when ``self.only_future`` is set.
        """
        self.event_urls = []
        dates = json5.loads(content)["data"][0]

        # The per-day listing is requested from the same URL, without its
        # query string.
        endpoint = self.url.split("?")[0]
        for day in dates:
            if self.only_future and self.now > datetime.date.fromisoformat(day):
                continue

            raw = self.downloader.get_content(endpoint, post={'action': "load_evenements_jour", "jour": day})
            if not raw:
                continue

            payload = json5.loads(raw)
            if "data" not in payload:
                continue

            soup = BeautifulSoup(payload["data"][0], "html.parser")
            for entry in soup.select("div.unedatedev"):
                # NOTE(review): the [0] lookups below raise IndexError if the
                # fragment lacks the expected elements (unchanged behaviour).
                link = entry.select('a')[0]

                # A "fake" URL specific to each day of this show, so that
                # every performance gets its own key.
                e_url = link["href"] + "#" + day
                self.event_urls.append(e_url)
                self.add_event_start_day(e_url, day)

                # The start time is the last space-separated token of the
                # serialised date element, up to the next tag.
                start_time = str(entry.select('div#datecal')[0]).split(' ')[-1].split('<')[0]
                self.add_event_start_time(e_url, start_time)

                self.add_event_title(e_url, link.contents[0])

                spans = entry.select("div#lieuevtcal span")
                if spans:
                    category = self.category_comedie2agenda(spans[-1].contents[0])
                    if category is not None:
                        self.add_event_category(e_url, category)

                # The location is whatever follows the last "•" in the final
                # child of the place element.
                location = entry.select("div#lieuevtcal")[0].contents[-1].split("•")[-1]
                self.add_event_location(e_url, location)

    def add_event_from_content(self, event_content, event_url, url_human = None, default_values = None, published = False):
        """Register the event described by the show page *event_content*.

        Only the image and the description come from the page.  Title,
        category, start day and location are passed as None here;
        ``build_event_url_list`` recorded them per URL beforehand.  The
        *url_human* and *default_values* arguments are not used: the event
        URL itself is passed on as the human-facing URL.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        images = soup.select("#imgspec img")
        image = images[0]["src"] if images else None

        description = soup.select("#descspec")[0].get_text().replace("Lire plus...", "")

        self.add_event_with_props(event_url, None, None, None, None, description, [], recurrences=None, uuid=event_url, url_human=event_url, published=published, image=image)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse, urlencode
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import os
|
import os
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
@ -13,16 +13,16 @@ class Downloader(ABC):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def download(self, url):
|
def download(self, url, post=None):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def get_content(self, url, cache = None):
|
def get_content(self, url, cache = None, post = None):
|
||||||
if cache and os.path.exists(cache):
|
if cache and os.path.exists(cache):
|
||||||
print("Loading cache ({})".format(cache))
|
print("Loading cache ({})".format(cache))
|
||||||
with open(cache) as f:
|
with open(cache) as f:
|
||||||
content = "\n".join(f.readlines())
|
content = "\n".join(f.readlines())
|
||||||
else:
|
else:
|
||||||
content = self.download(url)
|
content = self.download(url, post)
|
||||||
|
|
||||||
if cache:
|
if cache:
|
||||||
print("Saving cache ({})".format(cache))
|
print("Saving cache ({})".format(cache))
|
||||||
@ -40,14 +40,19 @@ class SimpleDownloader(Downloader):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
|
|
||||||
def download(self, url, post=None):
    """Fetch *url* and return the response body decoded to text.

    When *post* is truthy it is URL-encoded and sent as the request body,
    which makes urllib issue a POST; otherwise a plain request is made.
    Any failure is printed and reported to the caller as ``None``.
    """
    print("Downloading {}".format(url))

    try:
        post_args = urlencode(post).encode() if post else None
        # The context manager closes the response once the body is read.
        with urllib.request.urlopen(url, post_args) as resource:
            # get_content_charset() returns None when the response declares
            # no charset; fall back to UTF-8 instead of failing in decode().
            charset = resource.headers.get_content_charset() or "utf-8"
            return resource.read().decode(charset)
    except Exception as e:
        # Best-effort download: report the problem and signal it with None.
        print(e)
        return None
|
||||||
|
|
||||||
|
|
||||||
@ -63,7 +68,9 @@ class ChromiumHeadlessDownloader(Downloader):
|
|||||||
self.service = Service("/usr/bin/chromedriver")
|
self.service = Service("/usr/bin/chromedriver")
|
||||||
|
|
||||||
|
|
||||||
def download(self, url):
|
def download(self, url, post=None):
|
||||||
|
if post:
|
||||||
|
raise Exception('POST method with Chromium headless not yet implemented')
|
||||||
print("Download {}".format(url))
|
print("Download {}".format(url))
|
||||||
self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ from abc import abstractmethod
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from urllib.parse import parse_qs
|
from urllib.parse import parse_qs
|
||||||
|
|
||||||
|
|
||||||
from .extractor import *
|
from .extractor import *
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
@ -56,6 +57,21 @@ class TwoStepsExtractor(Extractor):
|
|||||||
def clean_url(url):
|
def clean_url(url):
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
def add_event_start_day(self, url, start_day):
    """Record *start_day* for the event identified by *url*.

    The value is stored under ``"start_day"`` in ``self.event_properties[url]``;
    that per-URL dict is created on first use.
    """
    self.event_properties.setdefault(url, {})["start_day"] = start_day
|
||||||
|
|
||||||
|
def add_event_start_time(self, url, start_time):
    """Record *start_time* for the event identified by *url*.

    The value is stored under ``"start_time"`` in ``self.event_properties[url]``;
    that per-URL dict is created on first use.
    """
    self.event_properties.setdefault(url, {})["start_time"] = start_time
|
||||||
|
|
||||||
|
def add_event_title(self, url, title):
    """Record *title* for the event identified by *url*.

    The value is stored under ``"title"`` in ``self.event_properties[url]``;
    that per-URL dict is created on first use.
    """
    self.event_properties.setdefault(url, {})["title"] = title
|
||||||
|
|
||||||
def add_event_tag(self, url, tag):
|
def add_event_tag(self, url, tag):
|
||||||
if not url in self.event_properties:
|
if not url in self.event_properties:
|
||||||
self.event_properties[url] = {}
|
self.event_properties[url] = {}
|
||||||
@ -63,10 +79,31 @@ class TwoStepsExtractor(Extractor):
|
|||||||
self.event_properties[url]["tags"] = []
|
self.event_properties[url]["tags"] = []
|
||||||
self.event_properties[url]["tags"].append(tag)
|
self.event_properties[url]["tags"].append(tag)
|
||||||
|
|
||||||
|
def add_event_category(self, url, cat):
    """Record the category *cat* for the event identified by *url*.

    The value is stored under ``"category"`` in ``self.event_properties[url]``;
    that per-URL dict is created on first use.
    """
    self.event_properties.setdefault(url, {})["category"] = cat
|
||||||
|
|
||||||
|
def add_event_location(self, url, loc):
    """Record the location *loc* for the event identified by *url*.

    The value is stored under ``"location"`` in ``self.event_properties[url]``;
    that per-URL dict is created on first use.
    """
    self.event_properties.setdefault(url, {})["location"] = loc
|
||||||
|
|
||||||
def add_event_with_props(self, event_url, title, category, start_day, location, description, tags, uuid, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False, image=None, image_alt=None):
    """Add an event, merging in the properties recorded earlier for its URL.

    Properties stored in ``self.event_properties[event_url]`` take precedence
    over the corresponding arguments: recorded tags are appended to *tags*,
    while a recorded start day, start time, title, category or location
    replaces the value passed in.  All other arguments are forwarded to
    ``self.add_event`` unchanged.
    """
    # One lookup; an unknown URL simply has nothing to merge.
    props = self.event_properties.get(event_url, {})

    if 'tags' in props:
        tags = tags + props['tags']
    # .get(key, current) keeps the argument when nothing was recorded and
    # uses the recorded value (whatever it is) when the key is present.
    start_day = props.get('start_day', start_day)
    start_time = props.get('start_time', start_time)
    title = props.get('title', title)
    category = props.get('category', category)
    location = props.get('location', location)

    self.add_event(title, category, start_day, location, description, tags, uuid, recurrences, url_human, start_time, end_day, end_time, last_modified, published, image, image_alt)
|
||||||
|
|
||||||
@ -80,10 +117,13 @@ class TwoStepsExtractor(Extractor):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def extract(self, content, url, url_human = None, default_values = None, published = False):
|
def extract(self, content, url, url_human = None, default_values = None, published = False, only_future=True):
|
||||||
|
self.only_future = only_future
|
||||||
|
self.now = datetime.datetime.now().date()
|
||||||
self.set_header(url)
|
self.set_header(url)
|
||||||
self.clear_events()
|
self.clear_events()
|
||||||
|
|
||||||
|
self.url = url
|
||||||
self.event_urls = None
|
self.event_urls = None
|
||||||
self.event_properties.clear()
|
self.event_properties.clear()
|
||||||
|
|
||||||
|
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 4.2.7 on 2024-04-19 21:44
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Alter ``RecurrentImport.processor`` so its choices include 'lacomedie'."""

    dependencies = [
        ('agenda_culturel', '0049_alter_recurrentimport_processor'),
    ]

    operations = [
        migrations.AlterField(
            model_name='recurrentimport',
            name='processor',
            field=models.CharField(
                choices=[
                    ('ical', 'ical'),
                    ('icalnobusy', 'ical no busy'),
                    ('icalnovc', 'ical no VC'),
                    ('lacoope', 'lacoope.org'),
                    ('lacomedie', 'la comédie'),
                ],
                default='ical',
                max_length=20,
                verbose_name='Processor',
            ),
        ),
    ]
|
@ -604,7 +604,10 @@ class Event(models.Model):
|
|||||||
# for each event, check if it's a new one, or a one to be updated
|
# for each event, check if it's a new one, or a one to be updated
|
||||||
for event in events:
|
for event in events:
|
||||||
sdate = date.fromisoformat(event.start_day)
|
sdate = date.fromisoformat(event.start_day)
|
||||||
|
if event.end_day:
|
||||||
edate = date.fromisoformat(event.end_day)
|
edate = date.fromisoformat(event.end_day)
|
||||||
|
else:
|
||||||
|
edate = sdate
|
||||||
if min_date is None or min_date > sdate:
|
if min_date is None or min_date > sdate:
|
||||||
min_date = sdate
|
min_date = sdate
|
||||||
if max_date is None or max_date < sdate:
|
if max_date is None or max_date < sdate:
|
||||||
@ -755,6 +758,7 @@ class RecurrentImport(models.Model):
|
|||||||
ICALNOBUSY = "icalnobusy", _("ical no busy")
|
ICALNOBUSY = "icalnobusy", _("ical no busy")
|
||||||
ICALNOVC = "icalnovc", _("ical no VC")
|
ICALNOVC = "icalnovc", _("ical no VC")
|
||||||
LACOOPE = "lacoope", _('lacoope.org')
|
LACOOPE = "lacoope", _('lacoope.org')
|
||||||
|
LACOMEDIE = "lacomedie", _('la comédie')
|
||||||
|
|
||||||
class DOWNLOADER(models.TextChoices):
|
class DOWNLOADER(models.TextChoices):
|
||||||
SIMPLE = "simple", _("simple")
|
SIMPLE = "simple", _("simple")
|
||||||
|
Loading…
Reference in New Issue
Block a user