Support des imports plugin MEC de Wordpress
This commit is contained in:
parent
3c1368153c
commit
8547a671c4
43
experimentations/get_le_poulailler.py
Executable file
43
experimentations/get_le_poulailler.py
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Resolve this script's real location (symlinks followed) and keep the
# directory that contains it.
_script_path = os.path.realpath(__file__)
current = os.path.dirname(_script_path)

# Directory one level above the script's directory.
parent = os.path.dirname(current)

# Put that directory on sys.path so the `src.*` imports below resolve when
# the script is launched directly.
sys.path.append(parent)
|
||||||
|
|
||||||
|
from src.agenda_culturel.import_tasks.downloader import *
|
||||||
|
from src.agenda_culturel.import_tasks.extractor import *
|
||||||
|
from src.agenda_culturel.import_tasks.importer import *
|
||||||
|
from src.agenda_culturel.import_tasks.custom_extractors import *
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # The same listing page is passed as both the source URL and the
    # human-readable URL.
    url = "https://www.cabaretlepoulailler.fr/agenda/tout-lagenda/"
    url_human = url
    exportfile = "events-le-poulailler.json"

    pipeline = URL2Events(SimpleDownloader(), wordpress_mec.CExtractor())

    try:
        events = pipeline.process(
            url,
            url_human,
            cache="cache-le-poulailler.html",
            default_values={"location": "Le Poulailler"},
            published=True,
        )

        print("Saving events to file {}".format(exportfile))
        with open(exportfile, "w") as out:
            # default=str stringifies any value json cannot encode natively.
            json.dump(events, out, indent=4, default=str)
    except Exception as err:
        # Top-level boundary of an experimentation script: report and stop.
        print("Exception: " + str(err))
|
43
experimentations/get_les_vinzelles.py
Executable file
43
experimentations/get_les_vinzelles.py
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Resolve this script's real location (symlinks followed) and keep the
# directory that contains it.
_script_path = os.path.realpath(__file__)
current = os.path.dirname(_script_path)

# Directory one level above the script's directory.
parent = os.path.dirname(current)

# Put that directory on sys.path so the `src.*` imports below resolve when
# the script is launched directly.
sys.path.append(parent)
|
||||||
|
|
||||||
|
from src.agenda_culturel.import_tasks.downloader import *
|
||||||
|
from src.agenda_culturel.import_tasks.extractor import *
|
||||||
|
from src.agenda_culturel.import_tasks.importer import *
|
||||||
|
from src.agenda_culturel.import_tasks.custom_extractors import *
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # The same programme page is passed as both the source URL and the
    # human-readable URL.
    url = "https://www.lesvinzelles.com/index.php/programme/"
    url_human = url
    exportfile = "events-les-vinzelles.json"

    pipeline = URL2Events(SimpleDownloader(), wordpress_mec.CExtractor())

    try:
        events = pipeline.process(
            url,
            url_human,
            cache="cache-les-vinzelles.html",
            default_values={"location": "Les Vinzelles"},
            published=True,
        )

        print("Saving events to file {}".format(exportfile))
        with open(exportfile, "w") as out:
            # default=str stringifies any value json cannot encode natively.
            json.dump(events, out, indent=4, default=str)
    except Exception as err:
        # Top-level boundary of an experimentation script: report and stop.
        print("Exception: " + str(err))
|
@ -112,6 +112,8 @@ def run_recurrent_import(self, pk):
|
|||||||
extractor = lefotomat.CExtractor()
|
extractor = lefotomat.CExtractor()
|
||||||
elif rimport.processor == RecurrentImport.PROCESSOR.LAPUCEALOREILLE:
|
elif rimport.processor == RecurrentImport.PROCESSOR.LAPUCEALOREILLE:
|
||||||
extractor = lapucealoreille.CExtractor()
|
extractor = lapucealoreille.CExtractor()
|
||||||
|
elif rimport.processor == RecurrentImport.PROCESSOR.MECWORDPRESS:
|
||||||
|
extractor = wordpress_mec.CExtractor()
|
||||||
else:
|
else:
|
||||||
extractor = None
|
extractor = None
|
||||||
|
|
||||||
|
@ -0,0 +1,99 @@
|
|||||||
|
from ..generic_extractors import *
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
# A class dedicated to get events from MEC Wordpress plugin
|
||||||
|
# URL: https://webnus.net/modern-events-calendar/
|
||||||
|
class CExtractor(TwoStepsExtractor):
    """Extractor for sites built with the Modern Events Calendar (MEC) WordPress plugin.

    ``build_event_url_list`` reads a listing page and registers each event's
    URL, title and (when it can be mapped) category.
    ``add_event_from_content`` reads a single event page and registers the
    event with its date, times, image and description.
    """

    # Category labels found on MEC pages -> agenda categories.
    _CATEGORY_MAPPING = {
        "Musique": "Concert",
        "CONCERT": "Concert",
        "VISITE": "Autre",
        "Spectacle": "Théâtre",
        "Rencontre": "Autre",
        "Atelier": "Autre",
        "Projection": "Autre",
    }

    def local2agendaCategory(self, category):
        """Return the agenda category matching a MEC label, or None if unmapped."""
        return self._CATEGORY_MAPPING.get(category)

    def build_event_url_list(self, content):
        """Register every event found in the listing page ``content``.

        Tiles that do not contain exactly one title link are ignored.
        """
        soup = BeautifulSoup(content, "xml")

        for tile in soup.select("div.mec-tile-event-content"):
            link = tile.select("h4.mec-event-title a")
            if len(link) != 1:
                continue

            url = link[0]["href"]
            title = link[0].get_text()

            # Only decorate the URL with a title/category when
            # add_event_url() reports a truthy result.
            if not self.add_event_url(url):
                continue

            print(url, title)
            self.add_event_title(url, title)

            # Prefer the "label" markup; fall back to the "category" markup.
            categories = tile.select(".mec-label-normal")
            if not categories:
                categories = tile.select(".mec-category")
            if categories:
                category = self.local2agendaCategory(categories[0].get_text())
                if category is not None:
                    self.add_event_category(url, category)

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one event page and register the event it describes.

        Args:
            event_content: markup of the event page.
            event_url: URL of the event page; also used as the event's uuid
                and as its human-readable URL.
            url_human: accepted for interface compatibility; it is replaced
                by ``event_url`` before the event is registered.
            default_values: optional dict; only its "location" key is read.
            published: forwarded unchanged to ``add_event_with_props``.
        """
        soup = BeautifulSoup(event_content, "xml")

        day_nodes = soup.select(".mec-start-date-label")
        if day_nodes:
            start_day = self.parse_french_date(day_nodes[0].get_text())
        else:
            start_day = None

        # The time label is either "start" or "start-end".
        start_time = None
        end_time = None
        time_nodes = soup.select(".mec-single-event-time .mec-events-abbr")
        if time_nodes:
            parts = time_nodes[0].get_text().split("-")
            start_time = self.parse_french_time(parts[0])
            if len(parts) > 1:
                end_time = self.parse_french_time(parts[1])

        image_nodes = soup.select(".mec-events-event-image img")
        image = image_nodes[0]["src"] if image_nodes else None

        # Fix: a page without a content node used to raise IndexError.
        description_nodes = soup.select(".mec-event-content")
        description = description_nodes[0].get_text() if description_nodes else ""

        # Fix: default_values defaults to None, which used to raise TypeError
        # on the membership test.
        location = default_values.get("location") if default_values else None

        # The event page itself is used as the human-readable URL.
        url_human = event_url

        self.add_event_with_props(
            event_url,
            None,
            None,
            start_day,
            location,
            description,
            None,
            recurrences=None,
            uuids=[event_url],
            url_human=url_human,
            start_time=start_time,
            end_day=None,
            end_time=end_time,
            published=published,
            image=image,
        )
|
@ -2,6 +2,10 @@ from abc import abstractmethod
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from urllib.parse import parse_qs
|
from urllib.parse import parse_qs
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
from .extractor import *
|
from .extractor import *
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
@ -171,6 +175,7 @@ class TwoStepsExtractor(Extractor):
|
|||||||
default_values=None,
|
default_values=None,
|
||||||
published=False,
|
published=False,
|
||||||
only_future=True,
|
only_future=True,
|
||||||
|
ignore_404=True
|
||||||
):
|
):
|
||||||
self.only_future = only_future
|
self.only_future = only_future
|
||||||
self.now = datetime.datetime.now().date()
|
self.now = datetime.datetime.now().date()
|
||||||
@ -195,10 +200,16 @@ class TwoStepsExtractor(Extractor):
|
|||||||
# first download the content associated with this link
|
# first download the content associated with this link
|
||||||
content_event = self.downloader.get_content(event_url)
|
content_event = self.downloader.get_content(event_url)
|
||||||
if content_event is None:
|
if content_event is None:
|
||||||
raise Exception(_("Cannot extract event from url {}").format(event_url))
|
msg = "Cannot extract event from url {}".format(event_url)
|
||||||
# then extract event information from this html document
|
if ignore_404:
|
||||||
self.add_event_from_content(
|
logger.error(msg)
|
||||||
content_event, event_url, url_human, default_values, published
|
else:
|
||||||
)
|
print("go")
|
||||||
|
raise Exception(msg)
|
||||||
|
else:
|
||||||
|
# then extract event information from this html document
|
||||||
|
self.add_event_from_content(
|
||||||
|
content_event, event_url, url_human, default_values, published
|
||||||
|
)
|
||||||
|
|
||||||
return self.get_structure()
|
return self.get_structure()
|
||||||
|
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 4.2.7 on 2024-06-02 12:01
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Alter RecurrentImport.processor; its choices include the MEC WordPress plugin."""

    dependencies = [
        ("agenda_culturel", "0063_alter_event_exact_location"),
    ]

    operations = [
        migrations.AlterField(
            model_name="recurrentimport",
            name="processor",
            field=models.CharField(
                choices=[
                    ("ical", "ical"),
                    ("icalnobusy", "ical no busy"),
                    ("icalnovc", "ical no VC"),
                    ("lacoope", "lacoope.org"),
                    ("lacomedie", "la comédie"),
                    ("lefotomat", "le fotomat"),
                    ("lapucealoreille", "la puce à loreille"),
                    ("Plugin wordpress MEC", "Plugin wordpress MEC"),
                ],
                default="ical",
                max_length=20,
                verbose_name="Processor",
            ),
        ),
    ]
|
@ -1194,6 +1194,7 @@ class RecurrentImport(models.Model):
|
|||||||
LACOMEDIE = "lacomedie", _("la comédie")
|
LACOMEDIE = "lacomedie", _("la comédie")
|
||||||
LEFOTOMAT = "lefotomat", _("le fotomat")
|
LEFOTOMAT = "lefotomat", _("le fotomat")
|
||||||
LAPUCEALOREILLE = "lapucealoreille", _("la puce à l" "oreille")
|
LAPUCEALOREILLE = "lapucealoreille", _("la puce à l" "oreille")
|
||||||
|
MECWORDPRESS = "Plugin wordpress MEC", _("Plugin wordpress MEC")
|
||||||
|
|
||||||
class DOWNLOADER(models.TextChoices):
|
class DOWNLOADER(models.TextChoices):
|
||||||
SIMPLE = "simple", _("simple")
|
SIMPLE = "simple", _("simple")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user