Import of nature events from Puy-de-Dôme
This commit is contained in:
parent 20040268e7
commit 280f04d22f
44 experimentations/get_puydedome.py Executable file
@ -0,0 +1,44 @@
#!/usr/bin/python3
# coding: utf-8

import os
import json
import sys

# getting the name of the directory
# where this file is present.
current = os.path.dirname(os.path.realpath(__file__))

# Getting the parent directory name
# where the current directory is present.
parent = os.path.dirname(current)

# adding the parent directory to
# the sys.path.
sys.path.append(parent)
sys.path.append(parent + "/src")

from src.agenda_culturel.import_tasks.downloader import *
from src.agenda_culturel.import_tasks.extractor import *
from src.agenda_culturel.import_tasks.importer import *
from src.agenda_culturel.import_tasks.custom_extractors import *


if __name__ == "__main__":
    u2e = URL2Events(SimpleDownloader(), apidae_tourisme.CExtractor())
    url = "https://widgets.apidae-tourisme.com/filter.js?widget[id]=48"
    url_human = "https://ens.puy-de-dome.fr/agenda.html"

    try:
        events = u2e.process(url, url_human, cache="cache-puydedome.html", default_values={}, published=True)

        exportfile = "events-puydedome.json"
        print("Saving events to file {}".format(exportfile))
        with open(exportfile, "w") as f:
            json.dump(events, f, indent=4, default=str)
    except Exception as e:
        print("Exception: " + str(e))
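As a quick check after running the script above, the exported file can be reloaded as plain JSON. A minimal sketch; the per-event structure depends on URL2Events and is not assumed here:

```python
import json

# Reload the file written by get_puydedome.py and count its entries.
with open("events-puydedome.json") as f:
    events = json.load(f)
print("{} events in export".format(len(events)))
```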
@ -152,6 +152,8 @@ def run_recurrent_import_internal(rimport, downloader, req_id):
        extractor = lerio.CExtractor()
    elif rimport.processor == RecurrentImport.PROCESSOR.LARAYMONDE:
        extractor = laraymonde.CExtractor()
    elif rimport.processor == RecurrentImport.PROCESSOR.APIDAE:
        extractor = apidae_tourisme.CExtractor()
    else:
        extractor = None
@ -0,0 +1,103 @@
from ..generic_extractors import *
from bs4 import BeautifulSoup
from datetime import datetime


# A class dedicated to getting events from apidae-tourisme widgets
class CExtractor(TwoStepsExtractorNoPause):

    def build_event_url_list(self, content, infuture_days=180):
        # Get the line starting with
        # wrapper.querySelector(".results_agenda").innerHTML = "
        # keep everything after the third quote,
        # undo the JavaScript string escapes,
        # and parse the contained html
        for line in content.split("\n"):
            if line.startswith('wrapper.querySelector(".results_agenda").innerHTML = "'):
                html = ('"'.join(line.split('"')[3:])).replace('\\"', '"').replace('\\n', "\n").replace('\\/', '/')
                soup = BeautifulSoup(html, "html.parser")
                links = soup.select('a.widgit_result')
                for l in links:
                    self.add_event_url(l["data-w-href"])
                break

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        # check for the hash tag identifying the event
        ref = None
        for line in event_content.split("\n"):
            if line.strip().startswith("window.location.hash"):
                ref = line.split('"')[1]
                break

        # check for content
        for line in event_content.split("\n"):
            if line.startswith('detailsWrapper.innerHTML ='):
                html = ('"'.join(line.split('"')[1:])).replace('\\"', '"').replace('\\n', "\n").replace('\\/', '/')

                soup = BeautifulSoup(html, "html.parser")
                title = soup.select_one('h2.widgit_title').text.strip()
                image = soup.select_one('img')
                image_alt = image["alt"]
                image = image["src"]
                description = soup.select('div.desc')
                description = '\n'.join([d.text for d in description])
                openings = soup.select_one('.openings .mts').text.strip().split("\n")[0]
                start_time = None
                end_time = None
                if "tous les" in openings:
                    start_day = None
                else:
                    start_day = Extractor.parse_french_date(openings)
                    details = openings.split("de")
                    if len(details) > 1:
                        hours = details[1].split("à")
                        start_time = Extractor.parse_french_time(hours[0])
                        if len(hours) > 1:
                            end_time = Extractor.parse_french_time(hours[1])

                # collect the paragraphs between the "Adresse" heading
                # and the next heading
                contact = soup.select_one(".contact")
                sa = False
                location = []
                for c in contact.children:
                    if c.name == 'h2' and c.text.strip() == "Adresse":
                        sa = True
                    else:
                        if c.name == 'h2' and sa:
                            break
                        if c.name == 'p' and sa:
                            e = c.text.strip()
                            if e != "":
                                location.append(e)

                location = ', '.join(location)

                # websites are extracted but not yet used
                websites = soup.select("a.website")
                event_url = url_human + "#" + ref

                self.add_event_with_props(
                    default_values,
                    event_url,
                    title,
                    None,
                    start_day,
                    location,
                    description,
                    [],
                    recurrences=None,
                    uuids=[event_url],
                    url_human=event_url,
                    start_time=start_time,
                    end_day=start_day,
                    end_time=end_time,
                    published=published,
                    image=image,
                    image_alt=image_alt
                )
                return
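The recovery trick in build_event_url_list can be checked in isolation. A minimal sketch on a hypothetical one-line excerpt of the widget JavaScript; the selectors match the code above, but the payload itself is invented:

```python
from bs4 import BeautifulSoup

# Hypothetical one-line excerpt of the widget JavaScript; a real payload
# comes from https://widgets.apidae-tourisme.com/filter.js?widget[id]=48.
line = ('wrapper.querySelector(".results_agenda").innerHTML = '
        '"<a class=\\"widgit_result\\" data-w-href=\\"event-1\\">'
        'Sortie nature<\\/a>";')

# Same recovery steps as in build_event_url_list: keep everything after
# the third quote, then undo the JavaScript string escapes.
html = '"'.join(line.split('"')[3:]).replace('\\"', '"').replace('\\n', "\n").replace('\\/', '/')

soup = BeautifulSoup(html, "html.parser")
print([a["data-w-href"] for a in soup.select("a.widgit_result")])
# expected: ['event-1']
```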
@ -2010,6 +2010,7 @@ class RecurrentImport(models.Model):
        ARACHNEE = "arachnee", _("Arachnée concert")
        LERIO = "rio", _('Le Rio')
        LARAYMONDE = "raymonde", _('La Raymonde')
        APIDAE = 'apidae', _('Agenda apidae tourisme')

    class DOWNLOADER(models.TextChoices):
        SIMPLE = "simple", _("simple")
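With the new choice in place, a recurrent import for this source can presumably be registered through the ORM. A minimal sketch, assuming the import path and a hypothetical `source` field; only `processor` and `downloader` appear in this diff:

```python
from agenda_culturel.models import RecurrentImport

# Hypothetical registration of the Apidae widget as a recurrent import;
# the `source` field name is an assumption, only the `processor` and
# `downloader` choices are shown in this diff.
rimport = RecurrentImport.objects.create(
    processor=RecurrentImport.PROCESSOR.APIDAE,
    downloader=RecurrentImport.DOWNLOADER.SIMPLE,
    source="https://widgets.apidae-tourisme.com/filter.js?widget[id]=48",
)
```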