74 lines
2.9 KiB
Python
74 lines
2.9 KiB
Python
from ..generic_extractors import *
|
|
import re
|
|
import json5
|
|
from datetime import timedelta
|
|
|
|
|
|
# A class dedicated to extracting events from La Puce à l'Oreille
|
|
# URL: https://www.lapucealoreille63.fr/
|
|
class CExtractor(TwoStepsExtractor):
    """Two-step event extractor for La Puce à l'Oreille.

    Step one (`build_event_url_list`) scans the listing page for event links
    and their titles. Step two (`add_event_from_content`) parses one event
    page and registers the event.

    The helpers `add_event_url`, `add_event_title`, `parse_french_date`,
    `parse_french_time`, `guess_end_day` and `add_event_with_props` are not
    defined in this class; they are presumably inherited from
    `TwoStepsExtractor` -- confirm against the base class.
    """

    # Fallback location used when the event page carries no "LIEU :" line.
    nom_lieu = "La Puce à l'Oreille"

    @staticmethod
    def _text_after_label(text):
        """Return what follows the first ":" in *text*.

        When *text* holds no colon at all, it is returned unchanged. Splitting
        on the first colon (rather than the last) keeps values that contain a
        colon themselves, such as "20:30", intact.
        """
        _, separator, value = text.partition(":")
        return value if separator else text

    def build_event_url_list(self, content):
        """Register every event link (and its title, if any) found in *content*.

        Args:
            content: HTML of the listing page.
        """
        soup = BeautifulSoup(content, "html.parser")

        events = soup.select("div.SPY_vo div[data-testid=mesh-container-content]")
        for e in events:
            e_url = e.find("a")
            # Skip containers without a usable link instead of raising KeyError.
            href = e_url.get("href") if e_url else None
            if not href:
                continue

            # A falsy result means the URL was not added, so no title is recorded.
            if not self.add_event_url(href):
                continue

            title = e.select("div[data-testid=richTextElement] h1.font_0 span")
            # An empty span has no first child to read the title from.
            if not title or not title[0].contents:
                continue

            title = title[0].contents[0].get_text().replace("\n", " ")
            # Collapse runs of spaces left over by the line-break replacement.
            title = re.sub(" +", " ", title)
            self.add_event_title(href, title)

    def add_event_from_content(self, event_content, event_url, url_human=None, default_values=None, published=False):
        """Parse one event page and register the event it describes.

        Args:
            event_content: HTML of the event page.
            event_url: URL of the event page; also used as the event uuid.
            url_human: Ignored; the event URL is always used instead.
            default_values: Unused by this extractor.
            published: Forwarded to `add_event_with_props`.

        Raises:
            AttributeError: If the page contains no `<h2>` element.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        # Not perfect, but the site's markup is poorly structured: the first
        # <h2> of the page is taken as the date.
        # NOTE(review): a page without <h2> raises here; decide whether such
        # pages should be skipped instead.
        start_day = self.parse_french_date(soup.find("h2").get_text())

        spans = soup.select("div[data-testid=richTextElement] span")
        start_time = None
        end_time = None
        location = None

        for span in spans:
            txt = span.get_text()
            label = txt.lstrip()
            if label.startswith("DÉBUT"):
                start_time = self.parse_french_time(self._text_after_label(txt))
                end_time = None
            elif label.startswith("HORAIRES :"):
                # Expected shape: "HORAIRES : <start> - <end>", end optional.
                hs = self._text_after_label(txt).split("-")
                start_time = self.parse_french_time(hs[0])
                end_time = self.parse_french_time(hs[1]) if len(hs) > 1 else None
            elif label.startswith("LIEU :") and not location:
                # Only the first non-empty "LIEU :" line is kept.
                location = self._text_after_label(txt).lstrip()

        if not location:
            location = self.nom_lieu
        end_day = self.guess_end_day(start_day, start_time, end_time)

        # The incoming url_human is deliberately replaced by the event URL.
        url_human = event_url
        tags = []

        image = soup.select("wow-image img[fetchpriority=high]")
        # .get() yields None for an <img> without src instead of raising.
        image = image[0].get("src") if image else None

        descriptions = soup.select("div[data-testid=mesh-container-content] div[data-testid=inline-content] div[data-testid=mesh-container-content] div[data-testid=richTextElement]")
        if descriptions:
            # Several rich-text blocks match; the longest one is used as the description.
            description = max((d.get_text() for d in descriptions), key=len)
        else:
            description = None

        self.add_event_with_props(event_url, None, "Concert", start_day, location, description, tags, recurrences=None, uuid=event_url, url_human=url_human, start_time=start_time, end_day=end_day, end_time=end_time, published=published, image=image)
|