from ..generic_extractors import *
import re
import json5
from datetime import timedelta


# A class dedicated to get events from La puce à l'oreille
# URL: https://www.lapucealoreille63.fr/
class CExtractor(TwoStepsExtractor):
    """Extractor for the events published by La Puce à l'Oreille.

    Step one (`build_event_url_list`) scans the listing page for event
    links; step two (`add_event_from_content`) parses one event page and
    registers the event through `add_event_with_props`.
    """

    # Venue name used when the event page does not give a "LIEU :" line.
    nom_lieu = "La Puce à l'Oreille"

    @staticmethod
    def _after_label(text):
        """Return what follows the first ':' in *text*.

        The first colon is the separator between the label ("DÉBUT",
        "HORAIRES", "LIEU") and its value.  Splitting on the *last* colon
        would reduce "DÉBUT : 20:30" to "30" and truncate a venue name that
        itself contains a colon.  When there is no colon at all, the whole
        text is returned unchanged.
        """
        _label, separator, value = text.partition(":")
        return value if separator else text

    def build_event_url_list(self, content):
        """Register the URL (and title, when found) of every listed event.

        Args:
            content: HTML of the listing page.
        """
        soup = BeautifulSoup(content, "html.parser")

        events = soup.select("div.SPY_vo div[data-testid=mesh-container-content]")
        for e in events:
            e_url = e.find("a")
            if e_url is None:
                continue
            href = e_url.get("href")
            if not href:
                # An anchor without a target must not abort the whole listing.
                continue
            if not self.add_event_url(href):
                continue
            title = e.select("div[data-testid=richTextElement] h1.font_0 span")
            if title and title[0].contents:
                # Only the first child of the span is used as the title;
                # line breaks and runs of spaces are collapsed.
                title = title[0].contents[0].get_text().replace("\n", " ")
                title = re.sub(" +", " ", title)
                self.add_event_title(href, title)

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one event page and register the event it describes.

        Args:
            event_content: HTML of the event page.
            event_url: URL of the event page; also used as the event uuid.
            url_human: ignored, it is always replaced by `event_url`.
            default_values: not used by this extractor.
            published: forwarded unchanged to `add_event_with_props`.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        # NOTE: the date is read from the first <h2>; a page without one
        # raises AttributeError here.
        start_day = self.parse_french_date(soup.find("h2").get_text())

        # Not perfect, but this site is really badly built.
        spans = soup.select("div[data-testid=richTextElement] span")

        start_time = None
        end_time = None
        location = None

        for span in spans:
            txt = span.get_text()
            label = txt.lstrip()
            if label.startswith("DÉBUT"):
                # NOTE(review): presumably parse_french_time accepts both
                # "20h30" and "20:30" -- confirm against the helper.
                start_time = self.parse_french_time(self._after_label(txt))
                end_time = None
            elif label.startswith("HORAIRES :"):
                hs = self._after_label(txt).split("-")
                start_time = self.parse_french_time(hs[0])
                if len(hs) > 1:
                    end_time = self.parse_french_time(hs[1])
                else:
                    end_time = None
            elif label.startswith("LIEU :") and not location:
                # Only the first non-empty "LIEU :" line is kept.
                location = self._after_label(txt).lstrip()

        if not location:
            location = self.nom_lieu
        end_day = self.guess_end_day(start_day, start_time, end_time)

        url_human = event_url
        tags = []

        image = soup.select("wow-image img[fetchpriority=high]")
        if image:
            # .get() yields None rather than raising when <img> has no src.
            image = image[0].get("src")
        else:
            image = None

        descriptions = soup.select(
            "div[data-testid=mesh-container-content] div[data-testid=inline-content] div[data-testid=mesh-container-content] div[data-testid=richTextElement]"
        )
        if descriptions:
            # The longest rich-text element is taken as the description.
            descriptions = [d.get_text() for d in descriptions]
            description = max(descriptions, key=len)
        else:
            description = None

        self.add_event_with_props(
            event_url,
            None,
            "Concert",
            start_day,
            location,
            description,
            tags,
            recurrences=None,
            uuid=event_url,
            url_human=url_human,
            start_time=start_time,
            end_day=end_day,
            end_time=end_time,
            published=published,
            image=image,
        )