65 lines
2.5 KiB
Python
65 lines
2.5 KiB
Python
from ..generic_extractors import *
|
|
import re
|
|
import json5
|
|
from datetime import timedelta
|
|
|
|
# A class dedicated to getting events from La Coopérative de Mai:
|
|
# URL: https://www.lacoope.org/concerts-calendrier/
|
|
class CExtractor(TwoStepsExtractor):
    """Extractor for the events of La Coopérative de Mai.

    Listing page: https://www.lacoope.org/concerts-calendrier/

    The listing page carries its events in a ``<script>`` that assigns an
    object to ``window.fullCalendarContent``. Each event page is then parsed
    for its title, image, description and Google Calendar link.
    """

    # Venue name, used as the location of every extracted event.
    nom_lieu = "La Coopérative de Mai"

    def build_event_url_list(self, content):
        """Register the URL of every event found in the listing page.

        Events whose ``tag`` is ``"Gratuit"`` additionally get the
        ``"gratuit"`` tag.

        Args:
            content: HTML of the listing page.

        Raises:
            Exception: if the ``js-filter__results`` container or its
                ``<script>`` child is missing, or if the
                ``window.fullCalendarContent`` assignment cannot be located.
        """
        soup = BeautifulSoup(content, "html.parser")

        # find() returns None when the container is absent; treat that the
        # same way as a container without any <script> child.
        results = soup.find("div", class_="js-filter__results")
        scripts = results.findChildren("script") if results is not None else []
        if not scripts:
            raise Exception("Cannot find events in the first page")

        # re.S lets the payload span several lines; the dot of the JS
        # attribute access is escaped so it only matches a literal ".".
        search = re.search(
            r"window\.fullCalendarContent = (.*)</script>",
            str(scripts[0]),
            re.S,
        )
        if not search:
            raise Exception("Cannot extract events from javascript")

        # NOTE(review): json5 is presumably used because the payload is a
        # JavaScript object literal rather than strict JSON -- confirm.
        data = json5.loads(search.group(1))
        for event in data["events"]:
            url = event["url"]
            self.add_event_url(url)
            # Not every event is guaranteed to carry a "tag" key.
            if event.get("tag") == "Gratuit":
                self.add_event_tag(url, "gratuit")

    def add_event_from_content(
        self,
        event_content,
        event_url,
        url_human=None,
        default_values=None,
        published=False,
    ):
        """Parse one event page and register the resulting event.

        Args:
            event_content: HTML of the event page.
            event_url: URL of the event page; also used as the event uuid
                and as its human-facing URL.
            url_human: ignored, the event URL is always used instead.
            default_values: not used by this extractor.
            published: forwarded unchanged to ``add_event_with_props``.

        Raises:
            Exception: if the page has no usable ``<h1>`` title or no link
                to Google Calendar.
        """
        soup = BeautifulSoup(event_content, "html.parser")

        heading = soup.find("h1")
        if heading is None or not heading.contents:
            raise Exception("Cannot find the title of the event")
        title = heading.contents[0]
        category = "Concert"

        # The image is optional: keep None when the meta tag (or its
        # "content" attribute) is missing.
        image = soup.find("meta", property="og:image")
        if image is not None:
            image = image.get("content")

        # Walk down the nested containers holding the description; a missing
        # level at any depth yields an empty description.
        node = soup
        for css_class in ("grid-concert-content", "content-striped", "wysiwyg"):
            node = node.find("div", class_=css_class)
            if node is None:
                break
        description = node.get_text() if node is not None else ""

        tags = []

        # Dates and times are read from the "add to Google Calendar" link.
        link_calendar = soup.select('a[href^="https://calendar.google.com/calendar/"]')
        if not link_calendar:
            raise Exception("Cannot find the google calendar url")

        gg_cal = GGCalendar(link_calendar[0]["href"])
        start_day = gg_cal.start_day
        start_time = gg_cal.start_time
        end_day = gg_cal.end_day
        end_time = gg_cal.end_time
        location = CExtractor.nom_lieu
        url_human = event_url

        self.add_event_with_props(
            event_url,
            title,
            category,
            start_day,
            location,
            description,
            tags,
            recurrences=None,
            uuid=event_url,
            url_human=url_human,
            start_time=start_time,
            end_day=end_day,
            end_time=end_time,
            published=published,
            image=image,
        )
|
|
|