Oups
This commit is contained in:
parent
9b898d26da
commit
55a0094e2f
@ -0,0 +1,156 @@
|
|||||||
|
from ..generic_extractors import *
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
|
# A class dedicated to getting events from La Cour des 3 Coquins and Graines de spectacle
|
||||||
|
# URL: https://billetterie-c3c.clermont-ferrand.fr//
|
||||||
|
class CExtractor(TwoStepsExtractor):
    """Two-step extractor for La Cour des 3 Coquins / Graines de spectacle.

    Agenda site: https://billetterie-c3c.clermont-ferrand.fr//
    NOTE(review): `TwoStepsExtractor` and `urlparse` are assumed to come from
    the star import of `..generic_extractors` — confirm against that module.
    """

    def extract(
        self,
        content,
        url,
        url_human=None,
        default_values=None,
        published=False,
        only_future=True,
        ignore_404=True,
    ):
        """Record the site's root address, then delegate to the parent extractor.

        The root address (scheme + host + trailing slash) is kept on the
        instance because the booking AJAX endpoints queried later are
        addressed relative to the site root, not to the event page URL.
        """
        host = urlparse(url).netloc
        self.root_address = f"https://{host}/"
        return super().extract(
            content,
            url,
            url_human,
            default_values,
            published,
            only_future,
            ignore_404,
        )
|
||||||
|
|
||||||
|
def category_agenda(self, category):
    """Map a scraped category label to an agenda ``(category, tag)`` pair.

    :param category: raw category text from the event page (may be empty or None).
    :return: a 2-tuple ``(agenda_category, tag)``; ``(None, None)`` when the
        label is empty or unknown.
    """
    # Fix: the previous version returned a bare None for an empty label while
    # every other path returned a 2-tuple — callers unpack `cat, tag = ...`,
    # so always return a pair.
    if not category:
        return None, None
    # One tuple-valued mapping keeps each label's category and tag in sync
    # (previously two parallel dicts that had to be maintained together).
    mapping = {
        "Théâtre": ("Spectacles", "🎭 théâtre"),
        "Concert": ("Fêtes & Concerts", "🎵 concert"),
        "Projection": ("Cinéma", None),
    }
    return mapping.get(category, (None, None))
|
||||||
|
|
||||||
|
def build_event_url_list(self, content):
    """Collect the detail-page URL of every event listed on the agenda page.

    :param content: HTML of the agenda page.
    Side effect: registers each non-empty URL via ``self.add_event_url``.
    """
    soup = BeautifulSoup(content, "html.parser")

    for fiche in soup.select("div.fiche-info"):
        link = fiche.select_one("a.btn.lien_savoir_plus")
        # Fix: the original indexed ["href"] without checking the anchor
        # exists — a fiche without a "savoir plus" link raised TypeError
        # (and a link without an href attribute raised KeyError).
        if link is None:
            continue
        e_url = link.get("href", "")
        if e_url != "":
            # Detail links are relative to the agenda page URL.
            e_url = self.url + "/" + e_url
            self.add_event_url(e_url)
|
||||||
|
|
||||||
|
def add_event_from_content(
    self,
    event_content,
    event_url,
    url_human=None,
    default_values=None,
    published=False,
):
    """Parse one event detail page and register an event per scheduled slot.

    Scrapes title, image, description, duration, location and categories from
    the page, then issues two extra requests per candidate date against the
    booking engine to obtain the exact show times.

    :param event_content: HTML of the event detail page.
    :param event_url: URL of the detail page (also used as the event UUID).
    :param url_human: optional human-readable URL forwarded as-is.
    :param default_values: defaults forwarded to ``add_event_with_props``.
    :param published: publication flag forwarded to ``add_event_with_props``.
    """
    soup = BeautifulSoup(event_content, "html.parser")

    title = soup.select_one("h1")
    if title:
        title = title.text

    image = soup.select_one("#media .swiper-slide img")
    if image:
        image = image["src"]
    else:
        image = None

    # Fix: guard the description block — pages without a ".presentation"
    # section previously raised AttributeError on None.
    presentation = soup.select_one(".presentation")
    description = presentation.get_text() if presentation is not None else None

    # Duration is given as a French time string (e.g. "1h30").
    duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
    if duration is not None:
        duration = Extractor.parse_french_time(duration.text)

    location = soup.select_one("#criteres .LIEU-V .valeur-critere li")
    if location is not None:
        location = location.text

    categories = []
    tags = []
    for span in soup.select(".sous-titre span"):
        classes = span.get("class")
        if classes and len(classes) > 0:
            if classes[0].startswith("LIEU-"):
                # A LIEU-* span overrides the location found above.
                location = span.text
            elif classes[0].startswith("THEMATIQUE-"):
                cat, tag = self.category_agenda(span.text)
                if cat:
                    categories.append(cat)
                if tag:
                    tags.append(tag)

    # TODO: parse the dates and retrieve the times ()
    dates = [o.get("value") for o in soup.select("select.datedleb_resa option")]

    # The booking-engine identifiers are embedded in inline <script> blocks.
    pattern_code_site = re.compile(r'.*gsw_vars\["CODEPRESTATAIRE"\] = "(.*?)";.*', flags=re.DOTALL)
    pattern_code_object = re.compile(r'.*gsw_vars\["CODEPRESTATION"\] = "(.*?)";.*', flags=re.DOTALL)
    pattern_code_moteur = re.compile(r'.*Resa.init_moteur_resa\(\'([0-9]+)\'\);.*', flags=re.DOTALL)

    code_site = ""
    id_object = ""
    moteur = ""
    for script in soup.find_all('script'):
        # Fix: each pattern was matched twice per script (once in the `if`,
        # once to read the groups) — match once and reuse the Match object.
        text = str(script.string)
        m = pattern_code_site.match(text)
        if m:
            code_site = m.group(1)
        m = pattern_code_object.match(text)
        if m:
            id_object = m.group(1)
        m = pattern_code_moteur.match(text)
        if m:
            moteur = m.group(1)

    # Temporarily disable the downloader's politeness pause: the two
    # supplementary requests per date are cheap AJAX calls.
    pause = self.downloader.pause
    self.downloader.pause = False

    # Getting the exact schedule needs two supplementary requests per date.
    datetimes = []
    if code_site != "" and id_object != "" and moteur != "":
        for date in dates:
            # The first page is required so the server knows the selected
            # date (response intentionally discarded — server-side state).
            self.downloader.get_content(self.root_address + "/booking?action=searchAjax&cid=" + moteur + "&afficheDirectDispo=" + date + "&type_prestataire=V&cle_fiche=PRESTATION-V-" + code_site + "-" + id_object + "&datedeb=" + date)
            # Then we get the form that lists the available hours.
            page2 = self.downloader.get_content(self.root_address + "/booking?action=detailTarifsPrestationAjax&prestation=V-" + code_site + "-" + id_object)
            soup2 = BeautifulSoup(page2, "html.parser")
            times = [o.text for o in soup2.select("#quart_en_cours_spec option")]
            for time_label in times:
                startdate = Extractor.parse_french_date(date)
                starttime = Extractor.parse_french_time(time_label)
                start = datetime.datetime.combine(startdate, starttime)
                enddate = None
                endtime = None
                if duration is not None:
                    # End = start + duration (duration is a datetime.time).
                    end = start + timedelta(hours=duration.hour, minutes=duration.minute, seconds=duration.second)
                    enddate = end.date()
                    endtime = end.time()
                datetimes.append((startdate, starttime, enddate, endtime))

    self.downloader.pause = pause

    # Only the first mapped category is kept for the event.
    category = categories[0] if categories else None

    for dt in datetimes:
        self.add_event_with_props(
            default_values,
            event_url,
            title,
            category,
            dt[0],
            location,
            description,
            tags,
            recurrences=None,
            uuids=[event_url],
            url_human=url_human,
            start_time=dt[1],
            end_day=dt[2],
            end_time=dt[3],
            published=published,
        )
|
Loading…
x
Reference in New Issue
Block a user