Fix nouveau site puce à l'oreille
This commit is contained in:
parent
62060925cd
commit
6c86a8fc18
@ -29,8 +29,8 @@ from src.agenda_culturel.import_tasks.custom_extractors import *
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
u2e = URL2Events(SimpleDownloader(), lapucealoreille.CExtractor())
|
u2e = URL2Events(SimpleDownloader(), lapucealoreille.CExtractor())
|
||||||
url = "https://www.lapucealoreille63.fr/programmation/"
|
url = "https://www.lapucealoreille63.fr/agenda"
|
||||||
url_human = "https://www.lapucealoreille63.fr/programmation/"
|
url_human = "https://www.lapucealoreille63.fr/agenda"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
events = u2e.process(url, url_human, cache = "cache-lapucealoreille.xml", default_values = {}, published = True)
|
events = u2e.process(url, url_human, cache = "cache-lapucealoreille.xml", default_values = {}, published = True)
|
||||||
|
@ -15,11 +15,6 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
e_url = e.find("a")
|
e_url = e.find("a")
|
||||||
if e_url:
|
if e_url:
|
||||||
if self.add_event_url(e_url["href"]):
|
if self.add_event_url(e_url["href"]):
|
||||||
title = e.select("div[data-testid=richTextElement] h1.font_0 span")
|
|
||||||
if title:
|
|
||||||
title = title[0].contents[0].get_text().replace("\n", " ")
|
|
||||||
title = re.sub(" +", " ", title)
|
|
||||||
self.add_event_title(e_url["href"], title)
|
|
||||||
|
|
||||||
def add_event_from_content(
|
def add_event_from_content(
|
||||||
self,
|
self,
|
||||||
@ -31,9 +26,12 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
):
|
):
|
||||||
soup = BeautifulSoup(event_content, "html.parser")
|
soup = BeautifulSoup(event_content, "html.parser")
|
||||||
|
|
||||||
|
title = soup.select("h2")[0].get_text()
|
||||||
|
|
||||||
start_day = self.parse_french_date(
|
start_day = self.parse_french_date(
|
||||||
soup.find("h2").get_text()
|
soup.select("h2")[1].get_text()
|
||||||
) # pas parfait, mais bordel que ce site est mal construit
|
) # pas parfait, mais bordel que ce site est mal construit
|
||||||
|
print(soup.select("h2")[1].get_text())
|
||||||
|
|
||||||
spans = soup.select("div[data-testid=richTextElement] span")
|
spans = soup.select("div[data-testid=richTextElement] span")
|
||||||
start_time = None
|
start_time = None
|
||||||
@ -79,7 +77,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
|
|
||||||
self.add_event_with_props(
|
self.add_event_with_props(
|
||||||
event_url,
|
event_url,
|
||||||
None,
|
title,
|
||||||
"Concert",
|
"Concert",
|
||||||
start_day,
|
start_day,
|
||||||
location,
|
location,
|
||||||
|
Loading…
Reference in New Issue
Block a user