Fix nouveau site puce à l'oreille
This commit is contained in:
parent
62060925cd
commit
6c86a8fc18
@ -29,8 +29,8 @@ from src.agenda_culturel.import_tasks.custom_extractors import *
|
||||
if __name__ == "__main__":
|
||||
|
||||
u2e = URL2Events(SimpleDownloader(), lapucealoreille.CExtractor())
|
||||
url = "https://www.lapucealoreille63.fr/programmation/"
|
||||
url_human = "https://www.lapucealoreille63.fr/programmation/"
|
||||
url = "https://www.lapucealoreille63.fr/agenda"
|
||||
url_human = "https://www.lapucealoreille63.fr/agenda"
|
||||
|
||||
try:
|
||||
events = u2e.process(url, url_human, cache = "cache-lapucealoreille.xml", default_values = {}, published = True)
|
||||
|
@ -15,11 +15,6 @@ class CExtractor(TwoStepsExtractor):
|
||||
e_url = e.find("a")
|
||||
if e_url:
|
||||
if self.add_event_url(e_url["href"]):
|
||||
title = e.select("div[data-testid=richTextElement] h1.font_0 span")
|
||||
if title:
|
||||
title = title[0].contents[0].get_text().replace("\n", " ")
|
||||
title = re.sub(" +", " ", title)
|
||||
self.add_event_title(e_url["href"], title)
|
||||
|
||||
def add_event_from_content(
|
||||
self,
|
||||
@ -31,9 +26,12 @@ class CExtractor(TwoStepsExtractor):
|
||||
):
|
||||
soup = BeautifulSoup(event_content, "html.parser")
|
||||
|
||||
title = soup.select("h2")[0].get_text()
|
||||
|
||||
start_day = self.parse_french_date(
|
||||
soup.find("h2").get_text()
|
||||
soup.select("h2")[1].get_text()
|
||||
) # pas parfait, mais bordel que ce site est mal construit
|
||||
print(soup.select("h2")[1].get_text())
|
||||
|
||||
spans = soup.select("div[data-testid=richTextElement] span")
|
||||
start_time = None
|
||||
@ -79,7 +77,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
|
||||
self.add_event_with_props(
|
||||
event_url,
|
||||
None,
|
||||
title,
|
||||
"Concert",
|
||||
start_day,
|
||||
location,
|
||||
|
Loading…
Reference in New Issue
Block a user