From 9d84f4f63035ba4ce3a3ee631968f8b2c7eefb7c Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Sun, 19 Jan 2025 14:20:08 +0100 Subject: [PATCH] L'url des images est absolue --- .../import_tasks/custom_extractors/iguana_agenda.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/agenda_culturel/import_tasks/custom_extractors/iguana_agenda.py b/src/agenda_culturel/import_tasks/custom_extractors/iguana_agenda.py index 709985b..313870c 100644 --- a/src/agenda_culturel/import_tasks/custom_extractors/iguana_agenda.py +++ b/src/agenda_culturel/import_tasks/custom_extractors/iguana_agenda.py @@ -1,6 +1,7 @@ from ..generic_extractors import * from bs4 import BeautifulSoup from datetime import datetime +from urllib.parse import urlparse # A class dedicated to get events from Raymond Bar # URL: https://www.raymondbar.net/ @@ -63,6 +64,8 @@ class CExtractor(TwoStepsExtractorNoPause): published=False, ): + root_address_human = "https://" + urlparse(self.url_human).netloc + "/" + soup = BeautifulSoup(event_content, "xml") @@ -71,7 +74,7 @@ class CExtractor(TwoStepsExtractorNoPause): soup = BeautifulSoup(content, "html.parser") - image = soup.select_one(".image img")["src"] + image = root_address_human + soup.select_one(".image img")["src"] description = soup.select_one(".rightcolumn .content").text location = soup.select_one(".infos .location").text public = soup.select_one(".infos .public").text