On ajoute un referer pour toutes les requêtes

This commit is contained in:
Jean-Marie Favreau 2024-06-01 19:58:38 +02:00
parent 9c9abd27dd
commit becce291af
4 changed files with 12 additions and 9 deletions

View File

@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
# URL pour les humains: https://lacomediedeclermont.com/saison23-24/
class CExtractor(TwoStepsExtractor):
nom_lieu = "La Comédie de Clermont"
url_referer = "https://lacomediedeclermont.com/saison23-24/"
def category_comedie2agenda(self, category):
mapping = {

View File

@ -39,19 +39,19 @@ class SimpleDownloader(Downloader):
super().__init__()
def download(self, url, referer=None, post=None):
print("Downloading {}".format(url))
print("Downloading {} referer: {} post: {}".format(url, referer, post))
try:
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
}
if referer is not None:
headers["Referer"] = referer
req = Request(url, headers=headers)
if post:
post_args = urlencode(post).encode("utf-8")
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
}
if referer is not None:
headers["Referer"] = referer
req = Request(url, headers=headers)
resource = urllib.request.urlopen(req, post_args)
else:
resource = urllib.request.urlopen(url)
resource = urllib.request.urlopen(req)
data = resource.read().decode(resource.headers.get_content_charset())
return data
except Exception as e:

View File

@ -47,6 +47,8 @@ class GGCalendar:
# - then for each document downloaded from these urls, build the events
# This class is an abstract class
class TwoStepsExtractor(Extractor):
url_referer=None
def __init__(self):
super().__init__()
self.event_urls = None

View File

@ -13,7 +13,7 @@ class URL2Events:
def process(
self, url, url_human=None, cache=None, default_values=None, published=False
):
content = self.downloader.get_content(url, cache)
content = self.downloader.get_content(url, cache, referer=self.extractor.url_referer)
if content is None:
return None