On ajoute un referer pour toutes les requêtes
This commit is contained in:
parent
9c9abd27dd
commit
becce291af
@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
|
||||
# URL pour les humains: https://lacomediedeclermont.com/saison23-24/
|
||||
class CExtractor(TwoStepsExtractor):
|
||||
nom_lieu = "La Comédie de Clermont"
|
||||
url_referer = "https://lacomediedeclermont.com/saison23-24/"
|
||||
|
||||
def category_comedie2agenda(self, category):
|
||||
mapping = {
|
||||
|
@ -39,19 +39,19 @@ class SimpleDownloader(Downloader):
|
||||
super().__init__()
|
||||
|
||||
def download(self, url, referer=None, post=None):
|
||||
print("Downloading {}".format(url))
|
||||
print("Downloading {} referer: {} post: {}".format(url, referer, post))
|
||||
try:
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
|
||||
}
|
||||
if referer is not None:
|
||||
headers["Referer"] = referer
|
||||
req = Request(url, headers=headers)
|
||||
if post:
|
||||
post_args = urlencode(post).encode("utf-8")
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
|
||||
}
|
||||
if referer is not None:
|
||||
headers["Referer"] = referer
|
||||
req = Request(url, headers=headers)
|
||||
resource = urllib.request.urlopen(req, post_args)
|
||||
else:
|
||||
resource = urllib.request.urlopen(url)
|
||||
resource = urllib.request.urlopen(req)
|
||||
data = resource.read().decode(resource.headers.get_content_charset())
|
||||
return data
|
||||
except Exception as e:
|
||||
|
@ -47,6 +47,8 @@ class GGCalendar:
|
||||
# - then for each document downloaded from these urls, build the events
|
||||
# This class is an abstract class
|
||||
class TwoStepsExtractor(Extractor):
|
||||
url_referer=None
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.event_urls = None
|
||||
|
@ -13,7 +13,7 @@ class URL2Events:
|
||||
def process(
|
||||
self, url, url_human=None, cache=None, default_values=None, published=False
|
||||
):
|
||||
content = self.downloader.get_content(url, cache)
|
||||
content = self.downloader.get_content(url, cache, referer=self.extractor.url_referer)
|
||||
|
||||
if content is None:
|
||||
return None
|
||||
|
Loading…
Reference in New Issue
Block a user