On ajoute un referer et un user-agent pour ne pas se prendre une erreur 406

This commit is contained in:
Jean-Marie Favreau 2024-06-01 19:41:25 +02:00
parent 4e41efb75a
commit 9c9abd27dd
2 changed files with 15 additions and 7 deletions

View File

@ -28,7 +28,9 @@ class CExtractor(TwoStepsExtractor):
for d in list(set(dates)):
if not self.only_future or self.now <= datetime.date.fromisoformat(d):
events = self.downloader.get_content(
url, post={"action": "load_evenements_jour", "jour": d}
url,
post={"action": "load_evenements_jour", "jour": d},
referer="https://lacomediedeclermont.com/saison23-24/"
)
if events:
events = json5.loads(events)

View File

@ -1,5 +1,6 @@
from urllib.parse import urlencode
import urllib.request
from urllib.request import Request
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
@ -15,13 +16,13 @@ class Downloader(ABC):
def download(self, url, post=None, referer=None):
    """Fetch ``url`` and return its content; concrete downloaders override this.

    ``get_content`` invokes this hook with ``referer=`` and ``post=`` passed
    by keyword, so every implementation must accept both names. Pass them
    by keyword: implementations may not all declare them in the same order.

    Args:
        url: Address of the resource to fetch.
        post: Optional form fields for the request (presumably sent as a
            POST body when provided -- confirm in each implementation).
        referer: Optional value for the ``Referer`` request header, or
            ``None`` to send no such header.

    Returns:
        The downloaded content. This base stub returns ``None``.
    """
    pass
def get_content(self, url, cache=None, post=None):
def get_content(self, url, cache=None, referer=None, post=None):
if cache and os.path.exists(cache):
print("Loading cache ({})".format(cache))
with open(cache) as f:
content = "\n".join(f.readlines())
else:
content = self.download(url, post)
content = self.download(url, referer=referer, post=post)
if cache:
print("Saving cache ({})".format(cache))
@ -37,13 +38,18 @@ class SimpleDownloader(Downloader):
def __init__(self):
super().__init__()
def download(self, url, post=None):
def download(self, url, referer=None, post=None):
print("Downloading {}".format(url))
try:
if post:
post_args = urlencode(post).encode()
resource = urllib.request.urlopen(url, post_args)
post_args = urlencode(post).encode("utf-8")
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
}
if referer is not None:
headers["Referer"] = referer
req = Request(url, headers=headers)
resource = urllib.request.urlopen(req, post_args)
else:
resource = urllib.request.urlopen(url)
data = resource.read().decode(resource.headers.get_content_charset())