On ajoute les en-têtes HTTP Referer et User-Agent aux requêtes pour éviter une erreur 406 (Not Acceptable)

This commit is contained in:
Jean-Marie Favreau 2024-06-01 19:41:25 +02:00
parent 4e41efb75a
commit 9c9abd27dd
2 changed files with 15 additions and 7 deletions

View File

@ -28,7 +28,9 @@ class CExtractor(TwoStepsExtractor):
for d in list(set(dates)): for d in list(set(dates)):
if not self.only_future or self.now <= datetime.date.fromisoformat(d): if not self.only_future or self.now <= datetime.date.fromisoformat(d):
events = self.downloader.get_content( events = self.downloader.get_content(
url, post={"action": "load_evenements_jour", "jour": d} url,
post={"action": "load_evenements_jour", "jour": d},
referer="https://lacomediedeclermont.com/saison23-24/"
) )
if events: if events:
events = json5.loads(events) events = json5.loads(events)

View File

@ -1,5 +1,6 @@
from urllib.parse import urlencode from urllib.parse import urlencode
import urllib.request import urllib.request
from urllib.request import Request
import os import os
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.service import Service
@ -15,13 +16,13 @@ class Downloader(ABC):
def download(self, url, post=None): def download(self, url, post=None):
pass pass
def get_content(self, url, cache=None, post=None): def get_content(self, url, cache=None, referer=None, post=None):
if cache and os.path.exists(cache): if cache and os.path.exists(cache):
print("Loading cache ({})".format(cache)) print("Loading cache ({})".format(cache))
with open(cache) as f: with open(cache) as f:
content = "\n".join(f.readlines()) content = "\n".join(f.readlines())
else: else:
content = self.download(url, post) content = self.download(url, referer=referer, post=post)
if cache: if cache:
print("Saving cache ({})".format(cache)) print("Saving cache ({})".format(cache))
@ -37,13 +38,18 @@ class SimpleDownloader(Downloader):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
def download(self, url, post=None): def download(self, url, referer=None, post=None):
print("Downloading {}".format(url)) print("Downloading {}".format(url))
try: try:
if post: if post:
post_args = urlencode(post).encode() post_args = urlencode(post).encode("utf-8")
resource = urllib.request.urlopen(url, post_args) headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
}
if referer is not None:
headers["Referer"] = referer
req = Request(url, headers=headers)
resource = urllib.request.urlopen(req, post_args)
else: else:
resource = urllib.request.urlopen(url) resource = urllib.request.urlopen(url)
data = resource.read().decode(resource.headers.get_content_charset()) data = resource.read().decode(resource.headers.get_content_charset())