On ajoute les en-têtes HTTP Referer et User-Agent aux requêtes POST pour éviter une erreur 406 (Not Acceptable)
This commit is contained in:
parent
4e41efb75a
commit
9c9abd27dd
@ -28,7 +28,9 @@ class CExtractor(TwoStepsExtractor):
|
||||
for d in list(set(dates)):
|
||||
if not self.only_future or self.now <= datetime.date.fromisoformat(d):
|
||||
events = self.downloader.get_content(
|
||||
url, post={"action": "load_evenements_jour", "jour": d}
|
||||
url,
|
||||
post={"action": "load_evenements_jour", "jour": d},
|
||||
referer="https://lacomediedeclermont.com/saison23-24/"
|
||||
)
|
||||
if events:
|
||||
events = json5.loads(events)
|
||||
|
@ -1,5 +1,6 @@
|
||||
from urllib.parse import urlencode
|
||||
import urllib.request
|
||||
from urllib.request import Request
|
||||
import os
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
@ -15,13 +16,13 @@ class Downloader(ABC):
|
||||
def download(self, url, post=None):
|
||||
pass
|
||||
|
||||
def get_content(self, url, cache=None, post=None):
|
||||
def get_content(self, url, cache=None, referer=None, post=None):
|
||||
if cache and os.path.exists(cache):
|
||||
print("Loading cache ({})".format(cache))
|
||||
with open(cache) as f:
|
||||
content = "\n".join(f.readlines())
|
||||
else:
|
||||
content = self.download(url, post)
|
||||
content = self.download(url, referer=referer, post=post)
|
||||
|
||||
if cache:
|
||||
print("Saving cache ({})".format(cache))
|
||||
@ -37,13 +38,18 @@ class SimpleDownloader(Downloader):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def download(self, url, post=None):
|
||||
def download(self, url, referer=None, post=None):
|
||||
print("Downloading {}".format(url))
|
||||
|
||||
try:
|
||||
if post:
|
||||
post_args = urlencode(post).encode()
|
||||
resource = urllib.request.urlopen(url, post_args)
|
||||
post_args = urlencode(post).encode("utf-8")
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
|
||||
}
|
||||
if referer is not None:
|
||||
headers["Referer"] = referer
|
||||
req = Request(url, headers=headers)
|
||||
resource = urllib.request.urlopen(req, post_args)
|
||||
else:
|
||||
resource = urllib.request.urlopen(url)
|
||||
data = resource.read().decode(resource.headers.get_content_charset())
|
||||
|
Loading…
Reference in New Issue
Block a user