On ajoute les en-têtes HTTP Referer et User-Agent aux requêtes POST pour éviter une erreur 406 (Not Acceptable)
This commit is contained in:
parent
4e41efb75a
commit
9c9abd27dd
@ -28,7 +28,9 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
for d in list(set(dates)):
|
for d in list(set(dates)):
|
||||||
if not self.only_future or self.now <= datetime.date.fromisoformat(d):
|
if not self.only_future or self.now <= datetime.date.fromisoformat(d):
|
||||||
events = self.downloader.get_content(
|
events = self.downloader.get_content(
|
||||||
url, post={"action": "load_evenements_jour", "jour": d}
|
url,
|
||||||
|
post={"action": "load_evenements_jour", "jour": d},
|
||||||
|
referer="https://lacomediedeclermont.com/saison23-24/"
|
||||||
)
|
)
|
||||||
if events:
|
if events:
|
||||||
events = json5.loads(events)
|
events = json5.loads(events)
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
from urllib.request import Request
|
||||||
import os
|
import os
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.chrome.service import Service
|
from selenium.webdriver.chrome.service import Service
|
||||||
@ -15,13 +16,13 @@ class Downloader(ABC):
|
|||||||
def download(self, url, post=None):
|
def download(self, url, post=None):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def get_content(self, url, cache=None, post=None):
|
def get_content(self, url, cache=None, referer=None, post=None):
|
||||||
if cache and os.path.exists(cache):
|
if cache and os.path.exists(cache):
|
||||||
print("Loading cache ({})".format(cache))
|
print("Loading cache ({})".format(cache))
|
||||||
with open(cache) as f:
|
with open(cache) as f:
|
||||||
content = "\n".join(f.readlines())
|
content = "\n".join(f.readlines())
|
||||||
else:
|
else:
|
||||||
content = self.download(url, post)
|
content = self.download(url, referer=referer, post=post)
|
||||||
|
|
||||||
if cache:
|
if cache:
|
||||||
print("Saving cache ({})".format(cache))
|
print("Saving cache ({})".format(cache))
|
||||||
@ -37,13 +38,18 @@ class SimpleDownloader(Downloader):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def download(self, url, post=None):
|
def download(self, url, referer=None, post=None):
|
||||||
print("Downloading {}".format(url))
|
print("Downloading {}".format(url))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if post:
|
if post:
|
||||||
post_args = urlencode(post).encode()
|
post_args = urlencode(post).encode("utf-8")
|
||||||
resource = urllib.request.urlopen(url, post_args)
|
headers = {
|
||||||
|
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0",
|
||||||
|
}
|
||||||
|
if referer is not None:
|
||||||
|
headers["Referer"] = referer
|
||||||
|
req = Request(url, headers=headers)
|
||||||
|
resource = urllib.request.urlopen(req, post_args)
|
||||||
else:
|
else:
|
||||||
resource = urllib.request.urlopen(url)
|
resource = urllib.request.urlopen(url)
|
||||||
data = resource.read().decode(resource.headers.get_content_charset())
|
data = resource.read().decode(resource.headers.get_content_charset())
|
||||||
|
Loading…
Reference in New Issue
Block a user