- Fix crash récupération événement FB

- Tentative d'accélération quand on utilise Selenium (objet créé au chargement de Celery)
This commit is contained in:
Jean-Marie Favreau 2024-06-26 10:45:56 +02:00
parent 7227fd45ae
commit 623274b1a6
4 changed files with 11 additions and 5 deletions

View File

@@ -20,6 +20,8 @@ app = Celery("agenda_culturel")
 logger = get_task_logger(__name__)
+chromiumDownloader = ChromiumHeadlessDownloader()
 # Using a string here means the worker doesn't have to serialize
 # the configuration object to child processes.
@@ -96,7 +98,7 @@ def run_recurrent_import(self, pk):
     downloader = (
         SimpleDownloader()
         if rimport.downloader == RecurrentImport.DOWNLOADER.SIMPLE
-        else ChromiumHeadlessDownloader()
+        else chromiumDownloader
     )
     if rimport.processor == RecurrentImport.PROCESSOR.ICAL:
         extractor = ICALExtractor()

View File

@@ -67,14 +67,14 @@ class ChromiumHeadlessDownloader(Downloader):
         self.options.add_argument("--disable-dev-shm-usage")
         self.options.add_argument("--no-sandbox")
         self.service = Service("/usr/bin/chromedriver")
+        self.driver = webdriver.Chrome(service=self.service, options=self.options)
-    def download(self, url, post=None):
+    def download(self, url, referer=None, post=None):
         if post:
             raise Exception("POST method with Chromium headless not yet implemented")
         print("Download {}".format(url))
-        self.driver = webdriver.Chrome(service=self.service, options=self.options)
         self.driver.get(url)
         doc = self.driver.page_source
-        self.driver.quit()
+        self.driver.close()
         return doc

View File

@@ -16,6 +16,7 @@ class Extractor(ABC):
         self.header = {}
         self.events = []
         self.downloader = None
+        self.referer = ""
     def guess_end_day(self, start_day, start_time, end_time):
         if end_time:

View File

@@ -13,7 +13,10 @@ class URL2Events:
     def process(
         self, url, url_human=None, cache=None, default_values=None, published=False
     ):
-        content = self.downloader.get_content(url, cache, referer=self.extractor.url_referer)
+        referer = ""
+        if self.extractor:
+            referer = self.extractor.url_referer
+        content = self.downloader.get_content(url, cache, referer=referer)
         if content is None:
             return None