diff --git a/deployment/Dockerfile b/deployment/Dockerfile
index 533a9e8..7f038bf 100644
--- a/deployment/Dockerfile
+++ b/deployment/Dockerfile
@@ -5,10 +5,11 @@ WORKDIR /usr/src/app
RUN --mount=type=cache,target=/var/cache/apt \
apt-get update && \
- apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver gdal-bin \
+ apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver gdal-bin fonts-symbola \
&& rm -rf /var/lib/apt/lists/*
+
COPY src/requirements.txt ./requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py
index 67672bb..1932cb9 100644
--- a/src/agenda_culturel/celery.py
+++ b/src/agenda_culturel/celery.py
@@ -6,7 +6,8 @@ from celery.schedules import crontab
from celery.utils.log import get_task_logger
from celery.exceptions import MaxRetriesExceededError
import time as time_
-
+from django.conf import settings
+from celery.signals import worker_ready
from contextlib import contextmanager
@@ -250,6 +251,23 @@ def daily_imports(self):
run_recurrent_imports_from_list([imp.pk for imp in imports])
+# Location of the generated site screenshot, stored under settings.MEDIA_ROOT.
+SCREENSHOT_FILE = settings.MEDIA_ROOT + '/screenshot.png'
+
+@app.task(bind=True)
+def screenshot(self):
+    """Celery task: capture a screenshot of the site into SCREENSHOT_FILE.
+
+    Images are enabled (``noimage=False``) so the capture shows the page
+    with its pictures. The task is scheduled in ``beat_schedule`` and is
+    also enqueued at worker start-up when the file does not exist yet.
+    """
+    downloader = ChromiumHeadlessDownloader(noimage=False)
+    try:
+        downloader.screenshot("https://pommesdelune.fr", SCREENSHOT_FILE)
+    finally:
+        # The downloader starts a Chromium instance in its constructor;
+        # quit it so each run does not leave a browser process behind.
+        downloader.driver.quit()
+
+@worker_ready.connect
+def at_start(sender, **k):
+    """Handle ``worker_ready``: queue a first screenshot if none exists.
+
+    When SCREENSHOT_FILE is already on disk nothing is enqueued; otherwise
+    the ``screenshot`` task is sent through the worker's own application.
+    """
+    if os.path.isfile(SCREENSHOT_FILE):
+        logger.info("Screenshot file already exists")
+        return
+
+    logger.info("Init screenshot file")
+    with sender.app.connection() as conn:
+        sender.app.send_task('agenda_culturel.celery.screenshot', None, connection=conn)
+
+
@app.task(bind=True)
def run_all_recurrent_imports(self):
from agenda_culturel.models import RecurrentImport
@@ -370,6 +388,10 @@ app.conf.beat_schedule = {
# Daily imports at 3:14 a.m.
"schedule": crontab(hour=3, minute=14),
},
+ "daily_screenshot": {
+ "task": "agenda_culturel.celery.screenshot",
+ "schedule": crontab(hour=3, minute=3),
+ },
"weekly_imports": {
"task": "agenda_culturel.celery.weekly_imports",
# Daily imports on Mondays at 2:22 a.m.
diff --git a/src/agenda_culturel/import_tasks/downloader.py b/src/agenda_culturel/import_tasks/downloader.py
index 7fd45ee..905c130 100644
--- a/src/agenda_culturel/import_tasks/downloader.py
+++ b/src/agenda_culturel/import_tasks/downloader.py
@@ -66,7 +66,7 @@ class SimpleDownloader(Downloader):
class ChromiumHeadlessDownloader(Downloader):
- def __init__(self, pause=True):
+ def __init__(self, pause=True, noimage=True):
super().__init__()
self.pause = pause
self.options = Options()
@@ -78,17 +78,31 @@ class ChromiumHeadlessDownloader(Downloader):
self.options.add_argument("--disable-dev-shm-usage")
self.options.add_argument("--disable-browser-side-navigation")
self.options.add_argument("--disable-gpu")
- self.options.add_experimental_option(
- "prefs", {
- # block image loading
- "profile.managed_default_content_settings.images": 2,
- }
- )
+ if noimage:
+ self.options.add_experimental_option(
+ "prefs", {
+ # block image loading
+ "profile.managed_default_content_settings.images": 2,
+ }
+ )
self.service = Service("/usr/bin/chromedriver")
self.driver = webdriver.Chrome(service=self.service, options=self.options)
+    def screenshot(self, url, path_image):
+        """Load *url* in the headless browser and save a screenshot.
+
+        Args:
+            url: address of the page to capture.
+            path_image: destination file path for the image.
+
+        Returns:
+            True when the page was loaded and the screenshot call completed
+            without raising, False when an exception occurred.
+        """
+        print("Screenshot {}".format(url))
+        try:
+            self.driver.get(url)
+            if self.pause:
+                # Give the page a moment to finish rendering before capture.
+                time.sleep(2)
+            self.driver.save_screenshot(path_image)
+        except Exception:
+            # Report the failing address; the previous code referenced an
+            # undefined name here and raised NameError instead.
+            print(f">> Exception: {url}")
+            return False
+
+        return True
+
def download(self, url, referer=None, post=None):
if post:
raise Exception("POST method with Chromium headless not yet implemented")
diff --git a/src/agenda_culturel/import_tasks/extractor_facebook.py b/src/agenda_culturel/import_tasks/extractor_facebook.py
index e0c50b6..b7970ab 100644
--- a/src/agenda_culturel/import_tasks/extractor_facebook.py
+++ b/src/agenda_culturel/import_tasks/extractor_facebook.py
@@ -239,7 +239,7 @@ class FacebookEventExtractor(Extractor):
result = "https://www.facebook.com" + u.path
# remove name in the url
- match = re.match(r"(.*/events)/s/([a-zA-Z-][a-zA-Z-0-9]+)/([0-9/]*)", result)
+ match = re.match(r"(.*/events)/s/([a-zA-Z-][a-zA-Z-0-9-]+)/([0-9/]*)", result)
if match:
result = match[1] + "/" + match[3]
diff --git a/src/agenda_culturel/static/images/capture.png b/src/agenda_culturel/static/images/capture.png
deleted file mode 100644
index 203a3de..0000000
Binary files a/src/agenda_culturel/static/images/capture.png and /dev/null differ
diff --git a/src/agenda_culturel/templates/agenda_culturel/page.html b/src/agenda_culturel/templates/agenda_culturel/page.html
index 7483394..465cb99 100644
--- a/src/agenda_culturel/templates/agenda_culturel/page.html
+++ b/src/agenda_culturel/templates/agenda_culturel/page.html
@@ -9,7 +9,7 @@
{% load static %}
-
+
{% if debug %}