On fait une capture par jour, pour l'aperçu moteurs de recherche
Fix #225
This commit is contained in:
parent
21b42e4fee
commit
ec707bf272
@ -5,10 +5,11 @@ WORKDIR /usr/src/app
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt \
|
||||
apt-get update && \
|
||||
apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver gdal-bin \
|
||||
apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver gdal-bin fonts-symbola \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
|
||||
COPY src/requirements.txt ./requirements.txt
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
|
@ -6,7 +6,8 @@ from celery.schedules import crontab
|
||||
from celery.utils.log import get_task_logger
|
||||
from celery.exceptions import MaxRetriesExceededError
|
||||
import time as time_
|
||||
|
||||
from django.conf import settings
|
||||
from celery.signals import worker_ready
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
@ -250,6 +251,23 @@ def daily_imports(self):
|
||||
run_recurrent_imports_from_list([imp.pk for imp in imports])
|
||||
|
||||
|
||||
SCREENSHOT_FILE = settings.MEDIA_ROOT + '/screenshot.png'
|
||||
|
||||
@app.task(bind=True)
|
||||
def screenshot(self):
|
||||
downloader = ChromiumHeadlessDownloader(noimage=False)
|
||||
downloader.screenshot("https://pommesdelune.fr", SCREENSHOT_FILE)
|
||||
|
||||
@worker_ready.connect
|
||||
def at_start(sender, **k):
|
||||
if not os.path.isfile(SCREENSHOT_FILE):
|
||||
logger.info("Init screenshot file")
|
||||
with sender.app.connection() as conn:
|
||||
sender.app.send_task('agenda_culturel.celery.screenshot', None, connection=conn)
|
||||
else:
|
||||
logger.info("Screenshot file already exists")
|
||||
|
||||
|
||||
@app.task(bind=True)
|
||||
def run_all_recurrent_imports(self):
|
||||
from agenda_culturel.models import RecurrentImport
|
||||
@ -370,6 +388,10 @@ app.conf.beat_schedule = {
|
||||
# Daily imports at 3:14 a.m.
|
||||
"schedule": crontab(hour=3, minute=14),
|
||||
},
|
||||
"daily_screenshot": {
|
||||
"task": "agenda_culturel.celery.screenshot",
|
||||
"schedule": crontab(hour=3, minute=3),
|
||||
},
|
||||
"weekly_imports": {
|
||||
"task": "agenda_culturel.celery.weekly_imports",
|
||||
# Daily imports on Mondays at 2:22 a.m.
|
||||
|
@ -66,7 +66,7 @@ class SimpleDownloader(Downloader):
|
||||
|
||||
|
||||
class ChromiumHeadlessDownloader(Downloader):
|
||||
def __init__(self, pause=True):
|
||||
def __init__(self, pause=True, noimage=True):
|
||||
super().__init__()
|
||||
self.pause = pause
|
||||
self.options = Options()
|
||||
@ -78,17 +78,31 @@ class ChromiumHeadlessDownloader(Downloader):
|
||||
self.options.add_argument("--disable-dev-shm-usage")
|
||||
self.options.add_argument("--disable-browser-side-navigation")
|
||||
self.options.add_argument("--disable-gpu")
|
||||
self.options.add_experimental_option(
|
||||
"prefs", {
|
||||
# block image loading
|
||||
"profile.managed_default_content_settings.images": 2,
|
||||
}
|
||||
)
|
||||
if noimage:
|
||||
self.options.add_experimental_option(
|
||||
"prefs", {
|
||||
# block image loading
|
||||
"profile.managed_default_content_settings.images": 2,
|
||||
}
|
||||
)
|
||||
|
||||
self.service = Service("/usr/bin/chromedriver")
|
||||
self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
||||
|
||||
|
||||
def screenshot(self, url, path_image):
|
||||
print("Screenshot {}".format(url))
|
||||
try:
|
||||
self.driver.get(url)
|
||||
if self.pause:
|
||||
time.sleep(2)
|
||||
self.driver.save_screenshot(path_image)
|
||||
except:
|
||||
print(f">> Exception: {URL}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def download(self, url, referer=None, post=None):
|
||||
if post:
|
||||
raise Exception("POST method with Chromium headless not yet implemented")
|
||||
|
@ -239,7 +239,7 @@ class FacebookEventExtractor(Extractor):
|
||||
result = "https://www.facebook.com" + u.path
|
||||
|
||||
# remove name in the url
|
||||
match = re.match(r"(.*/events)/s/([a-zA-Z-][a-zA-Z-0-9]+)/([0-9/]*)", result)
|
||||
match = re.match(r"(.*/events)/s/([a-zA-Z-][a-zA-Z-0-9-]+)/([0-9/]*)", result)
|
||||
if match:
|
||||
result = match[1] + "/" + match[3]
|
||||
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 237 KiB |
@ -9,7 +9,7 @@
|
||||
{% load static %}
|
||||
<meta property="og:title" content="Pommes de lune — {% block og_title %}{% endblock %}" />
|
||||
<meta property="og:description" content="{% block og_description %}Événements culturels à Clermont-Ferrand et aux environs{% endblock %}" />
|
||||
<meta property="og:image" content="{% block og_image %}{% static 'images/capture.png' %}{% endblock %}" />
|
||||
<meta property="og:image" content="{% block og_image %}{% get_media_prefix %}/screenshot.png{% endblock %}" />
|
||||
<meta property="og:url" content="{{ request.build_absolute_uri }}" />
|
||||
|
||||
{% if debug %}
|
||||
|
Loading…
Reference in New Issue
Block a user