On fait une capture par jour, pour l'aperçu moteurs de recherche
Fix #225
This commit is contained in:
parent
21b42e4fee
commit
ec707bf272
@ -5,10 +5,11 @@ WORKDIR /usr/src/app
|
|||||||
|
|
||||||
RUN --mount=type=cache,target=/var/cache/apt \
|
RUN --mount=type=cache,target=/var/cache/apt \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver gdal-bin \
|
apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver gdal-bin fonts-symbola \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
COPY src/requirements.txt ./requirements.txt
|
COPY src/requirements.txt ./requirements.txt
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
|
@ -6,7 +6,8 @@ from celery.schedules import crontab
|
|||||||
from celery.utils.log import get_task_logger
|
from celery.utils.log import get_task_logger
|
||||||
from celery.exceptions import MaxRetriesExceededError
|
from celery.exceptions import MaxRetriesExceededError
|
||||||
import time as time_
|
import time as time_
|
||||||
|
from django.conf import settings
|
||||||
|
from celery.signals import worker_ready
|
||||||
|
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
|
||||||
@ -250,6 +251,23 @@ def daily_imports(self):
|
|||||||
run_recurrent_imports_from_list([imp.pk for imp in imports])
|
run_recurrent_imports_from_list([imp.pk for imp in imports])
|
||||||
|
|
||||||
|
|
||||||
|
# Path of the site capture, stored directly under the media root.
SCREENSHOT_FILE = settings.MEDIA_ROOT + '/screenshot.png'


@app.task(bind=True)
def screenshot(self):
    """Capture the public home page into ``SCREENSHOT_FILE``.

    The downloader is created with ``noimage=False`` so that the
    image-blocking browser preference is not applied and pictures
    appear in the capture.
    """
    target_url = "https://pommesdelune.fr"
    browser = ChromiumHeadlessDownloader(noimage=False)
    browser.screenshot(target_url, SCREENSHOT_FILE)
|
||||||
|
|
||||||
|
@worker_ready.connect
def at_start(sender, **k):
    """Queue an initial screenshot when the worker becomes ready.

    Connected to the ``worker_ready`` signal. If ``SCREENSHOT_FILE`` is
    already on disk nothing is sent; otherwise the screenshot task is
    submitted by name through a connection opened from the sender's app.
    """
    if os.path.isfile(SCREENSHOT_FILE):
        logger.info("Screenshot file already exists")
        return

    logger.info("Init screenshot file")
    with sender.app.connection() as broker_connection:
        sender.app.send_task(
            'agenda_culturel.celery.screenshot',
            None,
            connection=broker_connection,
        )
|
||||||
|
|
||||||
|
|
||||||
@app.task(bind=True)
|
@app.task(bind=True)
|
||||||
def run_all_recurrent_imports(self):
|
def run_all_recurrent_imports(self):
|
||||||
from agenda_culturel.models import RecurrentImport
|
from agenda_culturel.models import RecurrentImport
|
||||||
@ -370,6 +388,10 @@ app.conf.beat_schedule = {
|
|||||||
# Daily imports at 3:14 a.m.
|
# Daily imports at 3:14 a.m.
|
||||||
"schedule": crontab(hour=3, minute=14),
|
"schedule": crontab(hour=3, minute=14),
|
||||||
},
|
},
|
||||||
|
"daily_screenshot": {
|
||||||
|
"task": "agenda_culturel.celery.screenshot",
|
||||||
|
"schedule": crontab(hour=3, minute=3),
|
||||||
|
},
|
||||||
"weekly_imports": {
|
"weekly_imports": {
|
||||||
"task": "agenda_culturel.celery.weekly_imports",
|
"task": "agenda_culturel.celery.weekly_imports",
|
||||||
# Weekly imports on Mondays at 2:22 a.m.
|
# Weekly imports on Mondays at 2:22 a.m.
|
||||||
|
@ -66,7 +66,7 @@ class SimpleDownloader(Downloader):
|
|||||||
|
|
||||||
|
|
||||||
class ChromiumHeadlessDownloader(Downloader):
|
class ChromiumHeadlessDownloader(Downloader):
|
||||||
def __init__(self, pause=True):
|
def __init__(self, pause=True, noimage=True):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.pause = pause
|
self.pause = pause
|
||||||
self.options = Options()
|
self.options = Options()
|
||||||
@ -78,17 +78,31 @@ class ChromiumHeadlessDownloader(Downloader):
|
|||||||
self.options.add_argument("--disable-dev-shm-usage")
|
self.options.add_argument("--disable-dev-shm-usage")
|
||||||
self.options.add_argument("--disable-browser-side-navigation")
|
self.options.add_argument("--disable-browser-side-navigation")
|
||||||
self.options.add_argument("--disable-gpu")
|
self.options.add_argument("--disable-gpu")
|
||||||
self.options.add_experimental_option(
|
if noimage:
|
||||||
"prefs", {
|
self.options.add_experimental_option(
|
||||||
# block image loading
|
"prefs", {
|
||||||
"profile.managed_default_content_settings.images": 2,
|
# block image loading
|
||||||
}
|
"profile.managed_default_content_settings.images": 2,
|
||||||
)
|
}
|
||||||
|
)
|
||||||
|
|
||||||
self.service = Service("/usr/bin/chromedriver")
|
self.service = Service("/usr/bin/chromedriver")
|
||||||
self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
||||||
|
|
||||||
|
|
||||||
|
def screenshot(self, url, path_image):
    """Load ``url`` in the browser driver and save a capture to ``path_image``.

    Args:
        url: Address of the page to capture.
        path_image: Destination path handed to the driver's
            ``save_screenshot``.

    Returns:
        ``True`` when loading and saving completed without raising,
        ``False`` when any exception occurred along the way.
    """
    print("Screenshot {}".format(url))
    try:
        self.driver.get(url)
        if self.pause:
            # Fixed two-second wait before capturing
            # (presumably lets the page finish rendering -- TODO confirm).
            time.sleep(2)
        self.driver.save_screenshot(path_image)
    except Exception:
        # Best-effort capture: report the failing URL and signal failure.
        # The original referenced an undefined name ``URL`` here, which
        # turned every failure into a NameError instead of returning False.
        print(f">> Exception: {url}")
        return False

    return True
|
||||||
|
|
||||||
def download(self, url, referer=None, post=None):
|
def download(self, url, referer=None, post=None):
|
||||||
if post:
|
if post:
|
||||||
raise Exception("POST method with Chromium headless not yet implemented")
|
raise Exception("POST method with Chromium headless not yet implemented")
|
||||||
|
@ -239,7 +239,7 @@ class FacebookEventExtractor(Extractor):
|
|||||||
result = "https://www.facebook.com" + u.path
|
result = "https://www.facebook.com" + u.path
|
||||||
|
|
||||||
# remove name in the url
|
# remove name in the url
|
||||||
match = re.match(r"(.*/events)/s/([a-zA-Z-][a-zA-Z-0-9]+)/([0-9/]*)", result)
|
match = re.match(r"(.*/events)/s/([a-zA-Z-][a-zA-Z-0-9-]+)/([0-9/]*)", result)
|
||||||
if match:
|
if match:
|
||||||
result = match[1] + "/" + match[3]
|
result = match[1] + "/" + match[3]
|
||||||
|
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 237 KiB |
@ -9,7 +9,7 @@
|
|||||||
{% load static %}
|
{% load static %}
|
||||||
<meta property="og:title" content="Pommes de lune — {% block og_title %}{% endblock %}" />
|
<meta property="og:title" content="Pommes de lune — {% block og_title %}{% endblock %}" />
|
||||||
<meta property="og:description" content="{% block og_description %}Événements culturels à Clermont-Ferrand et aux environs{% endblock %}" />
|
<meta property="og:description" content="{% block og_description %}Événements culturels à Clermont-Ferrand et aux environs{% endblock %}" />
|
||||||
<meta property="og:image" content="{% block og_image %}{% static 'images/capture.png' %}{% endblock %}" />
|
{# get_media_prefix outputs MEDIA_URL, which Django requires to end with "/", so no extra slash is added before the file name. #}
<meta property="og:image" content="{% block og_image %}{% get_media_prefix %}screenshot.png{% endblock %}" />
|
||||||
<meta property="og:url" content="{{ request.build_absolute_uri }}" />
|
<meta property="og:url" content="{{ request.build_absolute_uri }}" />
|
||||||
|
|
||||||
{% if debug %}
|
{% if debug %}
|
||||||
|
Loading…
Reference in New Issue
Block a user