Improve import handling to avoid parallel imports with Chromium (notably difficulties with Facebook)
commit f9038a03f4 (parent f38d4bee97)
@@ -4,6 +4,10 @@ import json
 from celery import Celery, Task
 from celery.schedules import crontab
 from celery.utils.log import get_task_logger
+import time as time_
 
+
+from contextlib import contextmanager
+
 from .import_tasks.downloader import *
 from .import_tasks.extractor import *
@@ -18,6 +22,8 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", f"agenda_culturel.settings.{APP_
 
 app = Celery("agenda_culturel")
 
+from django.core.cache import cache
+
 logger = get_task_logger(__name__)
 
 
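
Note that the cache-based lock introduced below only serializes work across Celery workers if the cache is shared by all worker processes (memcached or Redis); Django's default LocMemCache is per-process and would not provide mutual exclusion. A minimal settings sketch, assuming a local memcached instance (the project's actual CACHES configuration is not part of this diff):

    # settings sketch: assumes memcached on localhost; any shared,
    # cross-process cache backend (e.g. Redis) would also work
    CACHES = {
        "default": {
            "BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
            "LOCATION": "127.0.0.1:11211",
        }
    }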
@@ -30,6 +36,26 @@ app.config_from_object("django.conf:settings", namespace="CELERY")
 # Load task modules from all registered Django apps.
 app.autodiscover_tasks()
 
+LOCK_EXPIRE = 60 * 10  # Lock expires in 10 minutes
+
+@contextmanager
+def memcache_chromium_lock(oid):
+    lock_id = "chromium-lock"
+    timeout_at = time_.monotonic() + LOCK_EXPIRE - 3
+    # cache.add fails if the key already exists
+    status = cache.add(lock_id, oid, LOCK_EXPIRE)
+    try:
+        yield status
+    finally:
+        # memcache delete is very slow, but we have to use it to take
+        # advantage of using add() for atomic locking
+        if time_.monotonic() < timeout_at and status:
+            # don't release the lock if we exceeded the timeout
+            # to lessen the chance of releasing an expired lock
+            # owned by someone else
+            # also don't release the lock if we didn't acquire it
+            cache.delete(lock_id)
+
 
 def close_import_task(taskid, success, error_message, importer):
     from agenda_culturel.models import BatchImportation
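
The context manager above is the standard recipe from the Celery documentation ("Ensuring a task is only executed one at a time"): cache.add is atomic, so only one caller can create the lock key, and the LOCK_EXPIRE timeout guarantees the lock is eventually released even if a worker crashes while holding it. A minimal usage sketch (do_chromium_work is a hypothetical placeholder):

    # only the caller that actually acquired the lock does the work;
    # everyone else sees acquired == False and should retry later
    with memcache_chromium_lock(app.oid) as acquired:
        if acquired:
            do_chromium_work()  # hypothetical Chromium-bound job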
@@ -84,33 +110,21 @@ class ChromiumTask(Task):
         return self._chm
 
 
-@app.task(base=ChromiumTask, bind=True)
-def run_recurrent_import(self, pk):
+def run_recurrent_import_internal(rimport, downloader, req_id):
     from agenda_culturel.models import RecurrentImport, BatchImportation
     from .db_importer import DBImporterEvents
 
-    logger.info("Run recurrent import: {}".format(self.request.id))
+    logger.info("Run recurrent import: {}".format(req_id))
 
-    # get the recurrent import
-    rimport = RecurrentImport.objects.get(pk=pk)
 
     # create a batch importation
-    importation = BatchImportation(recurrentImport=rimport, celery_id=self.request.id)
+    importation = BatchImportation(recurrentImport=rimport, celery_id=req_id)
     # save batch importation
     importation.save()
 
     # create an importer
-    importer = DBImporterEvents(self.request.id)
+    importer = DBImporterEvents(req_id)
 
-    # prepare downloading and extracting processes
-    if rimport.downloader == RecurrentImport.DOWNLOADER.SIMPLE:
-        downloader = SimpleDownloader()
-    elif rimport.downloader == RecurrentImport.DOWNLOADER.CHROMIUMHEADLESS:
-        downloader = self.chromiumDownloader
-        downloader.pause = False
-    else:
-        downloader = self.chromiumDownloader
-        downloader.pause = True
 
     if rimport.processor == RecurrentImport.PROCESSOR.ICAL:
         extractor = ICALExtractor()
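
The refactoring above separates Celery plumbing from the import logic: run_recurrent_import_internal receives the RecurrentImport, the downloader and a request id as plain arguments, so it can also be driven synchronously, for instance from a shell or a test. A hedged sketch (the pk value is made up):

    # hypothetical direct call, bypassing the task queue
    rimport = RecurrentImport.objects.get(pk=1)  # assumed fixture
    downloader = SimpleDownloader()
    run_recurrent_import_internal(rimport, downloader, "manual-run")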
@@ -164,10 +178,44 @@ def run_recurrent_import(self, pk):
         success, error_message = importer.import_events(json_events)
 
         # finally, close task
-        close_import_task(self.request.id, success, error_message, importer)
+        close_import_task(req_id, success, error_message, importer)
     except Exception as e:
         logger.error(e)
-        close_import_task(self.request.id, False, e, importer)
+        close_import_task(req_id, False, e, importer)
 
     return
 
 
+@app.task(base=ChromiumTask, bind=True)
+def run_recurrent_import(self, pk):
+    from agenda_culturel.models import RecurrentImport
+
+    # get the recurrent import
+    rimport = RecurrentImport.objects.get(pk=pk)
+
+    # prepare downloading and extracting processes
+    if rimport.downloader == RecurrentImport.DOWNLOADER.SIMPLE:
+        downloader = SimpleDownloader()
+    elif rimport.downloader == RecurrentImport.DOWNLOADER.CHROMIUMHEADLESS:
+        downloader = self.chromiumDownloader
+        downloader.pause = False
+    else:
+        downloader = self.chromiumDownloader
+        downloader.pause = True
+
+    # only one thread using Chromium can run at a time,
+    # to prevent errors (including strange Facebook errors)
+    if rimport.downloader in [RecurrentImport.DOWNLOADER.CHROMIUMHEADLESS, RecurrentImport.DOWNLOADER.CHROMIUMHEADLESSPAUSE]:
+        with memcache_chromium_lock(self.app.oid) as acquired:
+            if acquired:
+                return run_recurrent_import_internal(rimport, downloader, self.request.id)
+    else:
+        return run_recurrent_import_internal(rimport, downloader, self.request.id)
+
+    # if chromium is locked, we wait 30 seconds before retrying
+    raise self.retry(countdown=30)
 
 
 @app.task(bind=True)
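
One caveat on the retry pattern above: self.retry(countdown=30) raises a Retry exception, and by default a Celery task gives up after max_retries=3 attempts, so a Chromium lock held across several cycles makes the waiting import fail rather than wait indefinitely. If unbounded retries were wanted, the declaration could say so explicitly; a sketch, not what this commit does:

    # sketch only: retry forever while the Chromium lock is busy
    @app.task(base=ChromiumTask, bind=True, max_retries=None)
    def run_recurrent_import(self, pk):
        ...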
@@ -224,53 +272,61 @@ def import_events_from_url(self, url, cat):
     from agenda_culturel.models import RecurrentImport, BatchImportation
     from agenda_culturel.models import Event, Category
 
-    logger.info("URL import: {}".format(self.request.id))
-
-    # clean url
-    url = Extractor.clean_url(url)
-
-    # we check if the url is known
-    existing = Event.objects.filter(uuids__contains=[url])
-    # if it's unknown
-    if len(existing) == 0:
-
-        # create an importer
-        importer = DBImporterEvents(self.request.id)
-
-        # create a batch importation
-        importation = BatchImportation(url_source=url, celery_id=self.request.id)
-        # save batch importation
-        importation.save()
-
-        try:
-            ## create loader
-            u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True)
-            # set default values
-            values = {}
-            if cat is not None:
-                values = {"category": cat}
-
-            # get event
-            events = u2e.process(
-                url, published=False, default_values=values
-            )
-
-            if events:
-                # convert it to json
-                json_events = json.dumps(events, default=str)
-
-                # import events (from json)
-                success, error_message = importer.import_events(json_events)
-
-                # finally, close task
-                close_import_task(self.request.id, success, error_message, importer)
-            else:
-                close_import_task(self.request.id, False, "Cannot find any event", importer)
-        except Exception as e:
-            logger.error(e)
-            close_import_task(self.request.id, False, e, importer)
-
-    return
+    with memcache_chromium_lock(self.app.oid) as acquired:
+        if acquired:
+            logger.info("URL import: {}".format(self.request.id))
+
+            # clean url
+            url = Extractor.clean_url(url)
+
+            # we check if the url is known
+            existing = Event.objects.filter(uuids__contains=[url])
+            # if it's unknown
+            if len(existing) == 0:
+
+                # create an importer
+                importer = DBImporterEvents(self.request.id)
+
+                # create a batch importation
+                importation = BatchImportation(url_source=url, celery_id=self.request.id)
+                # save batch importation
+                importation.save()
+
+                try:
+                    ## create loader
+                    u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True)
+                    # set default values
+                    values = {}
+                    if cat is not None:
+                        values = {"category": cat}
+
+                    # get event
+                    events = u2e.process(
+                        url, published=False, default_values=values
+                    )
+
+                    if events:
+                        # convert it to json
+                        json_events = json.dumps(events, default=str)
+
+                        # import events (from json)
+                        success, error_message = importer.import_events(json_events)
+
+                        # finally, close task
+                        close_import_task(self.request.id, success, error_message, importer)
+                    else:
+                        close_import_task(self.request.id, False, "Cannot find any event", importer)
+                except Exception as e:
+                    logger.error(e)
+                    close_import_task(self.request.id, False, e, importer)
+
+            return
+
+    # if chromium is locked, we wait 30 seconds before retrying
+    raise self.retry(countdown=30)
 
 
 @app.task(base=ChromiumTask, bind=True)
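
Both tasks now funnel their Chromium work through the same "chromium-lock" cache key, so at most one headless Chromium runs at a time, whether the import was scheduled or requested for a single URL. A hedged usage sketch of queueing the URL task (call sites are outside this diff, and the module path is an assumption):

    from agenda_culturel.celery import import_events_from_url  # assumed path

    # cat=None: let the extractor decide the category
    import_events_from_url.delay("https://example.org/some-event", None)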
@@ -3,6 +3,7 @@
 {% block title %}{% block og_title %}Importations récurrentes{% endblock %}{% endblock %}
 
 {% load utils_extra %}
+{% load rimports_extra %}
 {% load cat_extra %}
 {% block entete_header %}
 {% css_categories %}
@@ -13,8 +14,10 @@
 <article>
     <header>
         <div class="slide-buttons">
-            <a href="{% url 'run_all_rimports' %}" role="button">Importer tout {% picto_from_name "play-circle" %}</a>
-            <a href="{% url 'run_all_rimports_failed' %}" role="button">Relancer les imports échoués {% picto_from_name "play-circle" %}</a>
+            <a href="{% url 'run_all_rimports' %}" role="button">Exécuter tout {% picto_from_name "play-circle" %}</a>
+            {% if has_failed_rimports %}
+                <a href="{% url 'run_all_rimports_failed' %}" role="button">Relancer les imports échoués {% picto_from_name "play-circle" %}</a>
+            {% endif %}
             <a href="{% url 'add_rimport'%}" role="button">Ajouter {% picto_from_name "plus-circle" %}</a>
         </div>
         <h1>Importations récurrentes</h1>
@@ -12,6 +12,10 @@ from .utils_extra import picto_from_name
 register = template.Library()
 
 
+@register.simple_tag
+def has_failed_rimports():
+    return BatchImportation.objects.filter(status=BatchImportation.STATUS.FAILED).count() != 0
+
 @register.simple_tag
 def show_badge_failed_rimports(placement="top"):
     newest = BatchImportation.objects.filter(recurrentImport=OuterRef("pk")).order_by(
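
Two small caveats on this tag. First, {% if has_failed_rimports %} in the template above resolves a context variable, not the tag itself; for the simple_tag to drive the condition it would need to be assigned first ({% has_failed_rimports as has_failed %} ... {% if has_failed %}), unless the view already puts has_failed_rimports in the context. Second, an exists() query lets the database stop at the first matching row instead of counting them all; an equivalent sketch:

    @register.simple_tag
    def has_failed_rimports():
        # exists() stops at the first FAILED row
        return BatchImportation.objects.filter(
            status=BatchImportation.STATUS.FAILED
        ).exists()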