From e9c017b38b257bb0360d21d21fbfc7bc57a451dc Mon Sep 17 00:00:00 2001 From: Jean-Marie Favreau Date: Sat, 17 Aug 2024 11:41:28 +0200 Subject: [PATCH] =?UTF-8?q?On=20int=C3=A8gre=20l'importation=20par=20url?= =?UTF-8?q?=20dans=20le=20processus=20de=20logging?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/agenda_culturel/celery.py | 84 ++++++++++++------- .../0066_batchimportation_url_source.py | 18 ++++ src/agenda_culturel/models.py | 11 +++ .../agenda_culturel/batch-imports-inc.html | 6 +- src/agenda_culturel/views.py | 5 +- 5 files changed, 91 insertions(+), 33 deletions(-) create mode 100644 src/agenda_culturel/migrations/0066_batchimportation_url_source.py diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py index edb93e8..59a0c6f 100644 --- a/src/agenda_culturel/celery.py +++ b/src/agenda_culturel/celery.py @@ -198,45 +198,69 @@ def weekly_imports(self): for imp in imports: run_recurrent_import.delay(imp.pk) +@app.task(base=ChromiumTask, bind=True) +def import_events_from_url(self, url, cat): + from .db_importer import DBImporterEvents + from agenda_culturel.models import RecurrentImport, BatchImportation + from agenda_culturel.models import Event, Category + + + logger.info("URL import: {}".format(self.request.id)) + + + # clean url + url = Extractor.clean_url(url) + + # we check if the url is known + existing = Event.objects.filter(uuids__contains=[url]) + # if it's unknown + if len(existing) == 0: + + # create an importer + importer = DBImporterEvents(self.request.id) + + # create a batch importation + importation = BatchImportation(url_source=url, celery_id=self.request.id) + # save batch importation + importation.save() + + try: + ## create loader + u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True) + # set default values + values = {} + if cat is not None: + values = {"category": cat} + + # get event + events = u2e.process( + url, published=False, default_values=values + ) + + # TODO: use celery to import the associated events + + # convert it to json + json_events = json.dumps(events, default=str) + + # import events (from json) + success, error_message = importer.import_events(json_events) + + # finally, close task + close_import_task(self.request.id, success, error_message, importer) + except Exception as e: + logger.error(e) + close_import_task(self.request.id, False, e, importer) + @app.task(base=ChromiumTask, bind=True) def import_events_from_urls(self, urls_and_cats): - from agenda_culturel.models import Event, Category - for ucat in urls_and_cats: if ucat is not None: url = ucat[0] cat = ucat[1] - url = Extractor.clean_url(url) + import_events_from_url.delay(url, cat) - # we check if the url is known - existing = Event.objects.filter(uuids__contains=[url]) - # if it's unknown - if len(existing) == 0: - - u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True) - events_structure = u2e.process( - url, published=False - ) - # TODO: use celery to import the associated events - if ( - events_structure is not None - and "events" in events_structure - and len(events_structure["events"]) > 0 - ): - event = Event.from_structure( - events_structure["events"][0], - events_structure["header"]["url"], - ) - if event is not None: - if (event.category is None or event.category == Category.get_default_category()): - if cat is not None: - event.category = Category.objects.filter(name=cat).first() - - - event.save() - logger.info('Successful import from ' + url) app.conf.beat_schedule = { "daily_imports": { diff --git a/src/agenda_culturel/migrations/0066_batchimportation_url_source.py b/src/agenda_culturel/migrations/0066_batchimportation_url_source.py new file mode 100644 index 0000000..5d06248 --- /dev/null +++ b/src/agenda_culturel/migrations/0066_batchimportation_url_source.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.7 on 2024-08-17 09:33 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('agenda_culturel', '0065_alter_place_options'), + ] + + operations = [ + migrations.AddField( + model_name='batchimportation', + name='url_source', + field=models.URLField(blank=True, editable=False, help_text='Source URL if no RecurrentImport is associated.', max_length=1024, null=True, verbose_name='URL (if not recurrent import)'), + ), + ] diff --git a/src/agenda_culturel/models.py b/src/agenda_culturel/models.py index e4d57b7..7f495bd 100644 --- a/src/agenda_culturel/models.py +++ b/src/agenda_culturel/models.py @@ -1344,6 +1344,17 @@ class BatchImportation(models.Model): editable=False, ) + url_source = models.URLField( + verbose_name=_("URL (if not recurrent import)"), + help_text=_( + "Source URL if no RecurrentImport is associated." + ), + max_length=1024, + blank=True, + null=True, + editable=False, + ) + status = models.CharField( _("Status"), max_length=20, choices=STATUS.choices, default=STATUS.RUNNING ) diff --git a/src/agenda_culturel/templates/agenda_culturel/batch-imports-inc.html b/src/agenda_culturel/templates/agenda_culturel/batch-imports-inc.html index 4d02f85..55fbaf2 100644 --- a/src/agenda_culturel/templates/agenda_culturel/batch-imports-inc.html +++ b/src/agenda_culturel/templates/agenda_culturel/batch-imports-inc.html @@ -20,7 +20,11 @@ {{ obj.id }} {{ obj.created_date }} - {% if obj.recurrentImport %}{{ obj.recurrentImport.name }}{% else %}-{% endif %} + {% if obj.recurrentImport %}{{ obj.recurrentImport.name }}{% else %} + {% if obj.url_source %} + url + {% endif %} + {% endif %} {{ obj.status }} {% if obj.status == "running" %}Annuler{% endif %} {% if obj.status == "success" %}{{ obj.nb_initial }}{% endif %} diff --git a/src/agenda_culturel/views.py b/src/agenda_culturel/views.py index 717c77d..d11712d 100644 --- a/src/agenda_culturel/views.py +++ b/src/agenda_culturel/views.py @@ -72,6 +72,7 @@ from .celery import ( import_events_from_json, run_recurrent_import, run_all_recurrent_imports, + import_events_from_url, import_events_from_urls, ) @@ -619,7 +620,7 @@ class URLEventEvaluation: def import_from_urls(request): - URLSubmissionFormSet = formset_factory(URLSubmissionForm, extra=10, min_num=1) + URLSubmissionFormSet = formset_factory(URLSubmissionForm, extra=9, min_num=1) if request.method == "POST": formset = URLSubmissionFormSet(request.POST, request.FILES) @@ -701,7 +702,7 @@ def import_from_url(request): request, _('Integrating {} into our import process.').format(uc.url) ) - import_events_from_urls.delay([uc.to_list()]) + import_events_from_url.delay(uc.url, uc.cat) return HttpResponseRedirect(reverse("thank_you"))