On intègre l'importation par url dans le processus de logging

This commit is contained in:
Jean-Marie Favreau 2024-08-17 11:41:28 +02:00
parent ec8f0696bb
commit e9c017b38b
5 changed files with 91 additions and 33 deletions

View File

@ -198,45 +198,69 @@ def weekly_imports(self):
for imp in imports:
run_recurrent_import.delay(imp.pk)
@app.task(base=ChromiumTask, bind=True)
def import_events_from_url(self, url, cat):
    """Import the event found at a single URL, tracked as a batch importation.

    The URL is normalised with ``Extractor.clean_url``. If an ``Event``
    already lists it in its ``uuids``, the task returns without importing
    anything. Otherwise a ``BatchImportation`` row is saved (``url_source``
    set to the cleaned URL, ``celery_id`` set to this task's id), the page is
    processed by ``URL2Events`` through a headless Chromium downloader, and
    the result is handed to ``DBImporterEvents`` as JSON. The outcome is
    reported through ``close_import_task``.

    Args:
        url: Address of the page describing the event.
        cat: Default category passed to the extractor, or ``None`` to pass
            no default values at all.
    """
    from .db_importer import DBImporterEvents
    from agenda_culturel.models import BatchImportation, Event

    task_id = self.request.id
    logger.info("URL import: %s", task_id)

    url = Extractor.clean_url(url)

    # Nothing to import when an event already references this URL. exists()
    # asks the database for a yes/no answer instead of loading every match.
    if Event.objects.filter(uuids__contains=[url]).exists():
        logger.info("URL import skipped, already known: %s", url)
        return

    importer = DBImporterEvents(task_id)
    # Record the importation before any download is attempted.
    BatchImportation(url_source=url, celery_id=task_id).save()

    try:
        u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True)
        default_values = {} if cat is None else {"category": cat}
        events = u2e.process(
            url, published=False, default_values=default_values
        )
        # TODO: use celery to import the associated events

        # default=str stringifies any value json cannot encode natively.
        json_events = json.dumps(events, default=str)
        success, error_message = importer.import_events(json_events)
        close_import_task(task_id, success, error_message, importer)
    except Exception as e:
        # Task boundary: whatever went wrong, log it with its traceback and
        # report the importation as failed.
        logger.exception(e)
        close_import_task(task_id, False, e, importer)
# Celery task (bound, ChromiumTask base) taking a list of url/category
# entries: None entries are skipped, and each remaining entry is read as
# ucat[0] = url and ucat[1] = category.
# NOTE(review): this span comes from a diff view and appears to interleave
# the new delegation to import_events_from_url.delay(...) with the older
# inline import logic (everything from the "we check if the url is known"
# comment onward) -- confirm which lines exist in the committed file before
# relying on either path.
@app.task(base=ChromiumTask, bind=True)
def import_events_from_urls(self, urls_and_cats):
from agenda_culturel.models import Event, Category
for ucat in urls_and_cats:
if ucat is not None:
url = ucat[0]
cat = ucat[1]
url = Extractor.clean_url(url)
# queue the single-URL task for this entry (asynchronous Celery call)
import_events_from_url.delay(url, cat)
# we check if the url is known
existing = Event.objects.filter(uuids__contains=[url])
# if it's unknown
if len(existing) == 0:
u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True)
events_structure = u2e.process(
url, published=False
)
# TODO: use celery to import the associated events
# only the first extracted event is turned into an Event
if (
events_structure is not None
and "events" in events_structure
and len(events_structure["events"]) > 0
):
event = Event.from_structure(
events_structure["events"][0],
events_structure["header"]["url"],
)
if event is not None:
# the submitted category only replaces a missing or default category
if (event.category is None or event.category == Category.get_default_category()):
if cat is not None:
event.category = Category.objects.filter(name=cat).first()
event.save()
logger.info('Successful import from ' + url)
app.conf.beat_schedule = {
"daily_imports": {

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.7 on 2024-08-17 09:33
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the nullable ``url_source`` URL column to ``BatchImportation``."""

    dependencies = [("agenda_culturel", "0065_alter_place_options")]

    operations = [
        migrations.AddField(
            model_name="batchimportation",
            name="url_source",
            field=models.URLField(
                blank=True,
                editable=False,
                help_text="Source URL if no RecurrentImport is associated.",
                max_length=1024,
                null=True,
                verbose_name="URL (if not recurrent import)",
            ),
        ),
    ]

View File

@ -1344,6 +1344,17 @@ class BatchImportation(models.Model):
editable=False,
)
# Source URL of an importation that is not tied to a RecurrentImport (see
# help_text). Optional (blank and null allowed), capped at 1024 characters,
# and declared non-editable.
url_source = models.URLField(
verbose_name=_("URL (if not recurrent import)"),
help_text=_(
"Source URL if no RecurrentImport is associated."
),
max_length=1024,
blank=True,
null=True,
editable=False,
)
# State of the importation, restricted to STATUS.choices; defaults to
# STATUS.RUNNING.
status = models.CharField(
_("Status"), max_length=20, choices=STATUS.choices, default=STATUS.RUNNING
)

View File

@ -20,7 +20,11 @@
<tr>
<td>{{ obj.id }}</td>
<td>{{ obj.created_date }}</td>
<td>{% if obj.recurrentImport %}<a href="{{ obj.recurrentImport.get_absolute_url }}">{{ obj.recurrentImport.name }}</a>{% else %}-{% endif %} </td>
<td>{% if obj.recurrentImport %}<a href="{{ obj.recurrentImport.get_absolute_url }}">{{ obj.recurrentImport.name }}</a>{% else %}
{% if obj.url_source %}
<a href="{{ obj.url_source }}"><em>url</em></a>
{% endif %}
{% endif %} </td>
<td><span{% if obj.status == "failed" %} data-tooltip="{{ obj.error_message }}"{% endif %}>{{ obj.status }}</span></td>
<td>{% if obj.status == "running" %}<a href="{% url 'cancel_import' obj.id %}">Annuler</a>{% endif %}</td>
<td>{% if obj.status == "success" %}{{ obj.nb_initial }}{% endif %}</td>

View File

@ -72,6 +72,7 @@ from .celery import (
import_events_from_json,
run_recurrent_import,
run_all_recurrent_imports,
import_events_from_url,
import_events_from_urls,
)
@ -619,7 +620,7 @@ class URLEventEvaluation:
def import_from_urls(request):
URLSubmissionFormSet = formset_factory(URLSubmissionForm, extra=10, min_num=1)
URLSubmissionFormSet = formset_factory(URLSubmissionForm, extra=9, min_num=1)
if request.method == "POST":
formset = URLSubmissionFormSet(request.POST, request.FILES)
@ -701,7 +702,7 @@ def import_from_url(request):
request,
_('Integrating {} into our import process.').format(uc.url)
)
import_events_from_urls.delay([uc.to_list()])
import_events_from_url.delay(uc.url, uc.cat)
return HttpResponseRedirect(reverse("thank_you"))