On intègre l'importation par url dans le processus de logging
This commit is contained in:
parent
ec8f0696bb
commit
e9c017b38b
@ -198,45 +198,69 @@ def weekly_imports(self):
|
||||
for imp in imports:
|
||||
run_recurrent_import.delay(imp.pk)
|
||||
|
||||
@app.task(base=ChromiumTask, bind=True)
def import_events_from_url(self, url, cat):
    """Import the event found at ``url`` and log the run as a BatchImportation.

    The URL is cleaned first. When an event already lists the cleaned URL in
    its ``uuids``, the task returns without doing anything. Otherwise a
    ``BatchImportation`` carrying the URL and this task's celery id is saved,
    the page is processed with ``URL2Events``, the result is serialised to
    JSON and handed to ``DBImporterEvents``, and the outcome is passed to
    ``close_import_task``.

    Args:
        url: address of the page to import.
        cat: value used as the default ``category`` for the extracted event,
            or ``None`` to provide no default.

    Returns:
        None. Any exception raised while extracting or importing is logged
        and reported through ``close_import_task`` instead of propagating.
    """
    from .db_importer import DBImporterEvents
    from agenda_culturel.models import BatchImportation, Event

    logger.info("URL import: %s", self.request.id)

    # clean url
    url = Extractor.clean_url(url)

    # Nothing to do when the url is already known. ``exists()`` only asks the
    # database whether a match is present instead of loading every row.
    if Event.objects.filter(uuids__contains=[url]).exists():
        return

    # create an importer
    importer = DBImporterEvents(self.request.id)

    # create and save the batch importation that tracks this run
    importation = BatchImportation(url_source=url, celery_id=self.request.id)
    importation.save()

    try:
        # create loader
        u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True)

        # set default values
        values = {"category": cat} if cat is not None else {}

        # get event
        events = u2e.process(url, published=False, default_values=values)

        # TODO: use celery to import the associated events

        # convert it to json
        json_events = json.dumps(events, default=str)

        # import events (from json)
        success, error_message = importer.import_events(json_events)

        # finally, close task
        close_import_task(self.request.id, success, error_message, importer)
    except Exception as e:
        # Task boundary: record the failure on the batch importation rather
        # than letting the exception escape the worker.
        logger.error(e)
        close_import_task(self.request.id, False, e, importer)
|
||||
|
||||
|
||||
@app.task(base=ChromiumTask, bind=True)
def import_events_from_urls(self, urls_and_cats):
    """Queue one ``import_events_from_url`` task per (url, category) entry.

    Args:
        urls_and_cats: iterable of two-item sequences ``[url, category]``;
            ``None`` entries are skipped.

    Returns:
        None. The imports themselves run in the queued tasks.
    """
    for ucat in urls_and_cats:
        if ucat is None:
            continue

        url = Extractor.clean_url(ucat[0])
        cat = ucat[1]

        # The per-URL task already checks whether the url is known, performs
        # the import and logs it as a BatchImportation. Repeating the import
        # inline here would race that task and could save the event twice.
        import_events_from_url.delay(url, cat)
|
||||
|
||||
app.conf.beat_schedule = {
|
||||
"daily_imports": {
|
||||
|
@ -0,0 +1,18 @@
|
||||
# Generated by Django 4.2.7 on 2024-08-17 09:33
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the nullable, non-editable ``url_source`` field to ``batchimportation``."""

    dependencies = [
        ("agenda_culturel", "0065_alter_place_options"),
    ]

    operations = [
        migrations.AddField(
            model_name="batchimportation",
            name="url_source",
            field=models.URLField(
                verbose_name="URL (if not recurrent import)",
                help_text="Source URL if no RecurrentImport is associated.",
                max_length=1024,
                blank=True,
                null=True,
                editable=False,
            ),
        ),
    ]
|
@ -1344,6 +1344,17 @@ class BatchImportation(models.Model):
|
||||
editable=False,
|
||||
)
|
||||
|
||||
url_source = models.URLField(
|
||||
verbose_name=_("URL (if not recurrent import)"),
|
||||
help_text=_(
|
||||
"Source URL if no RecurrentImport is associated."
|
||||
),
|
||||
max_length=1024,
|
||||
blank=True,
|
||||
null=True,
|
||||
editable=False,
|
||||
)
|
||||
|
||||
status = models.CharField(
|
||||
_("Status"), max_length=20, choices=STATUS.choices, default=STATUS.RUNNING
|
||||
)
|
||||
|
@ -20,7 +20,11 @@
|
||||
<tr>
|
||||
<td>{{ obj.id }}</td>
|
||||
<td>{{ obj.created_date }}</td>
|
||||
<td>{% if obj.recurrentImport %}<a href="{{ obj.recurrentImport.get_absolute_url }}">{{ obj.recurrentImport.name }}</a>{% else %}-{% endif %} </td>
|
||||
<td>{% if obj.recurrentImport %}<a href="{{ obj.recurrentImport.get_absolute_url }}">{{ obj.recurrentImport.name }}</a>{% else %}
|
||||
{% if obj.url_source %}
|
||||
<a href="{{ obj.url_source }}"><em>url</em></a>
|
||||
{% endif %}
|
||||
{% endif %} </td>
|
||||
<td><span{% if obj.status == "failed" %} data-tooltip="{{ obj.error_message }}"{% endif %}>{{ obj.status }}</span></td>
|
||||
<td>{% if obj.status == "running" %}<a href="{% url 'cancel_import' obj.id %}">Annuler</a>{% endif %}</td>
|
||||
<td>{% if obj.status == "success" %}{{ obj.nb_initial }}{% endif %}</td>
|
||||
|
@ -72,6 +72,7 @@ from .celery import (
|
||||
import_events_from_json,
|
||||
run_recurrent_import,
|
||||
run_all_recurrent_imports,
|
||||
import_events_from_url,
|
||||
import_events_from_urls,
|
||||
)
|
||||
|
||||
@ -619,7 +620,7 @@ class URLEventEvaluation:
|
||||
|
||||
|
||||
def import_from_urls(request):
|
||||
URLSubmissionFormSet = formset_factory(URLSubmissionForm, extra=10, min_num=1)
|
||||
URLSubmissionFormSet = formset_factory(URLSubmissionForm, extra=9, min_num=1)
|
||||
|
||||
if request.method == "POST":
|
||||
formset = URLSubmissionFormSet(request.POST, request.FILES)
|
||||
@ -701,7 +702,7 @@ def import_from_url(request):
|
||||
request,
|
||||
_('Integrating {} into our import process.').format(uc.url)
|
||||
)
|
||||
import_events_from_urls.delay([uc.to_list()])
|
||||
import_events_from_url.delay(uc.url, uc.cat)
|
||||
return HttpResponseRedirect(reverse("thank_you"))
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user