Dernière étape manquante de l'import: suppression d'un événement s'il n'est pas présent dans le json d'import.

Fix #46
This commit is contained in:
Jean-Marie Favreau 2024-01-01 12:57:22 +01:00
parent 5a63073cf7
commit 25f9b8c8e6
6 changed files with 75 additions and 18 deletions

View File

@ -151,6 +151,7 @@ class ICALExtractor(Extractor):
print("Extracting ical events from {}".format(url)) print("Extracting ical events from {}".format(url))
self.set_header(url) self.set_header(url)
self.clear_events() self.clear_events()
self.uuids = {}
calendar = icalendar.Calendar.from_ical(content) calendar = icalendar.Calendar.from_ical(content)
@ -179,6 +180,11 @@ class ICALExtractor(Extractor):
uuid = self.get_item_from_vevent(event, "UID") uuid = self.get_item_from_vevent(event, "UID")
if uuid is not None: if uuid is not None:
if uuid in self.uuids:
self.uuids[uuid] += 1
uuid += ":{:04}".format(self.uuids[uuid] - 1)
else:
self.uuids[uuid] = 1
event_url = url + "#" + uuid event_url = url + "#" + uuid
tags = self.default_value_if_exists(default_values, "tags") tags = self.default_value_if_exists(default_values, "tags")

View File

@ -20,6 +20,7 @@ class EventForm(ModelForm):
'end_day': TextInput(attrs={'type': 'date'}), 'end_day': TextInput(attrs={'type': 'date'}),
'end_time': TextInput(attrs={'type': 'time'}), 'end_time': TextInput(attrs={'type': 'time'}),
'uuids': MultipleHiddenInput(), 'uuids': MultipleHiddenInput(),
'import_sources': MultipleHiddenInput(),
} }

View File

@ -82,7 +82,7 @@ class EventsImporter:
return event["end_day"] >= self.today return event["end_day"] >= self.today
def save_imported(self): def save_imported(self):
self.db_event_objects, self.nb_updated, self.nb_removed = Event.import_events(self.event_objects, remove_missing=True) self.db_event_objects, self.nb_updated, self.nb_removed = Event.import_events(self.event_objects, remove_missing_from_source=self.url)
def is_valid_event_structure(self, event): def is_valid_event_structure(self, event):
@ -97,7 +97,7 @@ class EventsImporter:
def load_event(self, event): def load_event(self, event):
if self.is_valid_event_structure(event): if self.is_valid_event_structure(event):
event_obj = Event.from_structure(event) event_obj = Event.from_structure(event, self.url)
self.event_objects.append(event_obj) self.event_objects.append(event_obj)
return True return True
else: else:

View File

@ -0,0 +1,19 @@
# Generated by Django 4.2.7 on 2023-12-31 20:06
from django.db import migrations, models
import django_better_admin_arrayfield.models.fields
class Migration(migrations.Migration):
    """Schema migration adding the ``import_sources`` array field to ``event``."""

    # Applied after migration 0021 of the ``agenda_culturel`` app.
    dependencies = [
        ("agenda_culturel", "0021_alter_event_possibly_duplicated"),
    ]

    operations = [
        migrations.AddField(
            model_name="event",
            name="import_sources",
            # Optional (blank/null allowed), unbounded array of strings of
            # at most 512 characters each.
            field=django_better_admin_arrayfield.models.fields.ArrayField(
                base_field=models.CharField(max_length=512),
                blank=True,
                help_text="Importation source used to detect removed entries.",
                null=True,
                size=None,
                verbose_name="Importation source",
            ),
        ),
    ]

View File

@ -15,7 +15,7 @@ from django.db.models import Q
from django.template.defaultfilters import date as _date from django.template.defaultfilters import date as _date
from datetime import time, timedelta from datetime import time, timedelta, date
from django.utils.timezone import datetime from django.utils.timezone import datetime
from django.utils import timezone from django.utils import timezone
@ -160,6 +160,7 @@ class Event(models.Model):
image = models.URLField(verbose_name=_('Illustration'), help_text=_("URL of the illustration image"), max_length=1024, blank=True, null=True) image = models.URLField(verbose_name=_('Illustration'), help_text=_("URL of the illustration image"), max_length=1024, blank=True, null=True)
image_alt = models.CharField(verbose_name=_('Illustration description'), help_text=_('Alternative text used by screen readers for the image'), blank=True, null=True, max_length=1024) image_alt = models.CharField(verbose_name=_('Illustration description'), help_text=_('Alternative text used by screen readers for the image'), blank=True, null=True, max_length=1024)
import_sources = ArrayField(models.CharField(max_length=512), verbose_name=_('Importation source'), help_text=_("Importation source used to detect removed entries."), blank=True, null=True)
uuids = ArrayField(models.CharField(max_length=512), verbose_name=_('UUIDs'), help_text=_("UUIDs from import to detect duplicated entries."), blank=True, null=True) uuids = ArrayField(models.CharField(max_length=512), verbose_name=_('UUIDs'), help_text=_("UUIDs from import to detect duplicated entries."), blank=True, null=True)
reference_urls = ArrayField(models.URLField(max_length=512), verbose_name=_('URLs'), help_text=_("List of all the urls where this event can be found."), blank=True, null=True) reference_urls = ArrayField(models.URLField(max_length=512), verbose_name=_('URLs'), help_text=_("List of all the urls where this event can be found."), blank=True, null=True)
@ -295,7 +296,7 @@ class Event(models.Model):
super().save(*args, **kwargs) super().save(*args, **kwargs)
def from_structure(event_structure): def from_structure(event_structure, import_source = None):
if "category" in event_structure and event_structure["category"] is not None: if "category" in event_structure and event_structure["category"] is not None:
event_structure["category"] = Category.objects.get(name=event_structure["category"]) event_structure["category"] = Category.objects.get(name=event_structure["category"])
@ -331,6 +332,9 @@ class Event(models.Model):
if "description" in event_structure and event_structure["description"] is None: if "description" in event_structure and event_structure["description"] is None:
event_structure["description"] = "" event_structure["description"] = ""
if import_source is not None:
event_structure["import_sources"] = [import_source]
return Event(**event_structure) return Event(**event_structure)
@ -369,11 +373,11 @@ class Event(models.Model):
# do we have to create a new group? # do we have to create a new group?
if len(groups) == 0: if len(groups) == 0:
group = DuplicatedEvents.objects.create() group = DuplicatedEvents.objects.create()
logger.warning("set possibily duplicated 0 {}".format(group)) logger.warning("set possibly duplicated 0 {}".format(group))
else: else:
# otherwise merge existing groups # otherwise merge existing groups
group = DuplicatedEvents.merge_groups(groups) group = DuplicatedEvents.merge_groups(groups)
logger.warning("set possibily duplicated not 0 {}".format(group)) logger.warning("set possibly duplicated not 0 {}".format(group))
group.save() group.save()
# set the possibly duplicated group for the current object # set the possibly duplicated group for the current object
@ -392,7 +396,6 @@ class Event(models.Model):
def same_event_by_data(self, other): def same_event_by_data(self, other):
for attr in Event.data_fields(): for attr in Event.data_fields():
if str(getattr(self, attr)) != str(getattr(other, attr)): if str(getattr(self, attr)) != str(getattr(other, attr)):
logger.warning("on trouve une différence dans {}: {} vs {}".format(attr, getattr(self, attr), getattr(other, attr)))
return False return False
return True return True
@ -414,6 +417,11 @@ class Event(models.Model):
# set attributes # set attributes
for attr in Event.data_fields(): for attr in Event.data_fields():
setattr(self, attr, getattr(other, attr)) setattr(self, attr, getattr(other, attr))
# set status according to the input status
if other.status is not None:
self.status = other.status
# add a possible missing uuid # add a possible missing uuid
if self.uuids is None: if self.uuids is None:
self.uuids = [] self.uuids = []
@ -423,12 +431,27 @@ class Event(models.Model):
# Limitation: the given events should not be considered similar one to another... # Limitation: the given events should not be considered similar one to another...
def import_events(events, remove_missing=False): def import_events(events, remove_missing_from_source=None):
to_import = [] to_import = []
to_update = [] to_update = []
min_date = timezone.now().date()
max_date = None
uuids = set()
# for each event, check if it's a new one, or a one to be updated # for each event, check if it's a new one, or a one to be updated
for event in events: for event in events:
sdate = date.fromisoformat(event.start_day)
edate = date.fromisoformat(event.end_day)
if min_date is None or min_date > sdate:
min_date = sdate
if max_date is None or max_date < sdate:
max_date = sdate
if max_date is None or (event.end_day is not None and max_date < edate):
max_date = edate
if len(event.uuids) > 0:
uuids.add(event.uuids[0])
# imported events should be updated # imported events should be updated
event.set_in_importation_process() event.set_in_importation_process()
event.prepare_save() event.prepare_save()
@ -442,8 +465,7 @@ class Event(models.Model):
if same_imported: if same_imported:
# if this event exists, it will be updated with new data only if the data is fresher # if this event exists, it will be updated with new data only if the data is fresher
logger.warning("{} vs {}".format(same_imported.modified_date, event.modified_date)) if same_imported.modified_date < event.modified_date or event.status != same_imported.status:
if same_imported.modified_date < event.modified_date:
same_imported.update(event) same_imported.update(event)
same_imported.set_in_importation_process() same_imported.set_in_importation_process()
same_imported.prepare_save() same_imported.prepare_save()
@ -478,17 +500,26 @@ class Event(models.Model):
# then import all the new events # then import all the new events
imported = Event.objects.bulk_create(to_import) imported = Event.objects.bulk_create(to_import)
nb_updated = Event.objects.bulk_update(to_update, fields = Event.data_fields() + ["imported_date", "modified_date", "uuids"]) nb_updated = Event.objects.bulk_update(to_update, fields = Event.data_fields() + ["imported_date", "modified_date", "uuids", "status"])
nb_removed = 0 nb_draft = 0
if remove_missing: if remove_missing_from_source is not None:
# events that are missing from the import but in database are turned into drafts # events that are missing from the import but in database are turned into drafts
# only if they are in the future # only if they are in the future
# TODO
# TODO: ajouter self.source, ou faire référence à l'objet BatchImportation
pass
return imported, nb_updated, nb_removed in_interval = Event.objects.filter(((Q(end_day__isnull=True) & Q(start_day__gte=min_date) & Q(start_day__lte=max_date)) |
(Q(end_day__isnull=False) & ~(Q(start_day__gt=max_date) | Q(end_day__lt=min_date)))) & Q(import_sources__contains=[remove_missing_from_source]) & Q(status=Event.STATUS.PUBLISHED) & Q(uuids__len__gt=0))
to_draft = []
for e in in_interval:
if len(uuids.intersection(e.uuids)) == 0:
e.status = Event.STATUS.TRASH
e.prepare_save()
to_draft.append(e)
nb_draft = Event.objects.bulk_update(to_draft, fields = ["status"])
return imported, nb_updated, nb_draft

View File

@ -29,7 +29,7 @@
<th>initial</th> <th>initial</th>
<th>importés</th> <th>importés</th>
<th>mis à jour</th> <th>mis à jour</th>
<th>supprimés</th> <th>dépubliés</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>