Migration au nouveau paradigme de dupliqués

Fix #184
This commit is contained in:
Jean-Marie Favreau 2024-11-09 23:00:29 +01:00
parent 11790f0200
commit 489d2e2f0f
14 changed files with 409 additions and 381 deletions

View File

@ -80,6 +80,8 @@ class EventForm(ModelForm):
"imported_date",
"modified_date",
"moderated_date",
"import_sources",
"uuids"
]
widgets = {
"start_day": TextInput(
@ -98,9 +100,7 @@ class EventForm(ModelForm):
),
"end_day": TextInput(attrs={"type": "date"}),
"end_time": TextInput(attrs={"type": "time"}),
"uuids": MultipleHiddenInput(),
"other_versions": HiddenInput(),
"import_sources": MultipleHiddenInput(),
"reference_urls": DynamicArrayWidgetURLs(),
"tags": DynamicArrayWidgetTags(),
}
@ -164,12 +164,16 @@ class FixDuplicates(Form):
super().__init__(*args, **kwargs)
choices = []
initial = None
for i, e in enumerate(events):
if e.status != Event.STATUS.TRASH:
il = auc[i]
msg = ""
if e.modified():
if e.local_version():
msg = _(" (locally modified version)")
initial = "Select" + il
if e.pure_import():
msg = _(" (synchronized on import version)")
choices += [
(
"Select" + il,
@ -177,8 +181,10 @@ class FixDuplicates(Form):
)
]
extra = ""
if edup.has_modified():
if edup.has_local_version():
extra = _(" Warning: a version is already locally modified.")
if initial is None:
initial = "Merge"
choices += [
("Merge", _("Create a new version by merging.") + extra)
]
@ -193,6 +199,7 @@ class FixDuplicates(Form):
choices += [("NotDuplicates", _("Make all versions independent."))]
self.fields["action"].choices = choices
self.fields["action"].initial = initial
def is_action_no_duplicates(self):
    """Tell whether the validated form asks to make all versions independent.

    Returns:
        bool: True when the cleaned ``action`` value is ``"NotDuplicates"``.
    """
    chosen_action = self.cleaned_data["action"]
    return chosen_action == "NotDuplicates"

File diff suppressed because it is too large Load Diff

View File

@ -237,18 +237,24 @@ class DuplicatedEvents(models.Model):
def fixed(self):
    """Tell whether this group of duplicates has a representative.

    Returns:
        bool: True when ``self.representative`` is set (not None),
        False otherwise.
    """
    return self.representative is not None
def is_published(self):
    """Tell whether at least one event of the group is published.

    Returns:
        bool: True if any event returned by ``get_duplicated()`` answers
        true to ``is_published()``; False otherwise (including when the
        group is empty).
    """
    # any() stops at the first match instead of materialising a list
    return any(e.is_published() for e in self.get_duplicated())
def has_modified(self):
    """Tell whether at least one event of the group is modified.

    Returns:
        bool: True if any event returned by ``get_duplicated()`` answers
        true to ``modified()``; False otherwise (including when the group
        is empty).
    """
    # any() stops at the first match instead of materialising a list
    return any(e.modified() for e in self.get_duplicated())
def has_local_version(self):
    """Tell whether at least one event of the group is a local version.

    Returns:
        bool: True if any event returned by ``get_duplicated()`` answers
        true to ``local_version()``; False otherwise (including when the
        group is empty).
    """
    # any() stops at the first match instead of materialising a list
    return any(e.local_version() for e in self.get_duplicated())
def get_local_version(self):
if self.representative and self.representative.modified():
if self.representative and self.representative.local_version():
return self.representative
l = [e for e in self.get_duplicated() if e.modified()]
l = [e for e in self.get_duplicated() if e.local_version()]
if len(l) == 0:
return None
else:
l.sort(key=lambda x: -x.modified_date)
l.sort(key=lambda x: x.modified_date, reverse=True)
return l[0]
def merge_into(self, other):
@ -256,7 +262,8 @@ class DuplicatedEvents(models.Model):
for e in self.get_duplicated():
# change their group membership
e.other_versions = other
# save them
# save them without updating modified date
e.set_no_modification_date_changed()
e.save()
other.representative = None
other.save()
@ -304,27 +311,6 @@ class DuplicatedEvents(models.Model):
nb, d = singletons.delete()
return nb
def fix_similar_entries():
to_be_fixed = []
for d in DuplicatedEvents.not_fixed_qs().prefetch_related('event_set'):
comp = Event.get_comparison(d.get_duplicated())
similar = len([c for c in comp if not c["similar"]]) == 0
if similar:
to_be_fixed.append(d)
nb = len(to_be_fixed)
if nb > 0:
logger.warning("Removing: " + str(nb) + " similar duplicated")
for d in to_be_fixed:
if len(d.get_duplicated()) == 0:
logger.warning(" empty")
else:
logger.warning(" " + d.get_duplicated()[0].title)
for s in to_be_fixed:
s.fix()
return nb
def not_fixed_qs(qs=None, fixed=False):
if not qs:
@ -673,6 +659,21 @@ class Event(models.Model):
return False
return self.modified_date is None or (self.modified_date - self.imported_date).total_seconds() <= 0
def local_version(self):
    """Tell whether this event counts as a local version.

    An event without an import date is always local; otherwise the result
    is whatever ``modified()`` returns.
    """
    if self.imported_date is None:
        return True
    return self.modified()
def get_reference_urls(self):
    """Collect the reference URLs of this event and of its published versions.

    The event's own ``reference_urls`` are combined with those of every
    event returned by ``self.other_versions.get_duplicated()`` whose status
    equals ``Event.STATUS.PUBLISHED``.

    Returns:
        list: the distinct URLs in ascending order; empty when there are
        none.
    """
    # Accumulate in a fresh set: extending the event's own list in place
    # would leak the other versions' URLs into self.reference_urls.
    urls = set() if self.reference_urls is None else set(self.reference_urls)
    if self.other_versions:
        for other in self.other_versions.get_duplicated():
            if (
                other.status == Event.STATUS.PUBLISHED
                and other.reference_urls is not None
            ):
                urls.update(other.reference_urls)
    return sorted(urls)
def get_local_version(self):
# a non-pure import is a local version
@ -689,7 +690,12 @@ class Event(models.Model):
return Event.objects.filter(status=Event.STATUS.DRAFT).count()
def get_qs_events_with_unkwnon_place():
return Event.objects.filter(exact_location__isnull=True).filter(~Q(status=Event.STATUS.TRASH)).filter(Q(other_versions=None)|~Q(other_versions__representative=F('pk')))
return Event.objects.filter(exact_location__isnull=True). \
filter(~Q(status=Event.STATUS.TRASH)). \
filter(Q(other_versions=None)|Q(other_versions__representative=F('pk')))
def is_representative(self):
    """Tell whether this event is the representative one.

    Returns:
        bool: True when the event belongs to no group
        (``other_versions`` is None) or when its group's representative
        equals this event.
    """
    group = self.other_versions
    if group is None:
        return True
    return group.representative == self
def download_image(self):
# first download file
@ -717,13 +723,19 @@ class Event(models.Model):
def set_in_importation_process(self):
    """Flag this instance by setting ``in_importation_process`` to True."""
    setattr(self, "in_importation_process", True)
def is_no_modification_date_changed(self):
    """Tell whether the ``no_modification_date_changed`` flag is present.

    Only the existence of the attribute is checked, not its value.
    """
    try:
        self.no_modification_date_changed
    except AttributeError:
        return False
    return True
def set_no_modification_date_changed(self):
    """Flag this instance by setting ``no_modification_date_changed`` to True."""
    setattr(self, "no_modification_date_changed", True)
def update_modification_dates(self):
now = timezone.now()
if not self.id:
self.created_date = now
if self.is_in_importation_process():
self.imported_date = now
if self.modified_date is None or not self.is_in_importation_process():
if self.modified_date is None or not self.is_no_modification_date_changed():
self.modified_date = now
def get_recurrence_at_date(self, year, month, day):
@ -872,8 +884,13 @@ class Event(models.Model):
# if it exists similar events, add this relation to the event
if len(similar_events) != 0:
self.set_other_versions(similar_events)
# check if it's a clone (that will become representative)
clone = self.pk is None and not self.other_versions is None
# check if we need to clean the other_versions
if (
not clone and
self.pk and
self.other_versions is not None
and self.other_versions.nb_duplicated() == 1
@ -881,8 +898,14 @@ class Event(models.Model):
self.other_versions.delete()
self.other_versions = None
# first save the current object
super().save(*args, **kwargs)
# then if its a clone, update the representative
if clone:
self.other_versions.representative = self
self.other_versions.save()
def from_structure(event_structure, import_source=None):
if "category" in event_structure and event_structure["category"] is not None:
try:
@ -1057,9 +1080,9 @@ class Event(models.Model):
def masked(self):
    """Tell whether this event is hidden behind another representative.

    Returns:
        The falsy ``other_versions`` value itself when the event has no
        group; otherwise a bool that is True when the group's
        representative differs from this event.
    """
    group = self.other_versions
    if not group:
        # keep handing back the falsy value unchanged (e.g. None)
        return group
    return group.representative != self
def get_comparison(events, all=True):
def get_comparison(events):
result = []
for attr in Event.data_fields(all=all, local_img=False, exact_location=False):
for attr in Event.data_fields(local_img=False, exact_location=False):
values = [getattr(e, attr) for e in events]
values = ["" if v is None else v for v in values]
values = [[] if attr == "tags" and v == "" else v for v in values]
@ -1067,20 +1090,22 @@ class Event(models.Model):
if attr == "image":
values = [v.split("?")[0] if "fbcdn.net" in v else v for v in values]
if len(set([str(v) for v in values])) == 1:
if attr == "description":
values = [v.replace("\r\n", "\n") for v in values]
if len(set([str(v).strip() for v in values])) == 1:
result.append({"similar": True, "key": attr, "values": values[0]})
else:
result.append({"similar": False, "key": attr, "values": values})
return result
def similar(self, event, all=True):
res = Event.get_comparison([self, event], all)
def similar(self, event):
res = Event.get_comparison([self, event])
for r in res:
if not r["similar"]:
return False
return True
def set_other_versions(self, events):
def set_other_versions(self, events, force_non_fixed=False):
# get existing groups
groups = list(
set([e.other_versions for e in events] + [self.other_versions])
@ -1093,6 +1118,9 @@ class Event(models.Model):
else:
# otherwise merge existing groups
group = DuplicatedEvents.merge_groups(groups)
if force_non_fixed:
group.representative = None
group.save()
# set the possibly duplicated group for the current object
@ -1106,13 +1134,10 @@ class Event(models.Model):
elist = list(events) + ([self] if self.pk is not None else [])
Event.objects.bulk_update(elist, fields=["other_versions"])
def data_fields(all=False, local_img=True, exact_location=True):
if all:
result = ["category"]
else:
result = []
result += [
def data_fields(local_img=True, exact_location=True):
result = [
"category",
"tags",
"title",
"location",
"start_day",
@ -1131,15 +1156,6 @@ class Event(models.Model):
result += ["tags"]
return result
def same_event_by_data(self, other):
for attr in Event.data_fields():
if str(getattr(self, attr)) != str(getattr(other, attr)):
return False
return True
def find_same_event_by_data_in_list(self, events):
return [e for e in events if self.same_event_by_data(e)]
def find_last_imported(events):
events = [e for e in events if e.imported_date is not None]
if len(events) == 0:
@ -1148,13 +1164,8 @@ class Event(models.Model):
events.sort(key=lambda e: e.imported_date, reverse=True)
return events[0]
def find_last_imported_not_modified(events):
events = [
e
for e in events
if e.imported_date is not None
and (e.modified_date is None or e.modified_date <= e.imported_date)
]
def find_last_pure_import(events):
events = [e for e in events if e.pure_import()]
if len(events) == 0:
return None
else:
@ -1162,13 +1173,6 @@ class Event(models.Model):
return events[0]
def update(self, other):
# we do not modify the category (local categories are more important)
# however, we add supplementary tags
if other.tags:
if not self.tags:
self.tags = []
self.tags += [t for t in other.tags if not t in self.tags]
# set attributes
for attr in Event.data_fields():
@ -1223,25 +1227,36 @@ class Event(models.Model):
if len(same_events) != 0:
# check if one event has been imported and not modified in this list
same_imported = Event.find_last_imported_not_modified(same_events)
# check if the imported events are similar or not (considering data and not category or tags)
same_events_not_similar = [
e for e in same_events if not e.similar(event, False)
]
if same_imported or len(same_events_not_similar) == 0:
# TODO: revoir ici:
if not same_imported:
same_imported = Event.find_last_imported(same_events)
same_imported = Event.find_last_pure_import(same_events)
# if this event exists, it will be updated with new data only if the data is fresher
if same_imported.modified_date < event.modified_date:
same_imported.update(event)
same_imported.set_in_importation_process()
same_imported.prepare_save()
to_update.append(same_imported)
# if not, we check if it does not match exactly with another
if not same_imported:
for e in same_events:
if event.similar(e):
same_imported = e
break
if same_imported:
# reopen DuplicatedEvents if required
if not event.similar(same_imported) and same_imported.other_versions:
if same_imported.status != Event.STATUS.TRASH:
if same_imported.other_versions.is_published():
if same_imported.other_versions.representative != same_imported:
same_imported.other_versions.representative = None
same_imported.other_versions.save()
same_imported.update(event)
same_imported.set_in_importation_process()
same_imported.prepare_save()
to_update.append(same_imported)
else:
# otherwise, the new event possibly a duplication of the remaining others.
event.set_other_versions(same_events)
# check if it should be published
trash = len([e for e in same_events if e.status != Event.STATUS.TRASH]) == 0
if trash:
event.status = Event.STATUS.TRASH
event.set_other_versions(same_events, force_non_fixed=not trash)
# it will be imported
to_import.append(event)
else:
@ -1250,18 +1265,9 @@ class Event(models.Model):
# if it exists similar events, add this relation to the event
if len(similar_events) != 0:
# check if an event from the list is exactly the same as the new one (using data)
same_events = event.find_same_event_by_data_in_list(similar_events)
if same_events is not None and len(same_events) > 0:
# merge with the first one
same_events[0].update(event)
same_events[0].set_in_importation_process()
same_events[0].prepare_save()
to_update.append(same_events[0])
else:
# the event is possibly a duplication of the others
event.set_other_versions(similar_events)
to_import.append(event)
# the event is possibly a duplication of the others
event.set_other_versions(similar_events, force_non_fixed=True)
to_import.append(event)
else:
# import this new event
to_import.append(event)
@ -1300,6 +1306,8 @@ class Event(models.Model):
for e in in_interval:
if len(uuids.intersection(e.uuids)) == 0:
e.status = Event.STATUS.TRASH
# save them without updating modified date
e.set_no_modification_date_changed()
e.prepare_save()
to_draft.append(e)

View File

@ -549,6 +549,10 @@ article#filters {
font-size: 100%;
}
}
/* Elements with class "remarque" placed inside a header are rendered in italics. */
header .remarque {
font-style: italic;
}
.django-ckeditor-widget {
width: 100%;

View File

@ -14,7 +14,7 @@
{% if e.imported_date %}<li>Dernière importation&nbsp;: {{ e.imported_date }}</li>{% endif %}
<li>État&nbsp;:
{% if e.pure_import %}version fidèle à la source importée{% endif %}
{% if e.modified %}<strong>version modifiée localement</strong>{% endif %}
{% if e.local_version %}<strong>version modifiée localement</strong>{% endif %}
</li>
</ul>
</div>

View File

@ -0,0 +1,16 @@
{# Date summary for "event": creation date, then last modification, import and moderation dates when available, plus a marker for pure imports. Rendered for authenticated users only. #}
{% if user.is_authenticated %}
<p class="footer">Création&nbsp;: {{ event.created_date }}
{% if event.modified %}
— dernière modification&nbsp;: {{ event.modified_date }}
{% endif %}
{% if event.imported_date %}
— dernière importation&nbsp;: {{ event.imported_date }}
{% endif %}
{% if event.moderated_date %}
— dernière modération&nbsp;: {{ event.moderated_date }}
{% endif %}
{% if event.pure_import %}
<strong>version importée</strong>
{% endif %}
</p>
{% endif %}

View File

@ -0,0 +1,14 @@
{# Source links for "event": one link per URL returned by event.get_reference_urls, labelled with its hostname, or a fallback sentence when the list is empty. #}
{% load utils_extra %}
{% with event.get_reference_urls as refs %}
{% if refs|length > 0 %}
<p>Source{{ refs|pluralize }}&nbsp;:
{% for eurl in refs %}
<a href="{{ eurl }}">{{ eurl|hostname }}</a>{% if not forloop.last %}, {% endif %}
{% endfor %}
</p>
{% else %}
<p><em>À notre connaissance, cet événement n'est pas référencé autre part sur internet.</em></p>
{% endif %}
{% endwith %}

View File

@ -84,29 +84,19 @@
{% endwith %}
{% endwith %}
</article>
{% if event.other_versions %}
{% if event.other_versions and not event.other_versions.fixed %}
{% with poss_dup=event.get_other_versions|only_allowed:user.is_authenticated %}
{% if poss_dup|length > 0 %}
<article id="liste-dupliques">
<header>
{% if event.other_versions.representative %}
<h2>Sources multiples</h2>
<p class="remarque">L'événement affiché est également disponible
{% if poss_dup.count == 1 %}
dans une autre version
{% else %}
dans d'autres versions
{% endif %}&nbsp;:</p>
{% else %}
<h2>Possibles doublons</h2>
<p class="remarque">Notre algorithme a détecté que l'événement affiché pourrait être dupliqué sur l'agenda, et consultable dans
<p class="remarque">L'événement affiché pourrait être dupliqué sur l'agenda, et consultable dans
{% if poss_dup.count == 1 %}
une autre version
{% else %}
d'autres versions
{% endif %}
ci-dessous.</p>
{% endif %}
ci-dessous. Nous faisons notre maximum pour résoudre cette duplication temporaire.</p>
</header>
<nav>
<ul class="no-breakline">

View File

@ -48,6 +48,7 @@
{% if perms.agenda_culturel.change_event %}
<footer>
<div class="buttons">
{% include "agenda_culturel/event-date-info-inc.html" %}
{% include "agenda_culturel/edit-buttons-inc.html" with event=event %}
</div>
</footer>

View File

@ -59,17 +59,9 @@
<a href="{% url 'view_tag' tag|prepare_tag %}" role="button" class="small-cat">{{ tag }}</a>
{% endfor %}
</p>
{% include "agenda_culturel/event-sources-inc.html" %}
{% if event.reference_urls %}
<p>Source{{ event.reference_urls|pluralize }}&nbsp;:
{% for eurl in event.reference_urls %}
<a href="{{ eurl }}">{{ eurl|hostname }}</a>{% if not forloop.last %}, {% endif %}
{% endfor %}
</p>
{% else %}
<p><em>À notre connaissance, cet événement n'est pas référencé autre part sur internet.</em></p>
{% endif %}
{% if event.has_recurrences %}
<p class="footer">
{% picto_from_name "repeat" %}

View File

@ -46,15 +46,7 @@
{% endfor %}
</p>
{% if event.reference_urls %}
<p>Source{{ event.reference_urls|pluralize }}&nbsp;:
{% for eurl in event.reference_urls %}
<a href="{{ eurl }}">{{ eurl|hostname }}</a>{% if not forloop.last %}, {% endif %}
{% endfor %}
</p>
{% else %}
<p><em>À notre connaissance, cet événement n'est pas référencé autre part sur internet.</em></p>
{% endif %}
{% include "agenda_culturel/event-sources-inc.html" %}
</div>
{% if perms.agenda_culturel.change_event %}
<div class="buttons">

View File

@ -17,7 +17,6 @@
{{ event.title }}</a></p>
{% picto_from_name "map-pin" %}
{% include "agenda_culturel/event-location-inc.html" with event=event %}
</header>
<p>{% picto_from_name "calendar" %}
{% if event.end_day and event.end_day != event.start_day %}du{% else %}le{% endif %}
{% include "agenda_culturel/date-times-inc.html" with event=event %}
@ -34,6 +33,7 @@
{% endif %}
</p>
{% endif %}
{% if event.tags and event.tags|length > 0 %}
<p>
{% picto_from_name "tag" %}
{% for tag in event.tags %}
@ -41,7 +41,12 @@
{% if not forloop.last %}, {% endif %}
{% endfor %}
</p>
<div class="description">
{% endif %}
</header>
<div class="description">
{% if event.description %}{{ event.description |truncatewords:60 }}{% else %}<em>pas de description</em>{% endif %}
</div>
<footer>
{% include "agenda_culturel/event-date-info-inc.html" %}
</footer>

View File

@ -18,26 +18,38 @@
{% picto_from_name "map-pin" %}
{% include "agenda_culturel/event-location-inc.html" with event=event %}
</p>
{% if event.other_versions %}
{% with poss_dup=event.get_other_not_trash_versions|only_allowed:user.is_authenticated %}
{% if poss_dup|length > 0 %}
<p class="remarque">
{% if event.other_versions.representative %}
cet événement existe <a href="{% if user.is_authenticated %}{{ event.other_versions.get_absolute_url }}{% else %}#liste-dupliques{% endif %}">en plusieurs versions</a>,
{% if event.masked %}
vous pouvez consulter <a href="{{ event.other_versions.get_one_event.get_absolute_url }}">la version mise en avant</a>
{% else %}
et vous consultez la version mise en avant
{% endif %}
{% else %}
cet événement existe probablement <a href="{% if user.is_authenticated %}{{ event.other_versions.get_absolute_url }}{% else %}#liste-dupliques{% endif %}">en plusieurs versions</a>
{% if user.is_authenticated %}
{% if event.other_versions %}
{% with poss_dup=event.get_other_not_trash_versions %}
{% if poss_dup|length > 0 %}
<p class="remarque">
{% if event.other_versions.representative %}
cet événement existe <a href="{{ event.other_versions.get_absolute_url }}">en plusieurs versions</a>,
{% if event.masked %}
vous pouvez consulter <a href="{{ event.other_versions.get_one_event.get_absolute_url }}">la version mise en avant</a>
{% else %}
et vous consultez la version mise en avant
{% endif %}
{% else %}
cet événement existe probablement <a href="{{ event.other_versions.get_absolute_url }}">en plusieurs versions</a>
{% endif %}
</p>
{% endif %}
{% endwith %}
{% endif %}
{% else %}
{% if event.other_versions.representative and event.masked %}
<p class="remarque">
Vous consultez l'événement dans une version non consolidée. Nous vous invitons
à consulter sa <a href="{{ event.other_versions.representative.get_absolute_url }}">version représentative</a>.
</p>
{% endif %}
</p>
{% endif %}
{% endwith %}
{% endif %}
</header>
{% if event.has_image_url %}
<article class='illustration'>
<img src="{{ event.get_image_url }}" alt="{{ event.image_alt }}" />
@ -55,15 +67,8 @@
{% endfor %}
</p>
{% if event.reference_urls %}
<p>Cet événement est proposé par
{% for eurl in event.reference_urls %}
<a href="{{ eurl }}">{{ eurl|hostname }}</a>{% if not forloop.last %}, {% endif %}
{% endfor %}
</p>
{% else %}
<p><em>À notre connaissance, cet événement n'est pas référencé autre part sur internet.</em></p>
{% endif %}
{% include "agenda_culturel/event-sources-inc.html" %}
{% if event.has_recurrences %}
<p class="footer">
@ -78,22 +83,8 @@
{% endif %}
</p>
{% endif %}
<p class="footer">Création&nbsp;: {{ event.created_date }}
{% if event.modified %}
— dernière modification&nbsp;: {{ event.modified_date }}
{% endif %}
{% if event.imported_date %}
— dernière importation&nbsp;: {{ event.imported_date }}
{% endif %}
{% if event.moderated_date %}
— dernière modération&nbsp;: {{ event.moderated_date }}
{% endif %}
{% if event.pure_import %}
<strong>version importée</strong>
{% endif %}
{{ event.delai }}
</p>
{% include "agenda_culturel/event-date-info-inc.html" %}
</div>
<div class="buttons">
<a href="{% url 'export_event_ical' event.start_day.year event.start_day.month event.start_day.day event.id %}" role="button">Exporter ical {% picto_from_name "calendar" %}</a>

View File

@ -575,6 +575,8 @@ class EventUpdateView(
# if no DuplicatedEvents is associated, create one
obj.other_versions = DuplicatedEvents.objects.create()
obj.other_versions.save()
# save them without updating modified date
obj.set_no_modification_date_changed()
obj.save()
result["other_versions"] = obj.other_versions
@ -1542,6 +1544,7 @@ def fix_duplicate(request, pk):
event.other_versions = None
if edup.representative == event:
edup.representative = None
event.set_no_modification_date_changed()
event.save()
edup.save()
messages.success(request, _("The event has been withdrawn from the group and made independent."))
@ -1575,12 +1578,11 @@ class DuplicatedEventsUpdateView(LoginRequiredMixin, UpdateView):
@permission_required("agenda_culturel.view_duplicatedevents")
def duplicates(request):
nb_removed = DuplicatedEvents.remove_singletons()
nb_similar = DuplicatedEvents.fix_similar_entries()
if nb_removed > 0 or nb_similar > 0:
if nb_removed > 0:
messages.success(
request,
_("Cleaning up duplicates: {} item(s) fixed.").format(
nb_removed + nb_similar
nb_removed
),
)
@ -1624,6 +1626,8 @@ def set_duplicate(request, year, month, day, pk):
if form.is_valid():
selected = [o for o in others if o.pk == int(form.cleaned_data["event"])]
event.set_other_versions(selected)
# save them without updating modified date
event.set_no_modification_date_changed()
event.save()
if request.user.is_authenticated:
messages.success(request, _("The event was successfully duplicated."))
@ -1997,7 +2001,7 @@ def fix_unknown_places(request):
# get all places
places = Place.objects.all()
# get all events without exact location
u_events = Event.objects.filter(exact_location__isnull=True)
u_events = Event.get_qs_events_with_unkwnon_place()
to_be_updated = []
# try to find matches