Première version fonctionnelle de l'import d'événements

This commit is contained in:
Jean-Marie Favreau 2023-12-29 16:56:38 +01:00
parent 07729353ae
commit bec3fef0bf
17 changed files with 650 additions and 137 deletions

View File

@ -12,6 +12,7 @@ from selenium.webdriver.chrome.options import Options
import icalendar
from datetime import datetime, date
import json
from bs4 import BeautifulSoup
@ -76,7 +77,7 @@ class Extractor(ABC):
def clear_events(self):
self.events = []
def add_event(self, title, category, start_day, location, description, tags, url=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False):
def add_event(self, title, category, start_day, location, description, tags, uuid, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False):
if title is None:
print("ERROR: cannot import an event without name")
return
@ -88,13 +89,12 @@ class Extractor(ABC):
"title": title,
"category": category,
"start_day": start_day,
"uuid": uuid,
"location": location,
"descritpion": description,
"description": description,
"tags": tags,
"published": published
}
if url is not None:
event["url"] = url
if url_human is not None:
event["url_human"] = url_human
if start_time is not None:
@ -167,6 +167,12 @@ class ICALExtractor(Extractor):
location = self.default_value_if_exists(default_values, "location")
description = self.get_item_from_vevent(event, "DESCRIPTION")
if description is not None:
soup = BeautifulSoup(description)
delimiter = '\n'
for line_break in soup.findAll('br'):
line_break.replaceWith(delimiter)
description = soup.get_text()
last_modified = self.get_item_from_vevent(event, "LAST_MODIFIED")
@ -183,7 +189,7 @@ class ICALExtractor(Extractor):
if rrule is not None:
print("Recurrent event not yet supported", rrule)
self.add_event(title, category, start_day, location, description, tags, url=event_url, url_human=url_human, start_time=start_time, end_day=end_day, end_time=end_time, last_modified=last_modified, published=published)
self.add_event(title, category, start_day, location, description, tags, uuid=event_url, url_human=url_human, start_time=start_time, end_day=end_day, end_time=end_time, last_modified=last_modified, published=published)
return self.get_structure()

View File

@ -1,6 +1,6 @@
from django.contrib import admin
from django import forms
from .models import Event, Category, StaticContent
from .models import Event, Category, StaticContent, DuplicatedEvents, BatchImportation
from django_better_admin_arrayfield.admin.mixins import DynamicArrayMixin
from django_better_admin_arrayfield.forms.widgets import DynamicArrayWidget
from django_better_admin_arrayfield.models.fields import DynamicArrayField
@ -8,6 +8,8 @@ from django_better_admin_arrayfield.models.fields import DynamicArrayField
admin.site.register(Category)
admin.site.register(StaticContent)
admin.site.register(DuplicatedEvents)
admin.site.register(BatchImportation)
class URLWidget(DynamicArrayWidget):

View File

@ -25,12 +25,17 @@ app.config_from_object("django.conf:settings", namespace="CELERY")
# Load task modules from all registered Django apps.
app.autodiscover_tasks()
def close_import_task(taskid, success, error_message):
def close_import_task(taskid, success, error_message, importer):
from agenda_culturel.models import BatchImportation
task = BatchImportation.objects.get(celery_id=taskid)
task.status = BatchImportation.STATUS.SUCCESS if success else BatchImportation.STATUS.FAILED
fields = ["status"]
task.nb_initial = importer.get_nb_events()
task.nb_imported = importer.get_nb_imported_events()
task.nb_updated = importer.get_nb_updated_events()
task.nb_removed = importer.get_nb_removed_events()
fields = ["status", "nb_initial", "nb_updated", "nb_imported", "nb_removed"]
if not success:
task.error_message = error_message
fields.append("error_message")
@ -46,10 +51,14 @@ def import_events_from_json(self, json):
importer = EventsImporter(self.request.id)
success, error_message = importer.import_events(json)
try:
success, error_message = importer.import_events(json)
# finally, close task
close_import_task(self.request.id, success, error_message)
# finally, close task
close_import_task(self.request.id, success, error_message, importer)
except Exception as e:
logger.error(e)
close_import_task(self.request.id, False, e, importer)
@app.task(bind=True)

View File

@ -1,8 +1,8 @@
from django.forms import ModelForm, ValidationError, TextInput, Form, URLField, MultipleHiddenInput
from django.forms import ModelForm, ValidationError, TextInput, Form, URLField, MultipleHiddenInput, Textarea, CharField
from datetime import date
from .models import Event
from .models import Event, BatchImportation
from django.utils.translation import gettext_lazy as _
class EventSubmissionForm(Form):
@ -13,7 +13,7 @@ class EventForm(ModelForm):
class Meta:
model = Event
fields = '__all__'
exclude = ["possibly_duplicated"]
widgets = {
'start_day': TextInput(attrs={'type': 'date', 'onchange': 'update_datetimes(event);', "onfocus": "this.oldvalue = this.value;"}),
'start_time': TextInput(attrs={'type': 'time', 'onchange': 'update_datetimes(event);', "onfocus": "this.oldvalue = this.value;"}),
@ -54,3 +54,25 @@ class EventForm(ModelForm):
return end_time
class BatchImportationForm(ModelForm):
    """Form used to start a batch import, from URLs or from a pasted JSON document."""

    # Optional raw JSON; it is not part of the model fields listed in Meta.
    json = CharField(
        label="JSON (facultatif)",
        widget=Textarea(attrs={"rows":"10"}),
        help_text=_("JSON in the format expected for the import. If the JSON is provided here, we will ignore the URLs given above, and use the information provided by the json without importing any additional events from the URL."),
        required=False,
    )

    class Meta:
        model = BatchImportation
        fields = ['source', 'browsable_url']

    def clean(self):
        """Reject the form when the JSON, the source and the browsable URL are all empty.

        Returns the cleaned data unchanged otherwise.
        """
        cleaned = super().clean()
        candidates = (
            cleaned.get("json"),
            cleaned.get("source"),
            cleaned.get("browsable_url"),
        )
        # NOTE(review): a browsable URL alone is enough to pass this check,
        # although the message only mentions the json and the source - confirm.
        if not any(candidates):
            raise ValidationError(_("You need to fill in either the json or the source possibly supplemented by the navigable URL."))

        # Always hand back the cleaned data, even though it was not modified.
        return cleaned

View File

@ -1,14 +1,37 @@
from agenda_culturel.models import Event
import json
from datetime import datetime
class EventsImporter:
# Store the given celery id, start with an empty error message and reset the
# result properties through init_result_properties().
def __init__(self, celery_id):
self.celery_id = celery_id
self.error_message = ""
self.init_result_properties()
def init_result_properties(self):
    """Reset every attribute that describes the outcome of an import run."""
    # Counters exposed through get_nb_updated_events() / get_nb_removed_events().
    self.nb_updated = self.nb_removed = 0
    # Values read from the JSON header by import_events().
    self.date = self.url = None
    # Two distinct lists: events built from the JSON, and events returned by the database import.
    self.event_objects, self.db_event_objects = [], []
# Number of event objects built from the JSON structure (length of self.event_objects).
def get_nb_events(self):
return len(self.event_objects)
# Number of events returned by the database import (length of self.db_event_objects).
def get_nb_imported_events(self):
return len(self.db_event_objects)
# Value of the nb_updated counter, as set by save_imported().
def get_nb_updated_events(self):
return self.nb_updated
# Value of the nb_removed counter, as set by save_imported().
def get_nb_removed_events(self):
return self.nb_removed
def import_events(self, json_structure):
self.init_result_properties()
try:
structure = json.loads(json_structure)
@ -17,32 +40,53 @@ class EventsImporter:
if not "header" in structure:
return (False, "JSON is not correctly structured: missing header")
if not "events" in structure:
return (False, "JSON is not correctly structured: missing events")
if "url" in structure["header"]:
self.url = structure["header"]["url"]
else:
return (False, "JSON is not correctly structured: missing url in header")
if "url" in structure["header"]:
if "date" in structure["header"]:
self.date = structure["header"]["date"]
# load events
# get events
for event in structure["events"]:
self.import_event(event)
if "created_date" not in event:
if self.date is not None:
event["created_date"] = self.date
else:
event["created_date"] = datetime.now()
# update object with infos from header, and with the list of imported objects
# TODO
# events that are missing from the import but in database are turned into drafts
# TODO
if not self.import_event(event):
return (False, self.error_message)
# import them
self.save_imported()
return (True, "")
def save_imported(self):
    """Hand the collected events to ``Event.import_events`` and keep its three results."""
    imported, updated, removed = Event.import_events(self.event_objects, remove_missing=True)
    self.db_event_objects = imported
    self.nb_updated = updated
    self.nb_removed = removed
def is_valid_event_structure(self, event):
    """Check that one event of the JSON structure carries the mandatory data.

    An event needs a ``title`` and a ``start_day``. A key that is present but
    holds ``None`` (JSON ``null``) is treated like a missing key, so that the
    problem is reported here instead of failing later when the event is built
    or saved.

    Returns True when the event is usable; otherwise stores the reason in
    ``self.error_message`` and returns False.
    """
    for required in ("title", "start_day"):
        if required not in event or event[required] is None:
            self.error_message = "JSON is not correctly structured: one event without " + required
            return False
    return True
def import_event(self, event):
# TODO
pass
if self.is_valid_event_structure(event):
event_obj = Event.from_structure(event)
self.event_objects.append(event_obj)
return True
else:
return False

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: agenda_culturel\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-12-23 08:38+0000\n"
"POT-Creation-Date: 2023-12-29 15:35+0000\n"
"PO-Revision-Date: 2023-10-29 14:16+0000\n"
"Last-Translator: Jean-Marie Favreau <jeanmarie.favreau@free.fr>\n"
"Language-Team: Jean-Marie Favreau <jeanmarie.favreau@free.fr>\n"
@ -17,261 +17,311 @@ msgstr ""
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
#: agenda_culturel/forms.py:37
#: agenda_culturel/forms.py:38
msgid "The end date must be after the start date."
msgstr "La date de fin doit être après la date de début."
#: agenda_culturel/forms.py:52
#: agenda_culturel/forms.py:53
msgid "The end time cannot be earlier than the start time."
msgstr "L'heure de fin ne peut pas être avant l'heure de début."
#: agenda_culturel/models.py:23 agenda_culturel/models.py:52
#: agenda_culturel/models.py:211
#: agenda_culturel/forms.py:61
msgid ""
"JSON in the format expected for the import. If the JSON is provided here, we "
"will ignore the URLs given above, and use the information provided by the "
"json without importing any additional events from the URL."
msgstr ""
"JSON au format attendu pour l'import. Si le JSON est fourni ici, on ignorera "
"les URL données au dessus, et on utilisera les informations fournies par le "
"json sans réaliser d'importation supplémentaire d'événements depuis l'URL."
#: agenda_culturel/forms.py:74
msgid ""
"You need to fill in either the json or the source possibly supplemented by "
"the navigable URL."
msgstr ""
"Vous devez renseigner soit le json soit la source éventuellement complétée "
"de l'URL navigable."
#: agenda_culturel/models.py:26 agenda_culturel/models.py:55
#: agenda_culturel/models.py:465
msgid "Name"
msgstr "Nom"
#: agenda_culturel/models.py:23 agenda_culturel/models.py:52
#: agenda_culturel/models.py:26 agenda_culturel/models.py:55
msgid "Category name"
msgstr "Nom de la catégorie"
#: agenda_culturel/models.py:24
#: agenda_culturel/models.py:27
msgid "Content"
msgstr "Contenu"
#: agenda_culturel/models.py:24
#: agenda_culturel/models.py:27
msgid "Text as shown to the visitors"
msgstr "Texte tel que présenté aux visiteureuses"
#: agenda_culturel/models.py:25
#: agenda_culturel/models.py:28
msgid "URL path"
msgstr ""
#: agenda_culturel/models.py:25
#: agenda_culturel/models.py:28
msgid "URL path where the content is included."
msgstr ""
#: agenda_culturel/models.py:53
#: agenda_culturel/models.py:56
msgid "Alternative Name"
msgstr "Nom alternatif"
#: agenda_culturel/models.py:53
#: agenda_culturel/models.py:56
msgid "Alternative name used with a time period"
msgstr "Nom alternatif utilisé avec une période de temps"
#: agenda_culturel/models.py:54
#: agenda_culturel/models.py:57
msgid "Short name"
msgstr "Nom court"
#: agenda_culturel/models.py:54
#: agenda_culturel/models.py:57
msgid "Short name of the category"
msgstr "Nom court de la catégorie"
#: agenda_culturel/models.py:55
#: agenda_culturel/models.py:58
msgid "Color"
msgstr "Couleur"
#: agenda_culturel/models.py:55
#: agenda_culturel/models.py:58
msgid "Color used as background for the category"
msgstr "Couleur utilisée comme fond de la catégorie"
#: agenda_culturel/models.py:92 agenda_culturel/models.py:109
#: agenda_culturel/models.py:95 agenda_culturel/models.py:142
msgid "Category"
msgstr "Catégorie"
#: agenda_culturel/models.py:93
#: agenda_culturel/models.py:96
msgid "Categories"
msgstr "Catégories"
#: agenda_culturel/models.py:98
#: agenda_culturel/models.py:130
msgid "Published"
msgstr "Publié"
#: agenda_culturel/models.py:99
#: agenda_culturel/models.py:131
msgid "Draft"
msgstr "Brouillon"
#: agenda_culturel/models.py:100
#: agenda_culturel/models.py:132
msgid "Trash"
msgstr "Corbeille"
#: agenda_culturel/models.py:105
#: agenda_culturel/models.py:138
msgid "Title"
msgstr "Titre"
#: agenda_culturel/models.py:105
#: agenda_culturel/models.py:138
msgid "Short title"
msgstr "Titre court"
#: agenda_culturel/models.py:107 agenda_culturel/models.py:238
#: agenda_culturel/models.py:140 agenda_culturel/models.py:492
msgid "Status"
msgstr "Status"
#: agenda_culturel/models.py:109
#: agenda_culturel/models.py:142
msgid "Category of the event"
msgstr "Catégorie de l'événement"
#: agenda_culturel/models.py:111
#: agenda_culturel/models.py:144
msgid "Day of the event"
msgstr "Date de l'événement"
#: agenda_culturel/models.py:112
#: agenda_culturel/models.py:145
msgid "Starting time"
msgstr "Heure de début"
#: agenda_culturel/models.py:114
#: agenda_culturel/models.py:147
msgid "End day of the event"
msgstr "Fin de l'événement"
#: agenda_culturel/models.py:114
#: agenda_culturel/models.py:147
msgid "End day of the event, only required if different from the start day."
msgstr ""
"Date de fin de l'événement, uniquement nécessaire s'il est différent du "
"premier jour de l'événement"
#: agenda_culturel/models.py:115
#: agenda_culturel/models.py:148
msgid "Final time"
msgstr "Heure de fin"
#: agenda_culturel/models.py:117
#: agenda_culturel/models.py:150
msgid "Location"
msgstr "Localisation"
#: agenda_culturel/models.py:117
#: agenda_culturel/models.py:150
msgid "Address of the event"
msgstr "Adresse de l'événement"
#: agenda_culturel/models.py:119
#: agenda_culturel/models.py:152
msgid "Description"
msgstr "Description"
#: agenda_culturel/models.py:119
#: agenda_culturel/models.py:152
msgid "General description of the event"
msgstr "Description générale de l'événement"
#: agenda_culturel/models.py:121
#: agenda_culturel/models.py:154
msgid "Illustration (local image)"
msgstr "Illustration (image locale)"
#: agenda_culturel/models.py:121
#: agenda_culturel/models.py:154
msgid "Illustration image stored in the agenda server"
msgstr "Image d'illustration stockée sur le serveur de l'agenda"
#: agenda_culturel/models.py:123
#: agenda_culturel/models.py:156
msgid "Illustration"
msgstr "Illustration"
#: agenda_culturel/models.py:123
#: agenda_culturel/models.py:156
msgid "URL of the illustration image"
msgstr "URL de l'image illustrative"
#: agenda_culturel/models.py:124
#: agenda_culturel/models.py:157
msgid "Illustration description"
msgstr "Description de l'illustration"
#: agenda_culturel/models.py:124
#: agenda_culturel/models.py:157
msgid "Alternative text used by screen readers for the image"
msgstr "Texte alternatif utilisé par les lecteurs d'écrans pour l'image"
#: agenda_culturel/models.py:126
#: agenda_culturel/models.py:159
msgid "UUIDs"
msgstr "UUIDs"
#: agenda_culturel/models.py:159
msgid "UUIDs from import to detect duplicated entries."
msgstr "UUIDs utilisés pendant l'import pour détecter les entrées dupliquées"
#: agenda_culturel/models.py:160
msgid "URLs"
msgstr "URLs"
#: agenda_culturel/models.py:126
#: agenda_culturel/models.py:160
msgid "List of all the urls where this event can be found."
msgstr "Liste de toutes les urls où l'événement peut être trouvé."
#: agenda_culturel/models.py:128
#: agenda_culturel/models.py:162
msgid "Tags"
msgstr "Étiquettes"
#: agenda_culturel/models.py:128
#: agenda_culturel/models.py:162
msgid "A list of tags that describe the event."
msgstr "Une liste d'étiquettes décrivant l'événement"
#: agenda_culturel/models.py:158
#: agenda_culturel/models.py:164
msgid "Possibly duplicated"
msgstr "Possibles doublons"
#: agenda_culturel/models.py:194
msgid "Event"
msgstr "Événement"
#: agenda_culturel/models.py:159
#: agenda_culturel/models.py:195
msgid "Events"
msgstr "Événements"
#: agenda_culturel/models.py:210
#: agenda_culturel/models.py:464
msgid "Subject"
msgstr "Sujet"
#: agenda_culturel/models.py:210
#: agenda_culturel/models.py:464
msgid "The subject of your message"
msgstr "Sujet de votre message"
#: agenda_culturel/models.py:211
#: agenda_culturel/models.py:465
msgid "Your name"
msgstr "Votre nom"
#: agenda_culturel/models.py:212
#: agenda_culturel/models.py:466
msgid "Email address"
msgstr "Adresse email"
#: agenda_culturel/models.py:212
#: agenda_culturel/models.py:466
msgid "Your email address"
msgstr "Votre adresse email"
#: agenda_culturel/models.py:213
#: agenda_culturel/models.py:467
msgid "Message"
msgstr "Message"
#: agenda_culturel/models.py:213
#: agenda_culturel/models.py:467
msgid "Your message"
msgstr "Votre message"
#: agenda_culturel/models.py:217 agenda_culturel/views.py:341
#: agenda_culturel/models.py:471 agenda_culturel/views.py:343
msgid "Closed"
msgstr "Fermé"
#: agenda_culturel/models.py:217
#: agenda_culturel/models.py:471
msgid "this message has been processed and no longer needs to be handled"
msgstr "Ce message a été traité et ne nécessite plus d'être pris en charge"
#: agenda_culturel/models.py:218
#: agenda_culturel/models.py:472
msgid "Comments"
msgstr "Commentaires"
#: agenda_culturel/models.py:218
#: agenda_culturel/models.py:472
msgid "Comments on the message from the moderation team"
msgstr "Commentaires sur ce message par l'équipe de modération"
#: agenda_culturel/models.py:227
#: agenda_culturel/models.py:481
msgid "Running"
msgstr ""
#: agenda_culturel/models.py:228
#: agenda_culturel/models.py:482
msgid "Canceled"
msgstr "Annulé"
#: agenda_culturel/models.py:229
#: agenda_culturel/models.py:483
msgid "Success"
msgstr "Succès"
#: agenda_culturel/models.py:230
#: agenda_culturel/models.py:484
msgid "Failed"
msgstr "Erreur"
#: agenda_culturel/models.py:235
#: agenda_culturel/models.py:489
msgid "Source"
msgstr "Source"
#: agenda_culturel/models.py:235
#: agenda_culturel/models.py:489
msgid "URL of the source document"
msgstr "URL du document source"
#: agenda_culturel/models.py:236
#: agenda_culturel/models.py:490
msgid "Browsable url"
msgstr "URL navigable"
#: agenda_culturel/models.py:236
#: agenda_culturel/models.py:490
msgid "URL of the corresponding document that will be shown to visitors."
msgstr "URL correspondant au document et qui sera montrée aux visiteurs"
#: agenda_culturel/models.py:494
msgid "Error message"
msgstr "Message d'erreur"
#: agenda_culturel/models.py:496
msgid "Number of collected events"
msgstr "Nombre d'événements collectés"
#: agenda_culturel/models.py:497
msgid "Number of imported events"
msgstr "Nombre d'événements importés"
#: agenda_culturel/models.py:498
msgid "Number of updated events"
msgstr "Nombre d'événements mis à jour"
#: agenda_culturel/models.py:499
msgid "Number of removed events"
msgstr "Nombre d'événements supprimés"
#: agenda_culturel/settings/base.py:134
msgid "English"
msgstr "anglais"
@ -280,27 +330,27 @@ msgstr "anglais"
msgid "French"
msgstr "français"
#: agenda_culturel/views.py:188
#: agenda_culturel/views.py:190
msgid "The static content has been successfully updated."
msgstr "Le contenu statique a été modifié avec succès."
#: agenda_culturel/views.py:194
#: agenda_culturel/views.py:196
msgid "The event has been successfully modified."
msgstr "L'événement a été modifié avec succès."
#: agenda_culturel/views.py:205
#: agenda_culturel/views.py:207
msgid "The event has been successfully deleted."
msgstr "L'événement a été supprimé avec succès"
#: agenda_culturel/views.py:222
#: agenda_culturel/views.py:224
msgid "The status has been successfully modified."
msgstr "Le status a été modifié avec succès."
#: agenda_culturel/views.py:244
#: agenda_culturel/views.py:246
msgid "The event is saved."
msgstr "L'événement est enregistré."
#: agenda_culturel/views.py:247
#: agenda_culturel/views.py:249
msgid ""
"The event has been submitted and will be published as soon as it has been "
"validated by the moderation team."
@ -308,7 +358,7 @@ msgstr ""
"L'événement a été soumis et sera publié dès qu'il aura été validé par "
"l'équipe de modération."
#: agenda_culturel/views.py:277
#: agenda_culturel/views.py:279
msgid ""
"The event has been successfully extracted, and you can now submit it after "
"modifying it if necessary."
@ -316,7 +366,7 @@ msgstr ""
"L'événement a été extrait avec succès, vous pouvez maintenant le soumettre "
"après l'avoir modifié au besoin."
#: agenda_culturel/views.py:281
#: agenda_culturel/views.py:283
msgid ""
"Unable to extract an event from the proposed URL. Please use the form below "
"to submit the event."
@ -324,12 +374,12 @@ msgstr ""
"Impossible d'extraire un événement depuis l'URL proposée. Veuillez utiliser "
"le formulaire ci-dessous pour soumettre l'événement."
#: agenda_culturel/views.py:290
#: agenda_culturel/views.py:292
msgid "This URL has already been submitted, and you can find the event below."
msgstr ""
"Cette URL a déjà été soumise, et vous trouverez l'événement ci-dessous."
#: agenda_culturel/views.py:294
#: agenda_culturel/views.py:296
msgid ""
"This URL has already been submitted, but has not been selected for "
"publication by the moderation team."
@ -337,23 +387,23 @@ msgstr ""
"Cette URL a déjà été soumise, mais n'a pas été retenue par l'équipe de "
"modération pour la publication."
#: agenda_culturel/views.py:296
#: agenda_culturel/views.py:298
msgid "This URL has already been submitted and is awaiting moderation."
msgstr "Cette URL a déjà été soumise, et est en attente de modération"
#: agenda_culturel/views.py:318
#: agenda_culturel/views.py:320
msgid "Your message has been sent successfully."
msgstr "Votre message a été envoyé avec succès."
#: agenda_culturel/views.py:326
#: agenda_culturel/views.py:328
msgid "The contact message properties has been successfully modified."
msgstr "Les propriétés du message de contact ont été modifiées avec succès."
#: agenda_culturel/views.py:341
#: agenda_culturel/views.py:343
msgid "Open"
msgstr "Ouvert"
#: agenda_culturel/views.py:381
#: agenda_culturel/views.py:383
msgid "Search"
msgstr "Rechercher"
@ -361,7 +411,7 @@ msgstr "Rechercher"
msgid "The import has been run successfully."
msgstr "L'import a été lancé avec succès"
#: agenda_culturel/views.py:507
#: agenda_culturel/views.py:521
msgid "The import has been canceled."
msgstr "L'import a été annulé"

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.7 on 2023-12-23 12:51
from django.db import migrations, models
# Schema migration following 0015_event_uuids: adds the `nb_removed`
# PositiveIntegerField (default 0) to the `batchimportation` model.
class Migration(migrations.Migration):
dependencies = [
('agenda_culturel', '0015_event_uuids'),
]
operations = [
migrations.AddField(
model_name='batchimportation',
name='nb_removed',
field=models.PositiveIntegerField(default=0, verbose_name='Number of removed events'),
),
]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.2.7 on 2023-12-23 13:01
from django.db import migrations, models
# Schema migration following 0016_batchimportation_nb_removed: adds the
# `nb_updated` PositiveIntegerField (default 0) to the `batchimportation` model.
class Migration(migrations.Migration):
dependencies = [
('agenda_culturel', '0016_batchimportation_nb_removed'),
]
operations = [
migrations.AddField(
model_name='batchimportation',
name='nb_updated',
field=models.PositiveIntegerField(default=0, verbose_name='Number of updated events'),
),
]

View File

@ -0,0 +1,28 @@
# Generated by Django 4.2.7 on 2023-12-23 13:31
from django.db import migrations, models
# Schema migration following 0017_batchimportation_nb_updated, on the `event` model:
# - adds a nullable `imported_date` DateTimeField;
# - alters `created_date` to a non-editable DateTimeField;
# - alters `modified_date` to a nullable DateTimeField.
class Migration(migrations.Migration):
dependencies = [
('agenda_culturel', '0017_batchimportation_nb_updated'),
]
operations = [
migrations.AddField(
model_name='event',
name='imported_date',
field=models.DateTimeField(blank=True, null=True),
),
migrations.AlterField(
model_name='event',
name='created_date',
field=models.DateTimeField(editable=False),
),
migrations.AlterField(
model_name='event',
name='modified_date',
field=models.DateTimeField(blank=True, null=True),
),
]

View File

@ -0,0 +1,25 @@
# Generated by Django 4.2.7 on 2023-12-29 11:44
from django.db import migrations, models
import django.db.models.deletion
# Schema migration following 0018: creates the `DuplicatedEvents` model (only an
# auto-created `id`) and adds to `event` a nullable `possibly_duplicated`
# foreign key to it, set to NULL when the referenced group is deleted.
class Migration(migrations.Migration):
dependencies = [
('agenda_culturel', '0018_event_imported_date_alter_event_created_date_and_more'),
]
operations = [
migrations.CreateModel(
name='DuplicatedEvents',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
],
),
migrations.AddField(
model_name='event',
name='possibly_duplicated',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='agenda_culturel.duplicatedevents'),
),
]

View File

@ -0,0 +1,11 @@
from django.db import migrations
from django.contrib.postgres.operations import TrigramExtension
# Migration following 0019 whose only operation is Django's TrigramExtension().
# NOTE(review): presumably required by the TrigramSimilarity lookups used on
# events (PostgreSQL pg_trgm extension) - confirm.
class Migration(migrations.Migration):
dependencies = [
('agenda_culturel', '0019_duplicatedevents_event_possibly_duplicated'),
]
operations = [TrigramExtension()]

View File

@ -9,6 +9,9 @@ from urllib.parse import urlparse
import urllib.request
import os
from django.core.files import File
from django.utils import timezone
from django.contrib.postgres.search import TrigramSimilarity
from django.db.models import Q
from django.template.defaultfilters import date as _date
@ -92,6 +95,35 @@ class Category(models.Model):
verbose_name = _('Category')
verbose_name_plural = _('Categories')
class DuplicatedEvents(models.Model):
    """Group of events that are possibly duplicates of one another.

    No field is declared here; events reference their group through
    ``Event.possibly_duplicated``.
    """

    def nb_duplicated(self):
        """Return the number of events attached to this group."""
        return Event.objects.filter(possibly_duplicated=self).count()

    def merge_into(self, other):
        """Move every event of this group into ``other``, then delete this group."""
        # Events are saved one by one, so Event.save() runs for each of them.
        for event in Event.objects.filter(possibly_duplicated=self):
            event.possibly_duplicated = other
            event.save()
        # The group is now empty and can be removed.
        self.delete()

    @staticmethod
    def merge_groups(groups):
        """Merge all the given groups into the first one and return it.

        Returns None when ``groups`` is empty. Declared as a static method
        because it takes no instance: ``DuplicatedEvents.merge_groups(groups)``
        keeps working, and calling it on an instance no longer misbinds
        ``groups``.
        """
        if len(groups) == 0:
            return None

        result = groups[0]
        for group in groups[1:]:
            group.merge_into(result)
        return result
class Event(models.Model):
class STATUS(models.TextChoices):
@ -99,8 +131,9 @@ class Event(models.Model):
DRAFT = "draft", _("Draft")
TRASH = "trash", _("Trash")
created_date = models.DateTimeField(auto_now_add=True)
modified_date = models.DateTimeField(auto_now=True)
created_date = models.DateTimeField(editable=False)
imported_date = models.DateTimeField(blank=True, null=True)
modified_date = models.DateTimeField(blank=True, null=True)
title = models.CharField(verbose_name=_('Title'), help_text=_('Short title'), max_length=512)
@ -128,6 +161,8 @@ class Event(models.Model):
tags = ArrayField(models.CharField(max_length=64), verbose_name=_('Tags'), help_text=_("A list of tags that describe the event."), blank=True, null=True)
possibly_duplicated = models.ForeignKey(DuplicatedEvents, verbose_name=_('Possibly duplicated'), on_delete=models.SET_NULL, null=True, blank=True)
def get_consolidated_end_day(self, intuitive=True):
if intuitive:
end_day = self.get_consolidated_end_day(False)
@ -180,30 +215,248 @@ class Event(models.Model):
return self.status == Event.STATUS.TRASH
def modified(self):
return abs((self.modified_date - self.created_date).total_seconds()) > 1
return self.modified_date is None or abs((self.modified_date - self.created_date).total_seconds()) > 1
# Count the events whose status is Event.STATUS.DRAFT. Takes no instance.
def nb_draft_events():
return Event.objects.filter(status=Event.STATUS.DRAFT).count()
def save(self, *args, **kwargs):
def download_image(self):
    """Download ``self.image`` and attach the result as ``self.local_image``.

    Best effort: when the download fails, ``self.local_image`` is left
    untouched. Returns None in every case.
    """
    # The local file is named after the last component of the URL path.
    basename = os.path.basename(urlparse(self.image).path)

    try:
        tmpfile, _headers = urllib.request.urlretrieve(self.image)
    except Exception:
        # Download errors are ignored on purpose. Unlike a bare ``except``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return None

    # NOTE(review): the handle is not closed here; presumably it is read when
    # the model is saved - confirm.
    self.local_image = File(name=basename, file=open(tmpfile, "rb"))
# Stamp the event with the current time (timezone.now()):
# - created_date when the event has no id yet;
# - imported_date when the instance carries a `require_imported_date` attribute;
# - modified_date.
# NOTE(review): indentation was lost in this view; the last assignment is
# presumably unconditional - confirm.
def update_dates(self):
now = timezone.now()
if not self.id:
self.created_date = now
if hasattr(self, "require_imported_date"):
self.imported_date = now
self.modified_date = now
def prepare_save(self):
self.update_dates()
# if the image is defined but not locally downloaded
if self.image and not self.local_image:
# first download file
a = urlparse(self.image)
basename = os.path.basename(a.path)
self.download_image()
try:
tmpfile, _ = urllib.request.urlretrieve(self.image)
except:
return None
# if the download is ok, then create the corresponding file object
self.local_image = File(name=basename, file=open(tmpfile, "rb"))
def save(self, *args, **kwargs):
    """Prepare the event, drop a duplicate group with a single member, then save."""
    self.prepare_save()

    group = self.possibly_duplicated
    # A group counting a single event is deleted and the event detached from it.
    if group is not None and group.nb_duplicated() == 1:
        group.delete()
        self.possibly_duplicated = None

    super().save(*args, **kwargs)
def from_structure(event_structure):
    """Build an unsaved ``Event`` from the dictionary describing one imported event.

    ``event_structure`` is modified in place: the import-specific keys
    (``uuid``, ``published``, ``url_human``, ``last_modified``) are always
    removed and translated into model fields, so that none of them reaches
    the ``Event`` constructor, even when its value is None.

    - ``category``: a category name, replaced by the matching ``Category``.
    - ``uuid``: stored as the single element of ``uuids``.
    - ``published``: a true value gives the PUBLISHED status, anything else
      (False, None, missing) gives DRAFT.
    - ``url_human``: stored as the single element of ``reference_urls``.
    - ``last_modified``: used as ``created_date``; the current time otherwise.
    - ``start_time`` / ``end_time``: ISO strings are converted to ``time``.
    - ``description``: None is replaced by an empty string.
    """
    category = event_structure.get("category")
    if category is not None:
        event_structure["category"] = Category.objects.get(name=category)

    uuid = event_structure.pop("uuid", None)
    if uuid is not None:
        event_structure["uuids"] = [uuid]

    # Test the value itself: the key is usually present with False.
    published = event_structure.pop("published", None)
    event_structure["status"] = Event.STATUS.PUBLISHED if published else Event.STATUS.DRAFT

    url_human = event_structure.pop("url_human", None)
    if url_human is not None:
        event_structure["reference_urls"] = [url_human]

    last_modified = event_structure.pop("last_modified", None)
    event_structure["created_date"] = last_modified if last_modified is not None else timezone.now()

    # Only strings are parsed; None and already-built time objects are left alone.
    for key in ("start_time", "end_time"):
        value = event_structure.get(key)
        if isinstance(value, str):
            event_structure[key] = time.fromisoformat(value)

    if "description" in event_structure and event_structure["description"] is None:
        event_structure["description"] = ""

    return Event(**event_structure)
def find_similar_events(self):
    """Return the events that look like this one.

    An event is similar when it takes place on the same day, starts at the
    same time or within about one hour of it, and has a trigram similarity
    above 0.5 on the title and above 0.3 on the location.

    Side effect: a ``start_time`` given as an ISO string is converted to a
    ``time`` object on this instance.
    """
    start_time_test = Q(start_time=self.start_time)

    if self.start_time is not None:
        # convert str start_time to time
        if isinstance(self.start_time, str):
            self.start_time = time.fromisoformat(self.start_time)
        hour, minute = self.start_time.hour, self.start_time.minute
        # One hour before and after, clamped to the bounds of the day.
        earliest = time(hour - 1, minute) if hour >= 1 else time(0, 0)
        latest = time(hour + 1, minute) if hour < 23 else time(23, 59)
        start_time_test = start_time_test | Q(start_time__range=(earliest, latest))

    # The second threshold applies to the location similarity; testing the
    # title twice left the location annotation unused.
    return (
        Event.objects
        .annotate(similarity_title=TrigramSimilarity("title", self.title))
        .annotate(similarity_location=TrigramSimilarity("location", self.location))
        .filter(
            Q(start_day=self.start_day)
            & start_time_test
            & Q(similarity_title__gt=0.5)
            & Q(similarity_location__gt=0.3)
        )
    )
def find_same_events_by_uuid(self):
    """Return the events whose ``uuids`` contain all the uuids of this event.

    Returns None (not an empty queryset) when this event has no uuid.
    """
    # NOTE(review): import_events() calls len() on this result, which fails on
    # None - the caller should handle the None case.
    if self.uuids is None or len(self.uuids) == 0:
        return None
    return Event.objects.filter(uuids__contains=self.uuids)
def get_possibly_duplicated(self):
    """Return the other events of this event's duplicate group.

    An empty list is returned when the event belongs to no group.
    """
    group = self.possibly_duplicated
    if group is None:
        return []
    same_group = Event.objects.filter(possibly_duplicated=group)
    return same_group.exclude(pk=self.pk)
# Put this event and all the given `events` in one shared DuplicatedEvents group.
# The groups already referenced by `events` or by this event are collected; a
# new group is created when there is none, otherwise the existing ones are
# merged with DuplicatedEvents.merge_groups().
# Only the other events are written to the database here (bulk_update); this
# event gets the group assigned in memory and is not saved by this method.
# NOTE(review): indentation was lost in this view, so whether `group.save()`
# belongs to the else branch or runs in both cases cannot be told - confirm.
def set_possibly_duplicated(self, events):
# get existing groups
groups = list(set([e.possibly_duplicated for e in events] + [self.possibly_duplicated]))
groups = [g for g in groups if g is not None]
# do we have to create a new group?
if len(groups) == 0:
group = DuplicatedEvents.objects.create()
logger.warning("set possibily duplicated 0 {}".format(group))
else:
# otherwise merge existing groups
group = DuplicatedEvents.merge_groups(groups)
logger.warning("set possibily duplicated not 0 {}".format(group))
group.save()
# set the possibly duplicated group for the current object
self.possibly_duplicated = group
# and for the other events
for e in events:
e.possibly_duplicated = group
# finally save the other events
Event.objects.bulk_update(events, fields=["possibly_duplicated"])
def data_fields():
    """Return the names of the attributes that carry the data of an event.

    The list is used to compare two events and to copy data from one event
    to another. Each name appears once ("image_alt" used to be listed twice).
    """
    return [
        "title",
        "location",
        "start_day",
        "start_time",
        "end_day",
        "end_time",
        "description",
        "image",
        "image_alt",
        "reference_urls",
    ]
def same_event_by_data(self, other):
    """Tell whether ``other`` carries the same data as this event.

    Every attribute named by ``Event.data_fields()`` is compared through its
    string representation. The first difference found is logged as a warning
    and ends the comparison.

    Returns:
        True when all data fields match, False otherwise.
    """
    for name in Event.data_fields():
        mine, theirs = getattr(self, name), getattr(other, name)
        if str(mine) == str(theirs):
            continue
        logger.warning("on trouve une différence dans {}: {} vs {}".format(name, mine, theirs))
        return False
    return True
def find_same_event_by_data_in_list(self, events):
    """Return, in their original order, the events of ``events`` for which
    ``self.same_event_by_data`` returns a true value."""
    return list(filter(self.same_event_by_data, events))
def find_last_imported_not_modified(events):
    """Pick the most recently imported event that was not modified afterwards.

    An event qualifies when it has an ``imported_date`` and either no
    ``modified_date`` or a ``modified_date`` not later than its
    ``imported_date``.

    Returns:
        The qualifying event with the greatest ``imported_date`` (the first
        one in ``events`` order when several share that date), or None when
        no event qualifies.
    """
    def untouched_since_import(candidate):
        if candidate.imported_date is None:
            return False
        return candidate.modified_date is None or candidate.modified_date <= candidate.imported_date

    candidates = [e for e in events if untouched_since_import(e)]
    # max() keeps the first maximal element, like a stable descending sort followed by [0]
    return max(candidates, key=lambda e: e.imported_date, default=None)
def update(self, other):
    """Overwrite this event's data with the data of ``other`` and merge uuids.

    Every attribute named by ``Event.data_fields()`` is copied from ``other``.
    The uuids of ``other`` that this event does not know yet are appended to
    ``self.uuids``. Nothing is saved here; persisting is left to the caller.

    Args:
        other: the event providing the new data. Its ``uuids`` may be None.
    """
    # TODO: what about category, tags?
    # set attributes
    for attr in Event.data_fields():
        setattr(self, attr, getattr(other, attr))
    # add a possible missing uuid
    if self.uuids is None:
        self.uuids = []
    # an event without any uuid has uuids set to None: nothing to merge then
    for identifier in other.uuids or []:
        if identifier not in self.uuids:
            self.uuids.append(identifier)
# Limitation: the given events should not be considered similar one to another...
def import_events(events, remove_missing=False):
    """Import a batch of events, updating or flagging the ones already known.

    For each event:
      * if stored events share one of its uuids, the most recently imported
        one that was not modified since its import is updated with the new
        data; if there is none, the event is imported and grouped with them
        as a possible duplicate;
      * otherwise (no uuid, or no stored event with that uuid) similar events
        are searched: the first one with identical data is updated; if none
        is identical, the event is imported and grouped with the similar
        events as a possible duplicate;
      * if nothing matches, the event is simply imported.

    Args:
        events: the events to import (not yet saved).
        remove_missing: reserved for turning events missing from the import
            into drafts; not implemented yet, so nothing is removed.

    Returns:
        A tuple ``(imported, nb_updated, nb_removed)``: the result of the
        bulk creation, the result of the bulk update, and 0.
    """
    to_import = []
    to_update = []

    # for each event, check if it's a new one, or one to be updated
    for event in events:
        # imported events should be updated
        event.require_imported_date = True
        event.prepare_save()

        # check if the event has already been imported (using uuid);
        # the lookup returns None when the event carries no uuid
        same_events = event.find_same_events_by_uuid()

        if same_events is not None and len(same_events) != 0:
            # check if one event has been imported and not modified in this list
            same_imported = Event.find_last_imported_not_modified(same_events)

            if same_imported:
                # if this event exists, it will be updated with new data
                same_imported.update(event)
                same_imported.require_imported_date = True
                same_imported.prepare_save()
                to_update.append(same_imported)
            else:
                # otherwise, the new event is possibly a duplicate of the others
                event.set_possibly_duplicated(same_events)
                # it will be imported
                to_import.append(event)
        else:
            # if uuid is unique (or not available), check for similar events
            similar_events = event.find_similar_events()

            # if similar events exist, add this relation to the event
            if len(similar_events) != 0:
                # check if an event from the list is exactly the same as the new one (using data)
                same_events = event.find_same_event_by_data_in_list(similar_events)
                if same_events is not None and len(same_events) > 0:
                    # merge with the first one
                    same_events[0].update(event)
                    same_events[0].require_imported_date = True
                    same_events[0].prepare_save()
                    to_update.append(same_events[0])
                else:
                    # the event is possibly a duplicate of the others
                    event.set_possibly_duplicated(similar_events)
                    to_import.append(event)
            else:
                # import this new event
                to_import.append(event)

    # then import all the new events
    imported = Event.objects.bulk_create(to_import)
    # and write back the events that received new data
    nb_updated = Event.objects.bulk_update(to_update, fields=Event.data_fields() + ["imported_date", "modified_date", "uuids"])

    nb_removed = 0
    if remove_missing:
        # events that are missing from the import but in database are turned into drafts
        # TODO
        # TODO: add self.source, or refer to the BatchImportation object
        pass

    return imported, nb_updated, nb_removed
class ContactMessage(models.Model):
@ -242,5 +495,7 @@ class BatchImportation(models.Model):
nb_initial = models.PositiveIntegerField(verbose_name=_('Number of collected events'), default=0)
nb_imported = models.PositiveIntegerField(verbose_name=_('Number of imported events'), default=0)
nb_updated = models.PositiveIntegerField(verbose_name=_('Number of updated events'), default=0)
nb_removed = models.PositiveIntegerField(verbose_name=_('Number of removed events'), default=0)
celery_id = models.CharField(max_length=128, default="")

View File

@ -196,3 +196,6 @@ if os_getenv("EMAIL_BACKEND"):
EMAIL_USE_TLS = os_getenv("EMAIL_USE_TLS", False)
EMAIL_USE_SSL = os_getenv("EMAIL_USE_SSL", False)
DEFAULT_FROM_EMAIL = os_getenv("DEFAULT_FROM_EMAIL")
# increase upload size for debug experiments: 10 times Django's default limit of 2621440 bytes (2.5 MB), i.e. 25 MB
DATA_UPLOAD_MAX_MEMORY_SIZE = 10 * 2621440

View File

@ -11,12 +11,6 @@
<article>
<form method="post">{% csrf_token %}
{{ form.as_p }}
<p>
<label for="id_json">JSON (facultatif) :</label>
<textarea id="id_json" name="json" rows="10"></textarea>
<span class="helptext">JSON au format attendu pour l'import. Si le JSON est fourni ici, on ignorera les URL données au dessus, et on utilisera les informations fournies par le json sans réaliser d'importation supplémentaire d'événements depuis l'URL.</span>
</p>
<input type="submit" value="Envoyer">
</form>
</article>

View File

@ -1,4 +1,4 @@
!<{% extends "agenda_culturel/page.html" %}
{% extends "agenda_culturel/page.html" %}
{% block title %}Importations par lot{% endblock %}
@ -19,10 +19,17 @@
<table role="grid">
<thead>
<tr>
<th>Identifiant</th>
<th>Date</th>
<th>Status</th>
<th>Action</th>
<th rowspan="2">Identifiant</th>
<th rowspan="2">Date</th>
<th rowspan="2">Status</th>
<th rowspan="2">Action</th>
<th colspan="4">événements</th>
</tr>
<tr>
<th>initial</th>
<th>importés</th>
<th>mis à jour</th>
<th>supprimés</th>
</tr>
</thead>
<tbody>
@ -32,6 +39,10 @@
<td>{{ obj.created_date }}</td>
<td><span{% if obj.status == "failed" %} data-tooltip="{{ obj.error_message }}"{% endif %}>{{ obj.status }}</span></td>
<td>{% if obj.status == "running" %}<a href="{% url 'cancel_import' obj.id %}">Annuler</a>{% endif %}</td>
<td>{% if obj.status == "success" %}{{ obj.nb_initial }}{% endif %}</td>
<td>{% if obj.status == "success" %}{{ obj.nb_imported }}{% endif %}</td>
<td>{% if obj.status == "success" %}{{ obj.nb_updated }}{% endif %}</td>
<td>{% if obj.status == "success" %}{{ obj.nb_removed }}{% endif %}</td>
</tr>
{% endfor %}
</tbody>

View File

@ -16,6 +16,7 @@
{% include "agenda_culturel/single-event/event-single-inc.html" with event=event filter=filter %}
<aside>
<!-- TODO: en même temps -->
<article>
<head>
<h2>À la même date</h2>
@ -29,8 +30,23 @@
{% endfor %}
</ul>
</nav>
</article>
{% if event.possibly_duplicated %}
<article>
<head>
<h2>Possiblement dupliqués</h2>
</head>
<ul>
{% for e in event.get_possibly_duplicated %}
<li>
<a href="{{ e.get_absolute_url }}">{{ e }}</a>
</li>
{% endfor %}
</ul>
</article>
{% endif %}
</aside>

View File

@ -11,7 +11,7 @@ from django.http import HttpResponseRedirect
from django.urls import reverse
import urllib
from .forms import EventSubmissionForm, EventForm
from .forms import EventSubmissionForm, EventForm, BatchImportationForm
from .models import Event, Category, StaticContent, ContactMessage, BatchImportation
from django.utils import timezone
@ -484,21 +484,22 @@ def imports(request):
return render(request, 'agenda_culturel/imports.html', {'paginator_filter': response} )
class BatchImportationCreateView(SuccessMessageMixin, LoginRequiredMixin, CreateView):
model = BatchImportation
fields = ['source', 'browsable_url']
success_url = reverse_lazy('imports')
success_message = _('The import has been run successfully.')
form_class = BatchImportationForm
def form_valid(self, form):
# run import
if "json" in form.data and form.data["json"] is not None and form.data["json"].strip() != "":
result = import_events_from_json.delay(form.data["json"])
else:
result = import_events_from_url.delay(self.object.source, self.object.browsable_url)
result = import_events_from_url.delay(form.data["source"], form.data["browsable_url"])
# update the object with celery_id
form.instance.celery_id = result.id