From c5db83cf870e429e9b22c025d52552b1b04bc6a3 Mon Sep 17 00:00:00 2001
From: Jean-Marie Favreau
Date: Sat, 3 Feb 2024 18:57:39 +0100
Subject: [PATCH] The Facebook import now shares its code with the other
 imports

Fix #80
---
 experimentations/.gitignore                   |   3 +
 experimentations/get_facebook_event.py        | 179 +++---------------
 src/agenda_culturel/celery.py                 |  10 +-
 .../{importation.py => db_importer.py}        |   2 +-
 .../import_tasks/downloader.py                |  14 +-
 src/agenda_culturel/import_tasks/extractor.py |  29 ++-
 .../extractor_facebook.py}                    | 167 +++++-----------
 .../import_tasks/extractor_ical.py            |   3 +
 src/agenda_culturel/import_tasks/importer.py  |  28 ++-
 src/agenda_culturel/static/style.scss         |   6 +
 .../templates/agenda_culturel/event_form.html |  45 ++++-
 src/agenda_culturel/views.py                  |  24 ++-
 12 files changed, 209 insertions(+), 301 deletions(-)
 create mode 100644 experimentations/.gitignore
 rename src/agenda_culturel/{importation.py => db_importer.py} (99%)
 rename src/agenda_culturel/{extractors.py => import_tasks/extractor_facebook.py} (58%)

diff --git a/experimentations/.gitignore b/experimentations/.gitignore
new file mode 100644
index 0000000..0c94727
--- /dev/null
+++ b/experimentations/.gitignore
@@ -0,0 +1,3 @@
+*.json
+*.html
+*.ical
diff --git a/experimentations/get_facebook_event.py b/experimentations/get_facebook_event.py
index 19defd6..45d2b5b 100755
--- a/experimentations/get_facebook_event.py
+++ b/experimentations/get_facebook_event.py
@@ -1,171 +1,40 @@
 #!/usr/bin/python3
 # coding: utf-8
 
-import requests
-import hashlib
 import os
-
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.chrome.options import Options
-
-
-from bs4 import BeautifulSoup
-
 import json
+import sys
 
-class SimpleEvent:
+# getting the name of the directory
+# where this file is present.
+current = os.path.dirname(os.path.realpath(__file__))
+
+# getting the parent directory name
+# where the current directory is present.
+parent = os.path.dirname(current)
+
+# adding the parent directory to
+# the sys.path.
+sys.path.append(parent) - def __init__(self, data): - self.elements = {} - - for key in ["id", "start_timestamp", "end_timestamp"]: - self.elements[key] = data[key] if key in data else None - - if "parent_event" in data: - self.parent = SimpleEvent(data["parent_event"]) +from src.agenda_culturel.import_tasks.downloader import * +from src.agenda_culturel.import_tasks.extractor import * +from src.agenda_culturel.import_tasks.importer import * +from src.agenda_culturel.import_tasks.extractor_facebook import * -class Event: - - name = "event" - keys = [ - ["start_time_formatted", 'start_timestamp', - 'is_past', - "name", - "price_info", - "cover_media_renderer", - "event_creator", - "id", - "day_time_sentence", - "event_place", - "comet_neighboring_siblings"], - ["event_description"], - ["start_timestamp", "end_timestamp"] - ] - rules = { - "event_description": { "description": ["text"]}, - "cover_media_renderer": {"image_alt": ["cover_photo", "photo", "accessibility_caption"], "image": ["cover_photo", "photo", "full_image", "uri"]}, - "event_creator": { "event_creator_name": ["name"], "event_creator_url": ["url"] }, - "event_place": {"event_place_name": ["name"] } - } - - def __init__(self, i, event): - self.fragments = {} - self.elements = {} - self.neighbor_events = None - self.possible_end_timestamp = [] - self.add_fragment(i, event) - - def add_fragment(self, i, event): - self.fragments[i] = event - - if Event.keys[i] == ["start_timestamp", "end_timestamp"]: - self.get_possible_end_timestamp(i, event) - else: - for k in Event.keys[i]: - if k == "comet_neighboring_siblings": - self.get_neighbor_events(event[k]) - elif k in Event.rules: - for nk, rule in Event.rules[k].items(): - c = event[k] - for ki in rule: - c = c[ki] - self.elements[nk] = c - else: - self.elements[k] = event[k] - - def get_possible_end_timestamp(self, i, data): - self.possible_end_timestamp.append(dict((k, data[k]) for k in Event.keys[i])) - - def get_neighbor_events(self, data): - self.neighbor_events = [SimpleEvent(d) for d in data] - - def __str__(self): - return str(self.elements) + "\n Neighbors: " + ", ".join([ne.elements["id"] for ne in self.neighbor_events]) - - def consolidate_current_event(self): - if self.neighbor_events is not None and "id" in self.elements and "end_timestamp" not in self.elements: - id = self.elements["id"] - for ne in self.neighbor_events: - if ne.elements["id"] == id: - self.elements["end_timestamp"] = ne.elements["end_timestamp"] - - if "end_timestamp" not in self.elements and len(self.possible_end_timestamp) != 0: - for s in self.possible_end_timestamp: - if s["start_timestamp"] == self.elements["start_timestamp"]: - self.elements["end_timestamp"] = s["end_timestamp"] - break - - def find_event_fragment_in_array(array, event, first = True): - if isinstance(array, dict): - - seen = False - for i, ks in enumerate(Event.keys): - if len(ks) == len([k for k in ks if k in array]): - seen = True - if event is None: - event = Event(i, array) - else: - event.add_fragment(i, array) - # only consider the first of Event.keys - break - if not seen: - for k in array: - event = Event.find_event_fragment_in_array(array[k], event, False) - elif isinstance(array, list): - for e in array: - event = Event.find_event_fragment_in_array(e, event, False) - - if event is not None and first: - event.consolidate_current_event() - return event -#url="https://www.facebook.com/events/ical/export/?eid=2294200007432315" -#url="https://www.facebook.com/events/2294199997432316/2294200007432315/" 
-#url="https://www.facebook.com/events/635247792092358/" -url="https://www.facebook.com/events/872781744074648" -url="https://www.facebook.com/events/1432798543943663?" -#url_cal = "https://www.facebook.com/events/ical/export/?eid=993406668581410" -#url="https://jmtrivial.info" -cachedir = "cache" -result = hashlib.md5(url.encode()) -hash = result.hexdigest() +if __name__ == "__main__": -filename = os.path.join(cachedir, hash + ".html") + u2e = URL2Events(ChromiumHeadlessDownloader(), FacebookEventExtractor(single_event=True)) + url="https://www.facebook.com/events/872781744074648" -if os.path.isfile(filename): - # print("Use cache") - with open(filename) as f: - doc = "\n".join(f.readlines()) -else: - print("Download page") + events = u2e.process(url, cache = "fb.html", published = True) - options = Options() - options.add_argument("--headless=new") - service = Service("/usr/bin/chromedriver") - - driver = webdriver.Chrome(service=service, options=options) - driver.get(url) - doc = driver.page_source - driver.quit() - - dir = os.path.dirname(filename) - if not os.path.exists(dir): - os.makedirs(dir) - with open(filename, "w") as text_file: - text_file.write(doc) - - -soup = BeautifulSoup(doc) - -event = None -for json_script in soup.find_all('script', type="application/json"): - json_txt = json_script.get_text() - json_struct = json.loads(json_txt) - - event = Event.find_event_fragment_in_array(json_struct, event) - -print(event) + exportfile = "event-facebook.json" + print("Saving events to file {}".format(exportfile)) + with open(exportfile, "w") as f: + json.dump(events, f, indent=4, default=str) diff --git a/src/agenda_culturel/celery.py b/src/agenda_culturel/celery.py index 07b635f..1b8958c 100644 --- a/src/agenda_culturel/celery.py +++ b/src/agenda_culturel/celery.py @@ -5,8 +5,6 @@ from celery import Celery from celery.schedules import crontab from celery.utils.log import get_task_logger -from .extractors import ExtractorAllURLs - from .import_tasks.downloader import * from .import_tasks.extractor import * from .import_tasks.importer import * @@ -53,7 +51,7 @@ def close_import_task(taskid, success, error_message, importer): @app.task(bind=True) def import_events_from_json(self, json): from agenda_culturel.models import Event, BatchImportation - from .importation import EventsImporter + from .db_importer import DBImporterEvents # create a batch importation importation = BatchImportation(celery_id=self.request.id) @@ -63,7 +61,7 @@ def import_events_from_json(self, json): logger.info("Import events from json: {}".format(self.request.id)) - importer = EventsImporter(self.request.id) + importer = DBImporterEvents(self.request.id) #try: success, error_message = importer.import_events(json) @@ -78,7 +76,7 @@ def import_events_from_json(self, json): @app.task(bind=True) def run_recurrent_import(self, pk): from agenda_culturel.models import RecurrentImport, BatchImportation - from .importation import EventsImporter + from .db_importer import DBImporterEvents from django.shortcuts import get_object_or_404 logger.info("Run recurrent import: {}".format(self.request.id)) @@ -92,7 +90,7 @@ def run_recurrent_import(self, pk): importation.save() # create an importer - importer = EventsImporter(self.request.id) + importer = DBImporterEvents(self.request.id) # prepare downloading and extracting processes downloader = SimpleDownloader() if rimport.downloader == RecurrentImport.DOWNLOADER.SIMPLE else ChromiumHeadlessDownloader() diff --git a/src/agenda_culturel/importation.py 
b/src/agenda_culturel/db_importer.py
similarity index 99%
rename from src/agenda_culturel/importation.py
rename to src/agenda_culturel/db_importer.py
index bf29e0d..fd43a82 100644
--- a/src/agenda_culturel/importation.py
+++ b/src/agenda_culturel/db_importer.py
@@ -7,7 +7,7 @@ import logging
 logger = logging.getLogger(__name__)
 
-class EventsImporter:
+class DBImporterEvents:
 
     def __init__(self, celery_id):
         self.celery_id = celery_id
diff --git a/src/agenda_culturel/import_tasks/downloader.py b/src/agenda_culturel/import_tasks/downloader.py
index ef39f01..751eba9 100644
--- a/src/agenda_culturel/import_tasks/downloader.py
+++ b/src/agenda_culturel/import_tasks/downloader.py
@@ -37,14 +37,18 @@ class ChromiumHeadlessDownloader(Downloader):
 
     def __init__(self):
         super().__init__()
-        options = Options()
-        options.add_argument("--headless=new")
-        service = Service("/usr/bin/chromedriver")
-        self.driver = webdriver.Chrome(service=service, options=options)
+        self.options = Options()
+        self.options.add_argument("--headless=new")
+        self.options.add_argument("--disable-dev-shm-usage")
+        self.options.add_argument("--no-sandbox")
+        self.service = Service("/usr/bin/chromedriver")
 
     def download(self, url):
         print("Download {}".format(url))
+        self.driver = webdriver.Chrome(service=self.service, options=self.options)
         self.driver.get(url)
-        return driver.page_source
+        doc = self.driver.page_source
+        self.driver.quit()
+        return doc
 
diff --git a/src/agenda_culturel/import_tasks/extractor.py b/src/agenda_culturel/import_tasks/extractor.py
index 768b208..e1530c0 100644
--- a/src/agenda_culturel/import_tasks/extractor.py
+++ b/src/agenda_culturel/import_tasks/extractor.py
@@ -13,6 +13,10 @@ class Extractor(ABC):
     def extract(self, content, url, url_human = None):
         pass
 
+    @abstractmethod
+    def clean_url(url):
+        pass
+
     def set_header(self, url):
         self.header["url"] = url
         self.header["date"] = datetime.now()
@@ -20,7 +24,7 @@
     def clear_events(self):
         self.events = []
 
-    def add_event(self, title, category, start_day, location, description, tags, uuid, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False):
+    def add_event(self, title, category, start_day, location, description, tags, uuid, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False, image=None, image_alt=None):
         if title is None:
             print("ERROR: cannot import an event without name")
             return
@@ -36,8 +40,11 @@
             "location": location,
             "description": description,
             "tags": tags,
-            "published": published
+            "published": published,
+            "image": image,
+            "image_alt": image_alt
         }
+        # TODO: why url_human and not reference_url?
         if url_human is not None:
             event["url_human"] = url_human
         if start_time is not None:
@@ -60,3 +67,21 @@
 
     def get_structure(self):
         return { "header": self.header, "events": self.events}
+
+    def clean_url(url):
+        from .extractor_ical import ICALExtractor
+        from .extractor_facebook import FacebookEventExtractor
+
+        result = url
+        for e in [ICALExtractor, FacebookEventExtractor]:
+            result = e.clean_url(result)
+        return result
+
+    def get_default_extractors(single_event=False):
+        from .extractor_ical import ICALExtractor
+        from .extractor_facebook import FacebookEventExtractor
+
+        if single_event:
+            return [FacebookEventExtractor(single_event=True)]
+        else:
+            return [ICALExtractor(), FacebookEventExtractor(single_event=False)]
\ No newline at end of file
diff --git
a/src/agenda_culturel/extractors.py b/src/agenda_culturel/import_tasks/extractor_facebook.py similarity index 58% rename from src/agenda_culturel/extractors.py rename to src/agenda_culturel/import_tasks/extractor_facebook.py index 76dd75e..43913ba 100644 --- a/src/agenda_culturel/extractors.py +++ b/src/agenda_culturel/import_tasks/extractor_facebook.py @@ -1,65 +1,18 @@ -from abc import ABC, abstractmethod +import icalendar +import warnings -from django.db import models - -from selenium import webdriver -from selenium.webdriver.chrome.service import Service -from selenium.webdriver.chrome.options import Options - -import urllib.request -from tempfile import NamedTemporaryFile -from urllib.parse import urlparse -import os - -from bs4 import BeautifulSoup - -import json from datetime import datetime, date +from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning +from urllib.parse import urlparse + +from .extractor import * +import json import logging logger = logging.getLogger(__name__) -class Extractor: - - name = None - - @abstractmethod - def is_known_url(url): - pass - - @abstractmethod - def extract(url): - pass - - @abstractmethod - def clean_url(url): - pass - - def download(url): - try: - options = Options() - options.add_argument("--headless=new") - options.add_argument("--disable-dev-shm-usage") - options.add_argument("--no-sandbox") - service = Service("/usr/bin/chromedriver") - - driver = webdriver.Chrome(service=service, options=options) - driver.get(url) - doc = driver.page_source - driver.quit() - return doc - except Exception as e: - logger.error(e) - return None - - - - - -class ExtractorFacebook(Extractor): - - name = "Facebook" +class FacebookEventExtractor(Extractor): class SimpleFacebookEvent: @@ -70,7 +23,7 @@ class ExtractorFacebook(Extractor): self.elements[key] = data[key] if key in data else None if "parent_event" in data: - self.parent = ExtractorFacebook.SimpleFacebookEvent(data["parent_event"]) + self.parent = FacebookEventExtractor.SimpleFacebookEvent(data["parent_event"]) class FacebookEvent: @@ -119,14 +72,14 @@ class ExtractorFacebook(Extractor): def add_fragment(self, i, event): self.fragments[i] = event - if ExtractorFacebook.FacebookEvent.keys[i] == ["start_timestamp", "end_timestamp"]: + if FacebookEventExtractor.FacebookEvent.keys[i] == ["start_timestamp", "end_timestamp"]: self.get_possible_end_timestamp(i, event) else: - for k in ExtractorFacebook.FacebookEvent.keys[i]: + for k in FacebookEventExtractor.FacebookEvent.keys[i]: if k == "comet_neighboring_siblings": self.get_neighbor_events(event[k]) - elif k in ExtractorFacebook.FacebookEvent.rules: - for nk, rule in ExtractorFacebook.FacebookEvent.rules[k].items(): + elif k in FacebookEventExtractor.FacebookEvent.rules: + for nk, rule in FacebookEventExtractor.FacebookEvent.rules[k].items(): error = False c = event[k] for ki in rule: @@ -141,11 +94,11 @@ class ExtractorFacebook(Extractor): def get_possible_end_timestamp(self, i, data): - self.possible_end_timestamp.append(dict((k, data[k]) for k in ExtractorFacebook.FacebookEvent.keys[i])) + self.possible_end_timestamp.append(dict((k, data[k]) for k in FacebookEventExtractor.FacebookEvent.keys[i])) def get_neighbor_events(self, data): - self.neighbor_events = [ExtractorFacebook.SimpleFacebookEvent(d) for d in data] + self.neighbor_events = [FacebookEventExtractor.SimpleFacebookEvent(d) for d in data] def __str__(self): return str(self.elements) + "\n Neighbors: " + ", ".join([ne.elements["id"] for ne in self.neighbor_events]) @@ -168,21 +121,21 @@ 
class ExtractorFacebook(Extractor):
         if isinstance(array, dict):
 
             seen = False
-            for i, ks in enumerate(ExtractorFacebook.FacebookEvent.keys):
+            for i, ks in enumerate(FacebookEventExtractor.FacebookEvent.keys):
                 if len(ks) == len([k for k in ks if k in array]):
                     seen = True
                     if event is None:
-                        event = ExtractorFacebook.FacebookEvent(i, array)
+                        event = FacebookEventExtractor.FacebookEvent(i, array)
                     else:
                         event.add_fragment(i, array)
                     # only consider the first of FacebookEvent.keys
                     break
             if not seen:
                 for k in array:
-                    event = ExtractorFacebook.FacebookEvent.find_event_fragment_in_array(array[k], event, False)
+                    event = FacebookEventExtractor.FacebookEvent.find_event_fragment_in_array(array[k], event, False)
         elif isinstance(array, list):
             for e in array:
-                event = ExtractorFacebook.FacebookEvent.find_event_fragment_in_array(e, event, False)
+                event = FacebookEventExtractor.FacebookEvent.find_event_fragment_in_array(e, event, False)
 
         if event is not None and first:
             event.consolidate_current_event()
@@ -190,28 +143,33 @@
 
         def build_event(self, url):
-            from .models import Event
-
             image = self.get_element("image")
+            return {
+                "title": self.get_element("name"),
+                "category": None,
+                "start_day": self.get_element_date("start_timestamp"),
+                "location": self.get_element("event_place_name"),
+                "description": self.get_element("description"),
+                "tags": [],
+                "uuid": url,
+                "url_human": url,
+                "start_time": self.get_element_time("start_timestamp"),
+                "end_day": self.get_element_date("end_timestamp"),
+                "end_time": self.get_element_time("end_timestamp"),
+                "image": self.get_element("image"),
+                "image_alt": self.get_element("image_alt"),
+            }
 
-            return Event(title=self.get_element("name"),
-                status=Event.STATUS.DRAFT,
-                start_day=self.get_element_date("start_timestamp"),
-                start_time=self.get_element_time("start_timestamp"),
-                end_day=self.get_element_date("end_timestamp"),
-                end_time=self.get_element_time("end_timestamp"),
-                location=self.get_element("event_place_name"),
-                description=self.get_element("description"),
-                image=self.get_element("image"),
-                image_alt=self.get_element("image_alt"),
-                uuids=[url],
-                reference_urls=[url])
+
+    def __init__(self, single_event=False):
+        self.single_event = single_event
+        super().__init__()
 
     def clean_url(url):
-        if ExtractorFacebook.is_known_url(url):
+        if FacebookEventExtractor.is_known_url(url):
             u = urlparse(url)
             return "https://www.facebook.com" + u.path
         else:
@@ -222,46 +180,23 @@ class ExtractorFacebook(Extractor):
             return u.netloc in ["facebook.com", "www.facebook.com", "m.facebook.com"]
 
-    def process_page(txt, url):
+    def extract(self, content, url, url_human = None, default_values = None, published = False):
+        # NOTE: this method does not use url_human nor default_values
+
+        # get all the information from the content, step by step
         fevent = None
-        soup = BeautifulSoup(txt, "html.parser")
+        soup = BeautifulSoup(content, "html.parser")
         for json_script in soup.find_all('script', type="application/json"):
             json_txt = json_script.get_text()
             json_struct = json.loads(json_txt)
-            fevent = ExtractorFacebook.FacebookEvent.find_event_fragment_in_array(json_struct, fevent)
+            fevent = FacebookEventExtractor.FacebookEvent.find_event_fragment_in_array(json_struct, fevent)
 
         if fevent is not None:
-            logger.info("Facebook event: " + str(fevent))
-            result = fevent.build_event(url)
-            return result
+            self.set_header(url)
+            event = fevent.build_event(url)
+            logger.warning("published: " + str(published))
+            event["published"] = published
+            self.add_event(**event)
+            return self.get_structure()
 
-        return None
-
-
-class ExtractorAllURLs:
-
-    extractors = [ExtractorFacebook]
-
-    def clean_url(url):
-        result = url
-        for e in ExtractorAllURLs.extractors:
-            result = e.clean_url(result)
-        return result
-
-    def extract(url):
-        logger.info("Run extraction")
-
-        txt = Extractor.download(url)
-        if txt is None:
-            logger.info("Cannot download url")
-            return None
-
-        for e in ExtractorAllURLs.extractors:
-            result = e.process_page(txt, url)
-            if result is not None:
-                return result
-            else:
-                logger.info("Not a " + e.name + " link")
-
-        return None
+        return None
\ No newline at end of file
diff --git a/src/agenda_culturel/import_tasks/extractor_ical.py b/src/agenda_culturel/import_tasks/extractor_ical.py
index 7d812c3..b83572c 100644
--- a/src/agenda_culturel/import_tasks/extractor_ical.py
+++ b/src/agenda_culturel/import_tasks/extractor_ical.py
@@ -39,6 +39,9 @@ class ICALExtractor(Extractor):
 
         return day, time
 
+    def clean_url(url):
+        return url
+
     def extract(self, content, url, url_human = None, default_values = None, published = False):
         warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
 
diff --git a/src/agenda_culturel/import_tasks/importer.py b/src/agenda_culturel/import_tasks/importer.py
index 44b57a9..68d5b2b 100644
--- a/src/agenda_culturel/import_tasks/importer.py
+++ b/src/agenda_culturel/import_tasks/importer.py
@@ -6,13 +6,13 @@
 from .extractor import *
 
 class URL2Events:
 
-    def __init__(self, downloader, extractor):
+    def __init__(self, downloader = SimpleDownloader(), extractor = None, single_event=False):
         self.downloader = downloader
         self.extractor = extractor
+        self.single_event = single_event
 
-    def process(self, url, url_human = None, cache = None, default_values = None, published = False):
-
+    def get_content(self, url, cache = None):
         if cache and os.path.exists(cache):
             print("Loading cache ({})".format(cache))
             with open(cache) as f:
@@ -27,5 +27,25 @@
                 os.makedirs(dir)
             with open(cache, "w") as text_file:
                 text_file.write(content)
+        return content
+
+
+    def process(self, url, url_human = None, cache = None, default_values = None, published = False):
+        content = self.get_content(url, cache)
+
+        if content is None:
+            return None
+
+        if self.extractor is not None:
+            return self.extractor.extract(content, url, url_human, default_values, published)
+        else:
+            # if the extractor is not defined, use a list of default extractors
+            for e in Extractor.get_default_extractors(self.single_event):
+                #try:
+                events = e.extract(content, url, url_human, default_values, published)
+                if events is not None:
+                    return events
+                #except:
+                #    continue
+            return None
 
-        return self.extractor.extract(content, url, url_human, default_values, published)
diff --git a/src/agenda_culturel/static/style.scss b/src/agenda_culturel/static/style.scss
index cc97c3d..e577ae3 100644
--- a/src/agenda_culturel/static/style.scss
+++ b/src/agenda_culturel/static/style.scss
@@ -345,6 +345,9 @@ article#filters {
 .helptext, .subentry-search, .remarque {
     font-size: 80%;
     margin-top: -0.7em;
+    ul {
+        font-size: 100%;
+    }
 }
 
 .django-ckeditor-widget {
@@ -648,6 +651,9 @@ aside nav a.badge {
 
 /* mise en forme pour les récurrences */
 .container-fluid article form p .recurrence-widget {
+    @extend article;
+    width: 100%;
+    border: 0;
     .header a, .add-button {
         @extend [role="button"];
 
diff --git a/src/agenda_culturel/templates/agenda_culturel/event_form.html b/src/agenda_culturel/templates/agenda_culturel/event_form.html
index 7aa343c..2ca5b2c 100644
--- a/src/agenda_culturel/templates/agenda_culturel/event_form.html
+++ b/src/agenda_culturel/templates/agenda_culturel/event_form.html
@@ -27,11 +27,14 @@
 {% load static_content_extra %}
 
-{% if object %}
-<h1>Édition de l'événement {{ object.title }} ({{ object.start_day }})</h1>
-{% else %}
-<h1>Édition de l'événement importé</h1>
-{% endif %}
+<article>
+    <header>
+    {% if object %}
+        <h1>Édition de l'événement {{ object.title }} ({{ object.start_day }})</h1>
+    {% else %}
+        <h1>Édition de l'événement importé</h1>
+    {% endif %}
+    </header>
 
 <form method="post">
 
 {% csrf_token %}
@@ -42,5 +45,37 @@
+{% if object %}
+
+{% endif %}
+
 {% endblock %}
\ No newline at end of file
diff --git a/src/agenda_culturel/views.py b/src/agenda_culturel/views.py
index 79ca65b..14a28c3 100644
--- a/src/agenda_culturel/views.py
+++ b/src/agenda_culturel/views.py
@@ -30,7 +30,10 @@ from django.contrib import messages
 from django.contrib.messages.views import SuccessMessageMixin
 
 from .calendar import CalendarMonth, CalendarWeek, CalendarDay
-from .extractors import ExtractorAllURLs
+
+from .import_tasks.importer import URL2Events
+from .import_tasks.extractor import Extractor
+from .import_tasks.downloader import ChromiumHeadlessDownloader
 
 from .celery import app as celery_app, import_events_from_json, run_recurrent_import
 
@@ -262,7 +265,7 @@ def import_from_url(request):
     logger = logging.getLogger(__name__)
 
     if request.method == 'POST' and "title" in request.POST:
-        form = EventForm(request.POST)
+        form = EventForm(request.POST, is_authenticated=request.user.is_authenticated)
         if form.is_valid():
             new_event = form.save()
             if request.user.is_authenticated:
@@ -284,25 +287,32 @@
     form_event = EventForm(initial=initial)
 
     if request.method == 'POST':
-        form = EventSubmissionForm(request.POST)
+        form = EventSubmissionForm(request.POST)
 
         if form.is_valid():
             cd = form.cleaned_data
             url = cd.get('url')
-            url = ExtractorAllURLs.clean_url(url)
+            url = Extractor.clean_url(url)
 
             existing = Event.objects.filter(uuids__contains=[url])
             if len(existing) == 0:
-                event = ExtractorAllURLs.extract(url)
+                event = None
+
+                u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True)
+                events_structure = u2e.process(url, published=request.user.is_authenticated)
+                if events_structure is not None and "events" in events_structure and len(events_structure["events"]) > 0:
+                    event = Event.from_structure(events_structure["events"][0], events_structure["header"]["url"])
+                    # TODO: use celery to import the other events
 
                 if event != None:
-                    form = EventForm(instance=event)
+                    form = EventForm(instance=event, is_authenticated=request.user.is_authenticated)
                     messages.success(request, _("The event has been successfully extracted, and you can now submit it after modifying it if necessary."))
                     return render(request, 'agenda_culturel/event_form.html', context={'form': form })
                 else:
-                    form = EventForm(initial={'reference_urls': [url]})
+                    form = EventForm(initial={'reference_urls': [url]}, is_authenticated=request.user.is_authenticated)
                     messages.error(request, _("Unable to extract an event from the proposed URL. Please use the form below to submit the event."))
                     return render(request, 'agenda_culturel/import.html', context={'form': form, 'form_event': form_event})
             else:
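
Note on extending the pipeline: after this refactoring, supporting a new event source only requires subclassing Extractor and handing an instance to URL2Events; the downloading, caching and database import stay shared. The sketch below is an editorial illustration and not part of the patch: the target URL and its markup (article.event, data-date, h2, .place, .summary, id) are hypothetical, and only the Extractor/URL2Events API (clean_url, extract, set_header, clear_events, add_event, get_structure, process) comes from the code above.

#!/usr/bin/python3
# coding: utf-8

# Sketch of a hypothetical extractor built on the refactored pipeline,
# run from the repository root like experimentations/get_facebook_event.py.
from datetime import datetime

from bs4 import BeautifulSoup

from src.agenda_culturel.import_tasks.downloader import SimpleDownloader
from src.agenda_culturel.import_tasks.extractor import Extractor
from src.agenda_culturel.import_tasks.importer import URL2Events


class ExampleHTMLExtractor(Extractor):

    # no URL normalisation is needed for this (hypothetical) source
    def clean_url(url):
        return url

    def extract(self, content, url, url_human = None, default_values = None, published = False):
        self.set_header(url)
        self.clear_events()

        # assumed markup: one <article class="event" id="..." data-date="YYYY-MM-DD"> per event
        soup = BeautifulSoup(content, "html.parser")
        for item in soup.select("article.event"):
            self.add_event(
                title=item.select_one("h2").get_text(strip=True),
                category=None,
                start_day=datetime.strptime(item["data-date"], "%Y-%m-%d").date(),
                location=item.select_one(".place").get_text(strip=True),
                description=item.select_one(".summary").get_text(strip=True),
                tags=[],
                uuid=url + "#" + item["id"],
                url_human=url_human,
                published=published)

        return self.get_structure()


if __name__ == "__main__":
    u2e = URL2Events(SimpleDownloader(), ExampleHTMLExtractor())
    events = u2e.process("https://example.org/agenda", cache="example.html")
    print(events)

As with FacebookEventExtractor, returning get_structure() keeps the result in the same JSON shape that import_events_from_json and DBImporterEvents already consume on the celery side.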