The Facebook import now shares its code with the other imports

Fix #80
Jean-Marie Favreau 2024-02-03 18:57:39 +01:00
parent 3ebc53995b
commit c5db83cf87
12 changed files with 209 additions and 301 deletions
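
In short: the Facebook import no longer carries its own Selenium/BeautifulSoup pipeline; it now runs through the shared downloader, extractor and importer chain like every other import. A minimal sketch of the new flow, using only names and arguments that appear in the diffs below:

    from src.agenda_culturel.import_tasks.downloader import ChromiumHeadlessDownloader
    from src.agenda_culturel.import_tasks.extractor_facebook import FacebookEventExtractor
    from src.agenda_culturel.import_tasks.importer import URL2Events

    # Download with headless Chromium, extract with the Facebook extractor,
    # and get back the generic {"header": ..., "events": [...]} structure.
    u2e = URL2Events(ChromiumHeadlessDownloader(), FacebookEventExtractor(single_event=True))
    events = u2e.process("https://www.facebook.com/events/872781744074648",
                         cache="fb.html", published=True)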

experimentations/.gitignore (vendored, new file)

@@ -0,0 +1,3 @@
+*.json
+*.html
+*.ical

View File

@@ -1,171 +1,40 @@
 #!/usr/bin/python3
 # coding: utf-8
-import requests
-import hashlib
 import os
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.chrome.options import Options
-from bs4 import BeautifulSoup
 import json
+import sys
-
-class SimpleEvent:
-
-    def __init__(self, data):
-        self.elements = {}
-
-        for key in ["id", "start_timestamp", "end_timestamp"]:
-            self.elements[key] = data[key] if key in data else None
-
-        if "parent_event" in data:
-            self.parent = SimpleEvent(data["parent_event"])
-
-
-class Event:
-    name = "event"
-    keys = [
-        ["start_time_formatted", 'start_timestamp',
-         'is_past',
-         "name",
-         "price_info",
-         "cover_media_renderer",
-         "event_creator",
-         "id",
-         "day_time_sentence",
-         "event_place",
-         "comet_neighboring_siblings"],
-        ["event_description"],
-        ["start_timestamp", "end_timestamp"]
-    ]
-
-    rules = {
-        "event_description": { "description": ["text"]},
-        "cover_media_renderer": {"image_alt": ["cover_photo", "photo", "accessibility_caption"], "image": ["cover_photo", "photo", "full_image", "uri"]},
-        "event_creator": { "event_creator_name": ["name"], "event_creator_url": ["url"] },
-        "event_place": {"event_place_name": ["name"] }
-    }
-
-    def __init__(self, i, event):
-        self.fragments = {}
-        self.elements = {}
-        self.neighbor_events = None
-        self.possible_end_timestamp = []
-        self.add_fragment(i, event)
-
-    def add_fragment(self, i, event):
-        self.fragments[i] = event
-        if Event.keys[i] == ["start_timestamp", "end_timestamp"]:
-            self.get_possible_end_timestamp(i, event)
-        else:
-            for k in Event.keys[i]:
-                if k == "comet_neighboring_siblings":
-                    self.get_neighbor_events(event[k])
-                elif k in Event.rules:
-                    for nk, rule in Event.rules[k].items():
-                        c = event[k]
-                        for ki in rule:
-                            c = c[ki]
-                        self.elements[nk] = c
-                else:
-                    self.elements[k] = event[k]
-
-    def get_possible_end_timestamp(self, i, data):
-        self.possible_end_timestamp.append(dict((k, data[k]) for k in Event.keys[i]))
-
-    def get_neighbor_events(self, data):
-        self.neighbor_events = [SimpleEvent(d) for d in data]
-
-    def __str__(self):
-        return str(self.elements) + "\n Neighbors: " + ", ".join([ne.elements["id"] for ne in self.neighbor_events])
-
-    def consolidate_current_event(self):
-        if self.neighbor_events is not None and "id" in self.elements and "end_timestamp" not in self.elements:
-            id = self.elements["id"]
-            for ne in self.neighbor_events:
-                if ne.elements["id"] == id:
-                    self.elements["end_timestamp"] = ne.elements["end_timestamp"]
-
-        if "end_timestamp" not in self.elements and len(self.possible_end_timestamp) != 0:
-            for s in self.possible_end_timestamp:
-                if s["start_timestamp"] == self.elements["start_timestamp"]:
-                    self.elements["end_timestamp"] = s["end_timestamp"]
-                    break
-
-    def find_event_fragment_in_array(array, event, first = True):
-        if isinstance(array, dict):
-            seen = False
-            for i, ks in enumerate(Event.keys):
-                if len(ks) == len([k for k in ks if k in array]):
-                    seen = True
-                    if event is None:
-                        event = Event(i, array)
-                    else:
-                        event.add_fragment(i, array)
-                    # only consider the first of Event.keys
-                    break
-            if not seen:
-                for k in array:
-                    event = Event.find_event_fragment_in_array(array[k], event, False)
-        elif isinstance(array, list):
-            for e in array:
-                event = Event.find_event_fragment_in_array(e, event, False)
-
-        if event is not None and first:
-            event.consolidate_current_event()
-        return event
-
-
-#url="https://www.facebook.com/events/ical/export/?eid=2294200007432315"
-#url="https://www.facebook.com/events/2294199997432316/2294200007432315/"
-#url="https://www.facebook.com/events/635247792092358/"
-url="https://www.facebook.com/events/872781744074648"
-url="https://www.facebook.com/events/1432798543943663?"
-#url_cal = "https://www.facebook.com/events/ical/export/?eid=993406668581410"
-#url="https://jmtrivial.info"
-
-cachedir = "cache"
-
-result = hashlib.md5(url.encode())
-hash = result.hexdigest()
-filename = os.path.join(cachedir, hash + ".html")
-
-if os.path.isfile(filename):
-    # print("Use cache")
-    with open(filename) as f:
-        doc = "\n".join(f.readlines())
-else:
-    print("Download page")
-
-    options = Options()
-    options.add_argument("--headless=new")
-    service = Service("/usr/bin/chromedriver")
-    driver = webdriver.Chrome(service=service, options=options)
-    driver.get(url)
-    doc = driver.page_source
-    driver.quit()
-
-    dir = os.path.dirname(filename)
-    if not os.path.exists(dir):
-        os.makedirs(dir)
-    with open(filename, "w") as text_file:
-        text_file.write(doc)
-
-soup = BeautifulSoup(doc)
-
-event = None
-for json_script in soup.find_all('script', type="application/json"):
-    json_txt = json_script.get_text()
-    json_struct = json.loads(json_txt)
-    event = Event.find_event_fragment_in_array(json_struct, event)
-
-print(event)
+
+# getting the name of the directory
+# where this file is present
+current = os.path.dirname(os.path.realpath(__file__))
+
+# Getting the parent directory name
+# where the current directory is present.
+parent = os.path.dirname(current)
+
+# adding the parent directory to
+# the sys.path.
+sys.path.append(parent)
+
+from src.agenda_culturel.import_tasks.downloader import *
+from src.agenda_culturel.import_tasks.extractor import *
+from src.agenda_culturel.import_tasks.importer import *
+from src.agenda_culturel.import_tasks.extractor_facebook import *
+
+
+if __name__ == "__main__":
+
+    u2e = URL2Events(ChromiumHeadlessDownloader(), FacebookEventExtractor(single_event=True))
+    url="https://www.facebook.com/events/872781744074648"
+
+    events = u2e.process(url, cache = "fb.html", published = True)
+
+    exportfile = "event-facebook.json"
+    print("Saving events to file {}".format(exportfile))
+    with open(exportfile, "w") as f:
+        json.dump(events, f, indent=4, default=str)
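
The trimmed script now just drives the shared pipeline and dumps the result. A quick way to inspect the export it writes (a sketch, assuming the run succeeded; filename and keys are taken from the code above and the extractor diff below):

    import json

    # event-facebook.json is the exportfile written by the script above
    with open("event-facebook.json") as f:
        data = json.load(f)

    print(data["header"]["url"])   # source URL recorded by set_header()
    print(len(data["events"]))     # number of extracted events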

View File

@@ -5,8 +5,6 @@ from celery import Celery
 from celery.schedules import crontab
 from celery.utils.log import get_task_logger

-from .extractors import ExtractorAllURLs
-
 from .import_tasks.downloader import *
 from .import_tasks.extractor import *
 from .import_tasks.importer import *

@@ -53,7 +51,7 @@ def close_import_task(taskid, success, error_message, importer):
 @app.task(bind=True)
 def import_events_from_json(self, json):
     from agenda_culturel.models import Event, BatchImportation
-    from .importation import EventsImporter
+    from .db_importer import DBImporterEvents

     # create a batch importation
     importation = BatchImportation(celery_id=self.request.id)

@@ -63,7 +61,7 @@ def import_events_from_json(self, json):
     logger.info("Import events from json: {}".format(self.request.id))

-    importer = EventsImporter(self.request.id)
+    importer = DBImporterEvents(self.request.id)

     #try:
     success, error_message = importer.import_events(json)

@@ -78,7 +76,7 @@ def import_events_from_json(self, json):
 @app.task(bind=True)
 def run_recurrent_import(self, pk):
     from agenda_culturel.models import RecurrentImport, BatchImportation
-    from .importation import EventsImporter
+    from .db_importer import DBImporterEvents
     from django.shortcuts import get_object_or_404

     logger.info("Run recurrent import: {}".format(self.request.id))

@@ -92,7 +90,7 @@ def run_recurrent_import(self, pk):
     importation.save()

     # create an importer
-    importer = EventsImporter(self.request.id)
+    importer = DBImporterEvents(self.request.id)

     # prepare downloading and extracting processes
     downloader = SimpleDownloader() if rimport.downloader == RecurrentImport.DOWNLOADER.SIMPLE else ChromiumHeadlessDownloader()

View File

@@ -7,7 +7,7 @@ import logging

 logger = logging.getLogger(__name__)

-class EventsImporter:
+class DBImporterEvents:

     def __init__(self, celery_id):
         self.celery_id = celery_id

View File

@@ -37,14 +37,18 @@ class ChromiumHeadlessDownloader(Downloader):

     def __init__(self):
         super().__init__()
-        options = Options()
-        options.add_argument("--headless=new")
-        service = Service("/usr/bin/chromedriver")
-        self.driver = webdriver.Chrome(service=service, options=options)
+        self.options = Options()
+        self.options.add_argument("--headless=new")
+        self.options.add_argument("--disable-dev-shm-usage")
+        self.options.add_argument("--no-sandbox")
+        self.service = Service("/usr/bin/chromedriver")

     def download(self, url):
         print("Download {}".format(url))
+        self.driver = webdriver.Chrome(service=self.service, options=self.options)
         self.driver.get(url)
-        return driver.page_source
+        doc = self.driver.page_source
+        self.driver.quit()
+        return doc
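
Design note: the Chrome driver is now built and quit inside each download() call rather than kept for the downloader's lifetime, which also fixes the old return driver.page_source (only self.driver existed there). Caller code is unchanged; a sketch, with the URL from the experimentation script:

    dl = ChromiumHeadlessDownloader()
    # each call spawns, uses, and quits its own headless Chromium
    html = dl.download("https://www.facebook.com/events/872781744074648")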

View File

@@ -13,6 +13,10 @@ class Extractor(ABC):
     def extract(self, content, url, url_human = None):
         pass

+    @abstractmethod
+    def clean_url(url):
+        pass
+
     def set_header(self, url):
         self.header["url"] = url
         self.header["date"] = datetime.now()

@@ -20,7 +24,7 @@ class Extractor(ABC):
     def clear_events(self):
         self.events = []

-    def add_event(self, title, category, start_day, location, description, tags, uuid, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False):
+    def add_event(self, title, category, start_day, location, description, tags, uuid, recurrences=None, url_human=None, start_time=None, end_day=None, end_time=None, last_modified=None, published=False, image=None, image_alt=None):
         if title is None:
             print("ERROR: cannot import an event without name")
             return

@@ -36,8 +40,11 @@ class Extractor(ABC):
             "location": location,
             "description": description,
             "tags": tags,
-            "published": published
+            "published": published,
+            "image": image,
+            "image_alt": image_alt
         }
+        # TODO: why url_human and not reference_url?
         if url_human is not None:
             event["url_human"] = url_human
         if start_time is not None:

@@ -60,3 +67,21 @@ class Extractor(ABC):

     def get_structure(self):
         return { "header": self.header, "events": self.events}
+
+    def clean_url(url):
+        from .extractor_ical import ICALExtractor
+        from .extractor_facebook import FacebookEventExtractor
+
+        result = url
+        for e in [ICALExtractor, FacebookEventExtractor]:
+            result = e.clean_url(result)
+        return result
+
+    def get_default_extractors(single_event=False):
+        from .extractor_ical import ICALExtractor
+        from .extractor_facebook import FacebookEventExtractor
+
+        if single_event:
+            return [FacebookEventExtractor(single_event=True)]
+        else:
+            return [ICALExtractor(), FacebookEventExtractor(single_event=False)]
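
These two helpers make Extractor the single registry of concrete extractors: clean_url() chains every extractor's URL normalization, and get_default_extractors() returns the ordered list to try. A sketch of the intended behavior; the m.facebook.com example is an assumption based on is_known_url() in the Facebook extractor below:

    # Known Facebook hosts are normalized to www.facebook.com and the query
    # string is dropped; ICALExtractor.clean_url passes URLs through unchanged.
    url = Extractor.clean_url("https://m.facebook.com/events/872781744074648?ref=abc")
    # url == "https://www.facebook.com/events/872781744074648"

    # single_event=True yields [FacebookEventExtractor(single_event=True)];
    # otherwise [ICALExtractor(), FacebookEventExtractor(single_event=False)].
    extractors = Extractor.get_default_extractors(single_event=False)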

View File

@@ -1,65 +1,18 @@
-from abc import ABC, abstractmethod
-from django.db import models
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.chrome.options import Options
-import urllib.request
-from tempfile import NamedTemporaryFile
-from urllib.parse import urlparse
-import os
-from bs4 import BeautifulSoup
-import json
+import icalendar
+import warnings
 from datetime import datetime, date
+from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
+from urllib.parse import urlparse
+
+from .extractor import *
+
+import json
 import logging

 logger = logging.getLogger(__name__)

-class Extractor:
-    name = None
-
-    @abstractmethod
-    def is_known_url(url):
-        pass
-
-    @abstractmethod
-    def extract(url):
-        pass
-
-    @abstractmethod
-    def clean_url(url):
-        pass
-
-    def download(url):
-        try:
-            options = Options()
-            options.add_argument("--headless=new")
-            options.add_argument("--disable-dev-shm-usage")
-            options.add_argument("--no-sandbox")
-            service = Service("/usr/bin/chromedriver")
-            driver = webdriver.Chrome(service=service, options=options)
-            driver.get(url)
-            doc = driver.page_source
-            driver.quit()
-            return doc
-        except Exception as e:
-            logger.error(e)
-            return None
-
-
-class ExtractorFacebook(Extractor):
-    name = "Facebook"
+class FacebookEventExtractor(Extractor):

     class SimpleFacebookEvent:

@@ -70,7 +23,7 @@ class ExtractorFacebook(Extractor):
             self.elements[key] = data[key] if key in data else None

             if "parent_event" in data:
-                self.parent = ExtractorFacebook.SimpleFacebookEvent(data["parent_event"])
+                self.parent = FacebookEventExtractor.SimpleFacebookEvent(data["parent_event"])

     class FacebookEvent:

@@ -119,14 +72,14 @@ class ExtractorFacebook(Extractor):
         def add_fragment(self, i, event):
             self.fragments[i] = event
-            if ExtractorFacebook.FacebookEvent.keys[i] == ["start_timestamp", "end_timestamp"]:
+            if FacebookEventExtractor.FacebookEvent.keys[i] == ["start_timestamp", "end_timestamp"]:
                 self.get_possible_end_timestamp(i, event)
             else:
-                for k in ExtractorFacebook.FacebookEvent.keys[i]:
+                for k in FacebookEventExtractor.FacebookEvent.keys[i]:
                     if k == "comet_neighboring_siblings":
                         self.get_neighbor_events(event[k])
-                    elif k in ExtractorFacebook.FacebookEvent.rules:
-                        for nk, rule in ExtractorFacebook.FacebookEvent.rules[k].items():
+                    elif k in FacebookEventExtractor.FacebookEvent.rules:
+                        for nk, rule in FacebookEventExtractor.FacebookEvent.rules[k].items():
                             error = False
                             c = event[k]
                             for ki in rule:

@@ -141,11 +94,11 @@ class ExtractorFacebook(Extractor):
         def get_possible_end_timestamp(self, i, data):
-            self.possible_end_timestamp.append(dict((k, data[k]) for k in ExtractorFacebook.FacebookEvent.keys[i]))
+            self.possible_end_timestamp.append(dict((k, data[k]) for k in FacebookEventExtractor.FacebookEvent.keys[i]))

         def get_neighbor_events(self, data):
-            self.neighbor_events = [ExtractorFacebook.SimpleFacebookEvent(d) for d in data]
+            self.neighbor_events = [FacebookEventExtractor.SimpleFacebookEvent(d) for d in data]

         def __str__(self):
             return str(self.elements) + "\n Neighbors: " + ", ".join([ne.elements["id"] for ne in self.neighbor_events])

@@ -168,21 +121,21 @@ class ExtractorFacebook(Extractor):
             if isinstance(array, dict):
                 seen = False
-                for i, ks in enumerate(ExtractorFacebook.FacebookEvent.keys):
+                for i, ks in enumerate(FacebookEventExtractor.FacebookEvent.keys):
                     if len(ks) == len([k for k in ks if k in array]):
                         seen = True
                         if event is None:
-                            event = ExtractorFacebook.FacebookEvent(i, array)
+                            event = FacebookEventExtractor.FacebookEvent(i, array)
                         else:
                             event.add_fragment(i, array)
                         # only consider the first of FacebookEvent.keys
                         break
                 if not seen:
                     for k in array:
-                        event = ExtractorFacebook.FacebookEvent.find_event_fragment_in_array(array[k], event, False)
+                        event = FacebookEventExtractor.FacebookEvent.find_event_fragment_in_array(array[k], event, False)
             elif isinstance(array, list):
                 for e in array:
-                    event = ExtractorFacebook.FacebookEvent.find_event_fragment_in_array(e, event, False)
+                    event = FacebookEventExtractor.FacebookEvent.find_event_fragment_in_array(e, event, False)

             if event is not None and first:
                 event.consolidate_current_event()

@@ -190,28 +143,33 @@ class ExtractorFacebook(Extractor):

     def build_event(self, url):
-        from .models import Event
-
         image = self.get_element("image")
-        return Event(title=self.get_element("name"),
-                     status=Event.STATUS.DRAFT,
-                     start_day=self.get_element_date("start_timestamp"),
-                     start_time=self.get_element_time("start_timestamp"),
-                     end_day=self.get_element_date("end_timestamp"),
-                     end_time=self.get_element_time("end_timestamp"),
-                     location=self.get_element("event_place_name"),
-                     description=self.get_element("description"),
-                     image=self.get_element("image"),
-                     image_alt=self.get_element("image_alt"),
-                     uuids=[url],
-                     reference_urls=[url])
+        return {
+            "title": self.get_element("name"),
+            "category": None,
+            "start_day": self.get_element_date("start_timestamp"),
+            "location": self.get_element("event_place_name"),
+            "description": self.get_element("description"),
+            "tags": [],
+            "uuid": url,
+            "url_human": url,
+            "start_time": self.get_element_time("start_timestamp"),
+            "end_day": self.get_element_date("end_timestamp"),
+            "end_time": self.get_element_time("end_timestamp"),
+            "image": self.get_element("image"),
+            "image_alt": self.get_element("image"),
+        }
+
+    def __init__(self, single_event=False):
+        self.single_event = single_event
+        super().__init__()

     def clean_url(url):
-        if ExtractorFacebook.is_known_url(url):
+        if FacebookEventExtractor.is_known_url(url):
             u = urlparse(url)
             return "https://www.facebook.com" + u.path
         else:

@@ -222,46 +180,23 @@ class ExtractorFacebook(Extractor):
         return u.netloc in ["facebook.com", "www.facebook.com", "m.facebook.com"]

-    def process_page(txt, url):
+    def extract(self, content, url, url_human = None, default_values = None, published = False):
+        # NOTE: this method does not use url_human = None and default_values = None
+
+        # get step by step all information from the content
         fevent = None
-        soup = BeautifulSoup(txt, "html.parser")
+        soup = BeautifulSoup(content, "html.parser")
         for json_script in soup.find_all('script', type="application/json"):
             json_txt = json_script.get_text()
             json_struct = json.loads(json_txt)
-            fevent = ExtractorFacebook.FacebookEvent.find_event_fragment_in_array(json_struct, fevent)
+            fevent = FacebookEventExtractor.FacebookEvent.find_event_fragment_in_array(json_struct, fevent)

         if fevent is not None:
-            logger.info("Facebook event: " + str(fevent))
-            result = fevent.build_event(url)
-            return result
-
-        return None
-
-
-class ExtractorAllURLs:
-    extractors = [ExtractorFacebook]
-
-    def clean_url(url):
-        result = url
-        for e in ExtractorAllURLs.extractors:
-            result = e.clean_url(result)
-
-        return result
-
-    def extract(url):
-        logger.info("Run extraction")
-        txt = Extractor.download(url)
-
-        if txt is None:
-            logger.info("Cannot download url")
-            return None
-
-        for e in ExtractorAllURLs.extractors:
-            result = e.process_page(txt, url)
-            if result is not None:
-                return result
-            else:
-                logger.info("Not a " + e.name + " link")
+            self.set_header(url)
+            event = fevent.build_event(url)
+            logger.warning("published: " + str(published))
+            event["published"] = published
+            self.add_event(**event)
+            return self.get_structure()

         return None
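
Note that extract() now returns the generic structure assembled by Extractor.add_event() and get_structure() instead of a Django Event, which is what lets the celery tasks and the views share it. Roughly, a sketch only, with ellipses standing for the extracted values:

    # Shape of the value returned by FacebookEventExtractor.extract()
    structure = {
        "header": {"url": ..., "date": ...},
        "events": [{
            "title": ..., "start_day": ..., "start_time": ...,
            "end_day": ..., "end_time": ..., "location": ...,
            "description": ..., "image": ..., "image_alt": ...,
            "published": ...,
        }],
    }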

View File

@@ -39,6 +39,9 @@ class ICALExtractor(Extractor):
         return day, time

+    def clean_url(url):
+        return url
+
     def extract(self, content, url, url_human = None, default_values = None, published = False):
         warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)

View File

@@ -6,13 +6,13 @@ from .extractor import *

 class URL2Events:

-    def __init__(self, downloader, extractor):
+    def __init__(self, downloader = SimpleDownloader(), extractor = None, single_event=False):
         self.downloader = downloader
         self.extractor = extractor
+        self.single_event = single_event

-    def process(self, url, url_human = None, cache = None, default_values = None, published = False):
+    def get_content(self, url, cache = None):
         if cache and os.path.exists(cache):
             print("Loading cache ({})".format(cache))
             with open(cache) as f:

@@ -27,5 +27,25 @@ class URL2Events:
                 os.makedirs(dir)
             with open(cache, "w") as text_file:
                 text_file.write(content)
+        return content
+
+    def process(self, url, url_human = None, cache = None, default_values = None, published = False):
+        content = self.get_content(url, cache)
+        if content is None:
+            return None
+
+        if self.extractor is not None:
             return self.extractor.extract(content, url, url_human, default_values, published)
+        else:
+            # if the extractor is not defined, use a list of default extractors
+            for e in Extractor.get_default_extractors(self.single_event):
+                #try:
+                events = e.extract(content, url, url_human, default_values, published)
+                if events is not None:
+                    return events
+                #except:
+                #    continue
+
+        return None
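
When no extractor is given, process() now walks the default extractor list itself; this is exactly the shape of the call the reworked import_from_url view makes below. A sketch, with the sample URL from the experimentation script:

    # no explicit extractor: Extractor.get_default_extractors() decides
    u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True)
    events = u2e.process("https://www.facebook.com/events/872781744074648",
                         published=False)
    # events is the first extractor's non-None structure, or None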

View File

@@ -345,6 +345,9 @@ article#filters {
 .helptext, .subentry-search, .remarque {
     font-size: 80%;
     margin-top: -0.7em;
+    ul {
+        font-size: 100%;
+    }
 }

 .django-ckeditor-widget {

@@ -648,6 +651,9 @@ aside nav a.badge {
 /* formatting for recurrences */
 .container-fluid article form p .recurrence-widget {
+    @extend article;
+    width: 100%;
+    border: 0;

     .header a, .add-button {
         @extend [role="button"];

View File

@@ -27,11 +27,14 @@
 {% load static_content_extra %}

+<article>
+<header>
 {% if object %}
 <h1>Édition de l'événement {{ object.title }} ({{ object.start_day }})</h1>
 {% else %}
 <h1>Édition de l'événement importé</h1>
 {% endif %}
+</header>

 <div id="container"></div>
 <form method="post">{% csrf_token %}

@@ -42,5 +45,37 @@
     <input type="submit" value="Enregistrer">
   </div>
 </form>
+{% if object %}
+<footer class="remarque">
+    Informations complémentaires non éditables&nbsp;:
+    <ul>
+        {% if object.created_date %}<li>Création&nbsp;: {{ object.created_date }}</li>{% endif %}
+        {% if object.modified_date %}<li>Dernière modification&nbsp;: {{ object.modified_date }}</li>{% endif %}
+        {% if object.imported_date %}<li>Dernière importation&nbsp;: {{ object.imported_date }}</li>{% endif %}
+        {% if object.uuids %}
+            {% if object.uuids|length > 0 %}
+            <li>UUIDs (identifiants uniques d'événements dans les sources)&nbsp;:
+                <ul>
+                {% for u in object.uuids %}
+                    <li>{{ u }}</li>
+                {% endfor %}
+                </ul>
+            </li>
+            {% endif %}
+        {% endif %}
+        {% if object.import_sources %}
+            {% if object.import_sources|length > 0 %}
+            <li>Sources d'import&nbsp;:
+                <ul>
+                {% for u in object.import_sources %}
+                    <li><a href="{{ u }}">{{ u }}</a></li>
+                {% endfor %}
+                </ul>
+            </li>
+            {% endif %}
+        {% endif %}
+    </ul>
+</footer>
+{% endif %}
+</article>

 {% endblock %}

View File

@@ -30,7 +30,10 @@ from django.contrib import messages
 from django.contrib.messages.views import SuccessMessageMixin

 from .calendar import CalendarMonth, CalendarWeek, CalendarDay
-from .extractors import ExtractorAllURLs
+
+from .import_tasks.importer import URL2Events
+from .import_tasks.extractor import Extractor
+from .import_tasks.downloader import ChromiumHeadlessDownloader

 from .celery import app as celery_app, import_events_from_json, run_recurrent_import

@@ -262,7 +265,7 @@ def import_from_url(request):
     logger = logging.getLogger(__name__)
     if request.method == 'POST' and "title" in request.POST:
-        form = EventForm(request.POST)
+        form = EventForm(request.POST, is_authenticated=request.user.is_authenticated)
         if form.is_valid():
             new_event = form.save()
             if request.user.is_authenticated:

@@ -284,25 +287,32 @@ def import_from_url(request):
     form_event = EventForm(initial=initial)

     if request.method == 'POST':
-        form = EventSubmissionForm(request.POST)

+        form = EventSubmissionForm(request.POST)
         if form.is_valid():
             cd = form.cleaned_data
             url = cd.get('url')
-            url = ExtractorAllURLs.clean_url(url)
+            url = Extractor.clean_url(url)
             existing = Event.objects.filter(uuids__contains=[url])
             if len(existing) == 0:
-                event = ExtractorAllURLs.extract(url)
+                event = None
+                u2e = URL2Events(ChromiumHeadlessDownloader(), single_event=True)
+                events_structure = u2e.process(url, published=request.user.is_authenticated)
+                if events_structure is not None and "events" in events_structure and len(events_structure["events"]) > 0:
+                    event = Event.from_structure(events_structure["events"][0], events_structure["header"]["url"])
+                # TODO: use celery to import the other events
                 if event != None:
-                    form = EventForm(instance=event)
+                    form = EventForm(instance=event, is_authenticated=request.user.is_authenticated)
                     messages.success(request, _("The event has been successfully extracted, and you can now submit it after modifying it if necessary."))
                     return render(request, 'agenda_culturel/event_form.html', context={'form': form })
                 else:
-                    form = EventForm(initial={'reference_urls': [url]})
+                    form = EventForm(initial={'reference_urls': [url]}, is_authenticated=request.user.is_authenticated)
                     messages.error(request, _("Unable to extract an event from the proposed URL. Please use the form below to submit the event."))
                     return render(request, 'agenda_culturel/import.html', context={'form': form, 'form_event': form_event})
             else: