First integration of an FB event

Jean-Marie Favreau 2023-10-18 19:53:26 +02:00
parent d69ed7f3d8
commit 0af45f6d23
8 changed files with 61 additions and 32 deletions

View File

@@ -5,7 +5,7 @@ WORKDIR /usr/src/app
 RUN --mount=type=cache,target=/var/cache/apt \
     apt-get update && \
     apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver \
     && rm -rf /var/lib/apt/lists/*

View File

@@ -3,4 +3,4 @@
 set -o errexit
 set -o nounset
-celery -A "$APP_NAME" worker -l info
+python3 /usr/local/lib/python3.11/site-packages/watchdog/watchmedo.py auto-restart -d agenda_culturel -p '*.py' --recursive -- celery -A "$APP_NAME" worker -l info

View File

@@ -45,6 +45,7 @@ services:
     volumes:
       - redis_data:/data
   celery-worker: &celery-worker
     container_name: "${APP_NAME}-celery-worker"
     build:

View File

@@ -6,6 +6,7 @@ from celery.utils.log import get_task_logger
 from .extractors import ExtractorAllURLs
 # Set the default Django settings module for the 'celery' program.
 APP_ENV = os.getenv("APP_ENV", "dev")
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", f"agenda_culturel.settings.{APP_ENV}")
@@ -27,15 +28,25 @@ app.autodiscover_tasks()
 @app.task(bind=True)
 def create_event_from_submission(self, url):
+    from agenda_culturel.models import Event
     logger.info(f"{url=}")
-    try:
-        logger.info("About to create event from submission")
-        events = ExtractorAllURLs.extract(url)
-        # TODO
-    except BadHeaderError:
-        logger.info("BadHeaderError")
-    except Exception as e:
-        logger.error(e)
+
+    if len(Event.objects.filter(reference_urls__contains=[url])) != 0:
+        logger.info("Already known url: %s", url)
+    else:
+        try:
+            logger.info("About to create event from submission")
+            events = ExtractorAllURLs.extract(url)
+
+            if events is not None:
+                for e in events:
+                    e.save()
+        except BadHeaderError:
+            logger.info("BadHeaderError")
+        except Exception as e:
+            logger.error(e)
+
 app.conf.timezone = "Europe/Paris"
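
For context, a minimal sketch of how this task could be queued from the event submission view; the caller name below is an illustrative assumption, not part of this commit:

# Hypothetical caller (name assumed): push a submitted URL onto the Celery queue.
from agenda_culturel.celery import create_event_from_submission

def submit_event_url(url):
    # .delay() enqueues the task; the celery-worker container picks it up,
    # runs the extractors and saves any resulting Event objects as drafts.
    create_event_from_submission.delay(url)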

View File

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
+#from .models import Event
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
@@ -8,6 +8,8 @@ from selenium.webdriver.chrome.options import Options
 from bs4 import BeautifulSoup
 import json
+from datetime import datetime
 from celery.utils.log import get_task_logger
@@ -67,21 +69,27 @@ class ExtractorFacebook(Extractor):
                     return v
             return None
 
-    def extract(url):
-        txt = Extractor.download(url)
-        if txt is None:
-            logger.error("Cannot download " + url)
-            return None
-        else:
-            soup = BeautifulSoup(txt, "html.parser")
-            for json_script in soup.find_all('script', type="application/json"):
-                json_txt = json_script.get_text()
-                json_struct = json.loads(json_txt)
-                fevent = ExtractorFacebook.FacebookEvent.find_event_in_array(json_struct)
-                if fevent != None:
-                    logger.info(str(fevent))
-                    result = "TODO"
-                    return result
+        def build_event(self, url):
+            from .models import Event
+            # TODO
+            return Event(title=self.data["name"],
+                         status=Event.STATUS.DRAFT,
+                         start_day=datetime.fromtimestamp(self.data["start_timestamp"]),
+                         reference_urls=[url])
+
+    def process_page(txt, url):
+        soup = BeautifulSoup(txt, "html.parser")
+        for json_script in soup.find_all('script', type="application/json"):
+            json_txt = json_script.get_text()
+            json_struct = json.loads(json_txt)
+            fevent = ExtractorFacebook.FacebookEvent.find_event_in_array(json_struct)
+            if fevent is not None:
+                logger.info(str(fevent.data))
+                result = fevent.build_event(url)
+                return [result]
         return None
@@ -92,11 +100,18 @@ class ExtractorAllURLs:
     def extract(url):
         logger.info("Run extraction")
-        result = ExtractorFacebook.extract(url)
-        if result is None:
-            logger.info("Not a Facebook link")
-            # add here other extrators
-            pass
-        return result
+        txt = Extractor.download(url)
+        if txt is None:
+            logger.info("Cannot download url")
+            return None
+
+        result = ExtractorFacebook.process_page(txt, url)
+        if result is not None:
+            return result
+        else:
+            logger.info("Not a Facebook link")
+
+        # TODO: add here other extractors
+        return None
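
As background for these extractor changes: Facebook event pages embed their data as JSON inside script type="application/json" tags, which is what process_page walks through. A simplified, standalone sketch of that scanning pattern (the helper name is illustrative, not the project's API):

# Illustrative sketch: yield every JSON block embedded in a downloaded page.
import json
from bs4 import BeautifulSoup

def iter_json_blocks(html):
    soup = BeautifulSoup(html, "html.parser")
    for script in soup.find_all("script", type="application/json"):
        try:
            yield json.loads(script.get_text())
        except json.JSONDecodeError:
            continue  # skip blocks that are not valid JSON

# Each parsed structure would then be searched for an event payload, e.g. with
# ExtractorFacebook.FacebookEvent.find_event_in_array(struct), and turned into a
# draft Event via build_event(url).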

View File

@@ -1 +1,2 @@
 <h1>{{ object.title }}</h1>
+<p>Date&nbsp;: {{ object.start_day }}</p>

View File

@@ -10,7 +10,7 @@ from .views import *
 urlpatterns = [
     path("", EventListView.as_view(), name="home"),
     re_path(r'^(?P<mode>' + '|'.join([dm.value for dm in DisplayModes]) + ')/$', view_interval, name='view_interval'),
-    path("event/<pk>-<extra>", EventDetailView.as_view(), name="view_event"),
+    path("event/<int:pk>-<extra>", EventDetailView.as_view(), name="view_event"),
     path("proposer", EventSubmissionFormView.as_view(), name="event_submission_form"),
     path("admin/", admin.site.urls),
     path("test_app/", include("test_app.urls")),
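
The switch to the <int:pk> converter makes the router reject non-numeric ids and hands pk to EventDetailView as an int. A quick illustration of reversing the updated route (the id and slug are made up, and the URLconf is assumed to be mounted at the site root):

# Reversing the updated route; pk must now be an integer.
from django.urls import reverse

url = reverse("view_event", kwargs={"pk": 42, "extra": "some-slug"})
# -> "/event/42-some-slug"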

View File

@@ -22,3 +22,4 @@ redis==4.5.5
 whitenoise==6.4.0
 selenium==4.14.0
 BeautifulSoup4==4.12.2
+watchdog==3.0.0