Première intégration d'un événement FB
This commit is contained in:
parent
d69ed7f3d8
commit
0af45f6d23
@@ -5,7 +5,7 @@ WORKDIR /usr/src/app
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt \
|
||||
apt-get update && \
|
||||
apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver \
|
||||
apt-get install --no-install-recommends -y build-essential libpq-dev gettext chromium-driver \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
|
@@ -3,4 +3,4 @@
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
|
||||
celery -A "$APP_NAME" worker -l info
|
||||
python3 /usr/local/lib/python3.11/site-packages/watchdog/watchmedo.py auto-restart -d agenda_culturel -p '*.py' --recursive -- celery -A "$APP_NAME" worker -l info
|
||||
|
@@ -45,6 +45,7 @@ services:
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
|
||||
|
||||
celery-worker: &celery-worker
|
||||
container_name: "${APP_NAME}-celery-worker"
|
||||
build:
|
||||
|
@@ -6,6 +6,7 @@ from celery.utils.log import get_task_logger
|
||||
|
||||
from .extractors import ExtractorAllURLs
|
||||
|
||||
|
||||
# Set the default Django settings module for the 'celery' program.
|
||||
APP_ENV = os.getenv("APP_ENV", "dev")
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", f"agenda_culturel.settings.{APP_ENV}")
|
||||
@@ -27,15 +28,25 @@ app.autodiscover_tasks()
|
||||
|
||||
@app.task(bind=True)
|
||||
def create_event_from_submission(self, url):
|
||||
from agenda_culturel.models import Event
|
||||
|
||||
logger.info(f"{url=}")
|
||||
try:
|
||||
logger.info("About to create event from submission")
|
||||
events = ExtractorAllURLs.extract(url)
|
||||
# TODO
|
||||
except BadHeaderError:
|
||||
logger.info("BadHeaderError")
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
||||
if len(Event.objects.filter(reference_urls__contains=[url])) != 0:
|
||||
logger.info("Already known url: ", url)
|
||||
else:
|
||||
try:
|
||||
logger.info("About to create event from submission")
|
||||
events = ExtractorAllURLs.extract(url)
|
||||
|
||||
if events != None:
|
||||
for e in events:
|
||||
e.save()
|
||||
|
||||
except BadHeaderError:
|
||||
logger.info("BadHeaderError")
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
|
||||
|
||||
app.conf.timezone = "Europe/Paris"
|
||||
|
@@ -1,5 +1,5 @@
|
||||
from abc import ABC, abstractmethod
|
||||
#from .models import Event
|
||||
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
@@ -8,6 +8,8 @@ from selenium.webdriver.chrome.options import Options
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
|
||||
from celery.utils.log import get_task_logger
|
||||
@@ -67,21 +69,27 @@ class ExtractorFacebook(Extractor):
|
||||
return v
|
||||
return None
|
||||
|
||||
def extract(url):
|
||||
txt = Extractor.download(url)
|
||||
if txt is None:
|
||||
logger.error("Cannot download " + url)
|
||||
return None
|
||||
else:
|
||||
soup = BeautifulSoup(txt, "html.parser")
|
||||
for json_script in soup.find_all('script', type="application/json"):
|
||||
json_txt = json_script.get_text()
|
||||
json_struct = json.loads(json_txt)
|
||||
fevent = ExtractorFacebook.FacebookEvent.find_event_in_array(json_struct)
|
||||
if fevent != None:
|
||||
logger.info(str(fevent))
|
||||
result = "TODO"
|
||||
return result
|
||||
|
||||
def build_event(self, url):
|
||||
from .models import Event
|
||||
# TODO
|
||||
return Event(title=self.data["name"],
|
||||
status=Event.STATUS.DRAFT,
|
||||
start_day=datetime.fromtimestamp(self.data["start_timestamp"]),
|
||||
reference_urls=[url])
|
||||
|
||||
def process_page(txt, url):
|
||||
|
||||
soup = BeautifulSoup(txt, "html.parser")
|
||||
for json_script in soup.find_all('script', type="application/json"):
|
||||
json_txt = json_script.get_text()
|
||||
json_struct = json.loads(json_txt)
|
||||
fevent = ExtractorFacebook.FacebookEvent.find_event_in_array(json_struct)
|
||||
if fevent != None:
|
||||
logger.info(str(fevent.data))
|
||||
|
||||
result = fevent.build_event(url)
|
||||
return [result]
|
||||
|
||||
return None
|
||||
|
||||
@@ -92,11 +100,18 @@ class ExtractorAllURLs:
|
||||
def extract(url):
|
||||
logger.info("Run extraction")
|
||||
|
||||
result = ExtractorFacebook.extract(url)
|
||||
txt = Extractor.download(url)
|
||||
if txt is None:
|
||||
logger.info("Cannot download url")
|
||||
return None
|
||||
|
||||
if result is None:
|
||||
result = ExtractorFacebook.process_page(txt, url)
|
||||
|
||||
if result is not None:
|
||||
return result
|
||||
else:
|
||||
logger.info("Not a Facebook link")
|
||||
# add other extractors here
|
||||
pass
|
||||
|
||||
return result
|
||||
# TODO: add other extractors here
|
||||
|
||||
return None
|
||||
|
@@ -1 +1,2 @@
|
||||
<h1>{{ object.title }}</h1>
|
||||
<p>Date : {{ object.start_day }}</p>
|
@@ -10,7 +10,7 @@ from .views import *
|
||||
urlpatterns = [
|
||||
path("", EventListView.as_view(), name="home"),
|
||||
re_path(r'^(?P<mode>' + '|'.join([dm.value for dm in DisplayModes]) + ')/$', view_interval, name='view_interval'),
|
||||
path("event/<pk>-<extra>", EventDetailView.as_view(), name="view_event"),
|
||||
path("event/<int:pk>-<extra>", EventDetailView.as_view(), name="view_event"),
|
||||
path("proposer", EventSubmissionFormView.as_view(), name="event_submission_form"),
|
||||
path("admin/", admin.site.urls),
|
||||
path("test_app/", include("test_app.urls")),
|
||||
|
@@ -22,3 +22,4 @@ redis==4.5.5
|
||||
whitenoise==6.4.0
|
||||
selenium==4.14.0
|
||||
BeautifulSoup4==4.12.2
|
||||
watchdog==3.0.0
|
||||
|
Loading…
Reference in New Issue
Block a user