This commit is contained in:
Sirius Nottin 2024-10-12 21:13:48 +02:00
parent 2feb6219f7
commit c1073451fd
No known key found for this signature in database
GPG Key ID: C298AD85801542AE

View File

@ -1,7 +1,4 @@
import icalendar
import warnings
import bbcode
import re
from datetime import datetime, date, timedelta
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
@ -12,6 +9,19 @@ from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
class Timeslot:
    """Time interval made of a start time and an optional end time.

    ``end_time`` is ``None`` when only a start time is known.
    """

    def __init__(self, start_time, end_time):
        self.start_time = start_time
        self.end_time = end_time

    def __repr__(self):
        return f"{type(self).__name__}({self.start_time!r}, {self.end_time!r})"

    def merge(self, hours):
        """Widen this slot in place so that it also covers ``hours``.

        The start becomes the earlier of both starts and the end becomes the
        later of both ends. A bound that is ``None`` on one side is ignored
        in favour of the other side's value.

        Args:
            hours: another ``Timeslot`` whose bounds are folded into this one.
        """
        # Start bound: handled symmetrically with the end bound so that a
        # missing start on either side does not make min() raise TypeError.
        if hours.start_time is not None:
            if self.start_time is None:
                self.start_time = hours.start_time
            else:
                self.start_time = min(self.start_time, hours.start_time)

        # End bound: keep the latest known end, adopting the other slot's
        # end when this slot has none yet.
        if hours.end_time is not None:
            if self.end_time is None:
                self.end_time = hours.end_time
            else:
                self.end_time = max(self.end_time, hours.end_time)
class CExtractor(Extractor):
@ -32,6 +42,21 @@ class CExtractor(Extractor):
return (year, month)
def find_hours(text):
text = re.split(r"[ -/=>]+", text)
text = [Extractor.parse_french_time(k) for k in text]
text = [k for k in text if not k is None]
match len(text):
case 0:
return None
case 1:
return Timeslot(text[0], None)
case 2:
return Timeslot(text[0], text[1])
case _:
return None
def is_nickname(text):
    """Return True when ``text`` contains an ``@`` character."""
    has_at_sign = '@' in text
    return has_at_sign
@ -42,7 +67,27 @@ class CExtractor(Extractor):
if word in text:
return True
return False
def find_timeslot(text):
    """Parse a ``"<day name> <day number> <hours>"`` string.

    Only the first three space-separated tokens are looked at: the first
    must be accepted by ``Extractor.guess_day_name``, the second must
    contain at least one digit run, and the third must be understood by
    ``CExtractor.find_hours``.

    Args:
        text: candidate string, e.g. one text fragment of a paragraph.

    Returns:
        A ``(day_num, slot)`` tuple, where ``day_num`` is an ``int`` and
        ``slot`` is whatever ``CExtractor.find_hours`` returned, or ``None``
        when the string does not have the expected shape.
    """
    tokens = re.sub(' +', ' ', text).split(' ')

    # Fragments such as "Samedi 12" or "@Manon" have fewer than three
    # tokens; indexing them blindly used to raise IndexError. They simply
    # are not timeslot lines.
    if len(tokens) < 3:
        return None
    day_name, day_token, hours_token = tokens[:3]

    if not Extractor.guess_day_name(day_name):
        return None

    # The day number is the first run of digits found in the second token.
    digits = re.search(r'\d+', day_token)
    if digits is None:
        return None
    day_num = int(digits.group())

    slot = CExtractor.find_hours(hours_token)
    if slot is None:
        return None

    return (day_num, slot)
#['Samedi 12', '@Manon', '14:30-18:00', 'Dimanches 13', '@gaeldu63', '14h30 (j utilise la scie a format)']
#['Mercredi 16 :']
@ -52,25 +97,22 @@ class CExtractor(Extractor):
result = []
date = None
tstart = None
tend = None
slot = None
is_open = False
# for each element in the paragraph
for e in p.stripped_strings:
day = CExtractor.find_day_name(e)
day = CExtractor.find_timeslot(e)
if not day is None:
if not date is None and is_open:
# we reach a new day
result.append((date, tstart, tend))
result.append((date, slot))
if isinstance(day, tuple):
date = day[0]
tstart = day[1]
tend = day[2]
slot = day[1]
else:
date = day
tstart = None
tend = None
slot = None
is_open = False
continue
elif not is_open:
@ -83,15 +125,10 @@ class CExtractor(Extractor):
hours = CExtractor.find_hours(e)
if not hours is None:
# we found hours
if tstart is None:
tstart = hours[0]
if slot is None:
slot = hours
else:
tstart = min(tstart, hours[0])
if tend is None:
tend = hours[1]
else:
tend = max(tend, hours[1])
slot.merge(hours)
continue
if CExtractor.is_canceled(e):
@ -100,7 +137,7 @@ class CExtractor(Extractor):
if not date is None and is_open:
# we reach a new day
result.append((date, tstart, tend))
result.append((date, slot))
return result
# [(10, time(14, 0, 0), time(17, 0, 0)), ]
@ -124,7 +161,7 @@ class CExtractor(Extractor):
# annule
# menage
for p in description.select('p'):
CExtractor.find_time_slots(p)
CExtractor.find_timeslots(p)
if not '@' in p.text:
continue