This commit is contained in:
Sirius Nottin 2024-10-12 21:13:48 +02:00
parent 2feb6219f7
commit c1073451fd
No known key found for this signature in database
GPG Key ID: C298AD85801542AE

View File

@ -1,7 +1,4 @@
import icalendar import re
import warnings
import bbcode
from datetime import datetime, date, timedelta from datetime import datetime, date, timedelta
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
@ -12,6 +9,19 @@ from celery.utils.log import get_task_logger
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
class Timeslot:
    """Time interval made of a start and an optional end.

    ``start_time`` and ``end_time`` are stored exactly as given.
    ``end_time`` may be ``None`` when only a start is known.  ``merge``
    compares the values with ``min``/``max``, so they must be mutually
    orderable.
    """

    def __init__(self, start_time, end_time):
        self.start_time = start_time
        self.end_time = end_time

    def __repr__(self):
        return f"{type(self).__name__}({self.start_time!r}, {self.end_time!r})"

    def merge(self, hours):
        """Widen this slot in place so that it also covers *hours*.

        The start becomes the earlier of the two starts.  The end becomes
        the later of the two ends; an end that is ``None`` on one side is
        ignored in favour of the other side's value.

        Args:
            hours: another ``Timeslot``-like object exposing ``start_time``
                and ``end_time``.
        """
        self.start_time = min(self.start_time, hours.start_time)
        if hours.end_time is None:
            # Nothing to learn about the end from the other slot.
            return
        if self.end_time is None:
            self.end_time = hours.end_time
        else:
            self.end_time = max(self.end_time, hours.end_time)
class CExtractor(Extractor): class CExtractor(Extractor):
@ -32,6 +42,21 @@ class CExtractor(Extractor):
return (year, month) return (year, month)
def find_hours(text):
text = re.split(r"[ -/=>]+", text)
text = [Extractor.parse_french_time(k) for k in text]
text = [k for k in text if not k is None]
match len(text):
case 0:
return None
case 1:
return Timeslot(text[0], None)
case 2:
return Timeslot(text[0], text[1])
case _:
return None
def is_nickname(text):
    """Return True when *text* contains an ``@`` character."""
    return '@' in text
@ -43,6 +68,26 @@ class CExtractor(Extractor):
return True return True
return False return False
def find_timeslot(text):
    """Parse a ``"<day name> <day number> <hours>"`` string.

    The text is split on runs of spaces and the first three tokens are
    interpreted, in order, as a day name (checked with
    ``Extractor.guess_day_name``), a day number (first run of digits in the
    token) and hours (parsed with ``CExtractor.find_hours``).

    Returns:
        A ``(day_num, hours)`` tuple, where ``day_num`` is an ``int`` and
        ``hours`` is whatever ``CExtractor.find_hours`` returned, or
        ``None`` when any of the three parts is missing or not recognised.
    """
    tokens = re.split(' +', text)
    # Strings such as "Samedi 12" or "@Manon" carry fewer than three
    # tokens; return None instead of raising IndexError on the positional
    # lookups below.
    if len(tokens) < 3:
        return None
    day_name, day_token, hours_token = tokens[:3]

    if not Extractor.guess_day_name(day_name):
        return None

    # First run of digits in the token, e.g. "12" in "12:" or "(12)".
    digits = re.search(r'\d+', day_token)
    if digits is None:
        return None
    day_num = int(digits.group())

    hours = CExtractor.find_hours(hours_token)
    if hours is None:
        return None
    return (day_num, hours)
#['Samedi 12', '@Manon', '14:30-18:00', 'Dimanches 13', '@gaeldu63', '14h30 (j utilise la scie a format)'] #['Samedi 12', '@Manon', '14:30-18:00', 'Dimanches 13', '@gaeldu63', '14h30 (j utilise la scie a format)']
#['Mercredi 16 :'] #['Mercredi 16 :']
@ -52,25 +97,22 @@ class CExtractor(Extractor):
result = [] result = []
date = None date = None
tstart = None slot = None
tend = None
is_open = False is_open = False
# for each element in the paragraph # for each element in the paragraph
for e in p.stripped_strings: for e in p.stripped_strings:
day = CExtractor.find_day_name(e) day = CExtractor.find_timeslot(e)
if not day is None: if not day is None:
if not date is None and is_open: if not date is None and is_open:
# we reach a new day # we reach a new day
result.append((date, tstart, tend)) result.append((date, slot))
if isinstance(day, tuple): if isinstance(day, tuple):
date = day[0] date = day[0]
tstart = day[1] slot = day[1]
tend = day[2]
else: else:
date = day date = day
tstart = None slot = None
tend = None
is_open = False is_open = False
continue continue
elif not is_open: elif not is_open:
@ -83,15 +125,10 @@ class CExtractor(Extractor):
hours = CExtractor.find_hours(e) hours = CExtractor.find_hours(e)
if not hours is None: if not hours is None:
# we found hours if slot is None:
if tstart is None: slot = hours
tstart = hours[0]
else: else:
tstart = min(tstart, hours[0]) slot.merge(hours)
if tend is None:
tend = hours[1]
else:
tend = max(tend, hours[1])
continue continue
if CExtractor.is_canceled(e): if CExtractor.is_canceled(e):
@ -100,7 +137,7 @@ class CExtractor(Extractor):
if not date is None and is_open: if not date is None and is_open:
# we reach a new day # we reach a new day
result.append((date, tstart, tend)) result.append((date, slot))
return result return result
# [(10, time(14, 0, 0), time(17, 0, 0)), ] # [(10, time(14, 0, 0), time(17, 0, 0)), ]
@ -124,7 +161,7 @@ class CExtractor(Extractor):
# annule # annule
# menage # menage
for p in description.select('p'): for p in description.select('p'):
CExtractor.find_time_slots(p) CExtractor.find_timeslots(p)
if not '@' in p.text: if not '@' in p.text:
continue continue