wip
This commit is contained in:
parent
2feb6219f7
commit
c1073451fd
@ -1,7 +1,4 @@
|
||||
import icalendar
|
||||
import warnings
|
||||
|
||||
import bbcode
|
||||
import re
|
||||
|
||||
from datetime import datetime, date, timedelta
|
||||
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
|
||||
@ -12,6 +9,19 @@ from celery.utils.log import get_task_logger
|
||||
|
||||
logger = get_task_logger(__name__)
|
||||
|
||||
class Timeslot:
    """A start/end pair describing a time range.

    Either bound may be ``None`` when it is unknown; in this file slots are
    built with ``end_time=None`` when only a single time was recognised.
    """

    def __init__(self, start_time, end_time):
        self.start_time = start_time
        self.end_time = end_time

    def __repr__(self):
        # Slots end up inside result lists; a readable repr makes those
        # lists debuggable.
        return (
            f"{type(self).__name__}("
            f"start_time={self.start_time!r}, end_time={self.end_time!r})"
        )

    def merge(self, hours):
        """Widen this slot in place so that it also covers *hours*.

        The start becomes the earlier of the two starts and the end the
        later of the two ends. A bound that is ``None`` on one side is
        ignored in favour of the other side's value; if it is ``None`` on
        both sides it stays ``None``.

        Args:
            hours: another ``Timeslot`` (or any object exposing
                ``start_time`` and ``end_time``).

        Returns:
            None. The receiver is modified.
        """
        # Starts are handled like ends so that an unknown start does not
        # make min() raise TypeError.
        if hours.start_time is not None:
            if self.start_time is None:
                self.start_time = hours.start_time
            else:
                self.start_time = min(self.start_time, hours.start_time)

        if hours.end_time is not None:
            if self.end_time is None:
                self.end_time = hours.end_time
            else:
                self.end_time = max(self.end_time, hours.end_time)
|
||||
|
||||
class CExtractor(Extractor):
|
||||
|
||||
@ -32,6 +42,21 @@ class CExtractor(Extractor):
|
||||
|
||||
return (year, month)
|
||||
|
||||
def find_hours(text):
    """Extract one or two times from a text fragment.

    The fragment is cut into tokens, each token is handed to
    ``Extractor.parse_french_time`` and tokens for which it returns ``None``
    are discarded.

    Returns:
        ``Timeslot(start, None)`` when exactly one time was recognised,
        ``Timeslot(start, end)`` when exactly two were recognised, and
        ``None`` for any other count (zero, or three and more).
    """
    # NOTE(review): inside the character class, " -/" is a range
    # (U+0020..U+002F), so tokens are also cut on ! " # $ % & ' ( ) * + , .
    # in addition to space, '-' and '/'. Confirm this is intended.
    tokens = re.split(r"[ -/=>]+", text)

    times = []
    for token in tokens:
        parsed = Extractor.parse_french_time(token)
        if parsed is not None:
            times.append(parsed)

    if len(times) == 1:
        return Timeslot(times[0], None)
    if len(times) == 2:
        start, end = times
        return Timeslot(start, end)
    return None
|
||||
|
||||
|
||||
def is_nickname(text):
    """Tell whether *text* contains an ``@`` character.

    Returns:
        ``True`` if ``'@'`` occurs anywhere in *text*, ``False`` otherwise.
    """
    marker = '@'
    return marker in text
|
||||
|
||||
@ -42,7 +67,27 @@ class CExtractor(Extractor):
|
||||
if word in text:
|
||||
return True
|
||||
return False
|
||||
|
||||
def find_timeslot(text):
    """Parse a "<day name> <day number> <hours>" fragment.

    Runs of spaces are collapsed and the fragment is split on single spaces;
    only the first three tokens are used.

    Returns:
        ``(day_num, hours)`` where ``day_num`` is the first run of digits of
        the second token as an ``int`` and ``hours`` is the value returned by
        ``CExtractor.find_hours`` for the third token. ``None`` is returned
        when the fragment has fewer than three tokens, when
        ``Extractor.guess_day_name`` rejects the first token, when the second
        token holds no digit, or when no hours are found.
    """
    tokens = re.sub(' +', ' ', text).split(' ')

    # Fragments such as '@Manon' or 'Samedi 12' have fewer than three
    # tokens; without this guard the indexing below raised IndexError.
    # They carry no hours, so they are rejected like any other fragment
    # without hours.
    if len(tokens) < 3:
        return None
    day_name, day_label, hours_text = tokens[:3]

    if not Extractor.guess_day_name(day_name):
        return None

    # First run of digits in the day token (e.g. "12" or "12:").
    digits = re.search(r'\d+', day_label)
    if digits is None:
        return None
    day_num = int(digits.group())

    hours = CExtractor.find_hours(hours_text)
    if hours is None:
        return None

    return (day_num, hours)
|
||||
|
||||
|
||||
#['Samedi 12', '@Manon', '14:30-18:00', 'Dimanches 13', '@gaeldu63', '14h30 (j utilise la scie a format)']
|
||||
#['Mercredi 16 :']
|
||||
@ -52,25 +97,22 @@ class CExtractor(Extractor):
|
||||
result = []
|
||||
|
||||
date = None
|
||||
tstart = None
|
||||
tend = None
|
||||
slot = None
|
||||
is_open = False
|
||||
|
||||
# for each element in the paragraph
|
||||
for e in p.stripped_strings:
|
||||
day = CExtractor.find_day_name(e)
|
||||
day = CExtractor.find_timeslot(e)
|
||||
if not day is None:
|
||||
if not date is None and is_open:
|
||||
# we reach a new day
|
||||
result.append((date, tstart, tend))
|
||||
result.append((date, slot))
|
||||
if isinstance(day, tuple):
|
||||
date = day[0]
|
||||
tstart = day[1]
|
||||
tend = day[2]
|
||||
slot = day[1]
|
||||
else:
|
||||
date = day
|
||||
tstart = None
|
||||
tend = None
|
||||
slot = None
|
||||
is_open = False
|
||||
continue
|
||||
elif not is_open:
|
||||
@ -83,15 +125,10 @@ class CExtractor(Extractor):
|
||||
|
||||
hours = CExtractor.find_hours(e)
|
||||
if not hours is None:
|
||||
# we found hours
|
||||
if tstart is None:
|
||||
tstart = hours[0]
|
||||
if slot is None:
|
||||
slot = hours
|
||||
else:
|
||||
tstart = min(tstart, hours[0])
|
||||
if tend is None:
|
||||
tend = hours[1]
|
||||
else:
|
||||
tend = max(tend, hours[1])
|
||||
slot.merge(hours)
|
||||
continue
|
||||
|
||||
if CExtractor.is_canceled(e):
|
||||
@ -100,7 +137,7 @@ class CExtractor(Extractor):
|
||||
|
||||
if not date is None and is_open:
|
||||
# we reach a new day
|
||||
result.append((date, tstart, tend))
|
||||
result.append((date, slot))
|
||||
|
||||
return result
|
||||
# [(10, time(14, 0, 0), time(17, 0, 0)), ]
|
||||
@ -124,7 +161,7 @@ class CExtractor(Extractor):
|
||||
# annule
|
||||
# menage
|
||||
for p in description.select('p'):
|
||||
CExtractor.find_time_slots(p)
|
||||
CExtractor.find_timeslots(p)
|
||||
|
||||
if not '@' in p.text:
|
||||
continue
|
||||
|
Loading…
Reference in New Issue
Block a user