wip
This commit is contained in:
parent
2feb6219f7
commit
c1073451fd
@ -1,7 +1,4 @@
|
|||||||
import icalendar
|
import re
|
||||||
import warnings
|
|
||||||
|
|
||||||
import bbcode
|
|
||||||
|
|
||||||
from datetime import datetime, date, timedelta
|
from datetime import datetime, date, timedelta
|
||||||
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
|
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
|
||||||
@ -12,6 +9,19 @@ from celery.utils.log import get_task_logger
|
|||||||
|
|
||||||
logger = get_task_logger(__name__)
|
logger = get_task_logger(__name__)
|
||||||
|
|
||||||
|
class Timeslot:
    """A time range with a start time and an optional end time.

    ``end_time`` may be ``None`` when only a start time is known.
    """

    def __init__(self, start_time, end_time):
        """Store the bounds of the slot.

        Args:
            start_time: start of the slot.
            end_time: end of the slot, or ``None`` when unknown.
        """
        self.start_time = start_time
        self.end_time = end_time

    def __repr__(self):
        return (
            f"{type(self).__name__}("
            f"start_time={self.start_time!r}, end_time={self.end_time!r})"
        )

    def merge(self, hours):
        """Widen this slot in place so that it also covers ``hours``.

        The start becomes the earlier of the two starts. The end becomes
        the later of the two ends; an end of ``None`` on either side is
        treated as "unknown" and never overrides a known end.

        Args:
            hours: another ``Timeslot`` to fold into this one.
        """
        self.start_time = min(self.start_time, hours.start_time)

        if hours.end_time is not None:
            if self.end_time is None:
                # nothing to compare against yet: adopt the other end as-is
                self.end_time = hours.end_time
            else:
                self.end_time = max(self.end_time, hours.end_time)
|
||||||
|
|
||||||
class CExtractor(Extractor):
|
class CExtractor(Extractor):
|
||||||
|
|
||||||
@ -32,6 +42,21 @@ class CExtractor(Extractor):
|
|||||||
|
|
||||||
return (year, month)
|
return (year, month)
|
||||||
|
|
||||||
|
def find_hours(text):
    """Extract a Timeslot from a piece of text holding one or two times.

    The text is cut on runs of separator characters, each piece is handed
    to ``Extractor.parse_french_time``, and pieces for which it returns
    ``None`` are discarded.

    Returns:
        ``Timeslot(start, None)`` when exactly one time was recognised,
        ``Timeslot(start, end)`` when exactly two were, ``None`` otherwise.
    """
    # NOTE(review): inside the character class, " -/" is a range (space
    # through "/"), so ".", ",", "(" and ")" also act as separators.
    # Confirm this is intended.
    pieces = re.split(r"[ -/=>]+", text)

    times = []
    for piece in pieces:
        parsed = Extractor.parse_french_time(piece)
        if parsed is not None:
            times.append(parsed)

    if len(times) == 1:
        return Timeslot(times[0], None)
    if len(times) == 2:
        return Timeslot(times[0], times[1])
    # no time at all, or too many to interpret as a single range
    return None
|
||||||
|
|
||||||
|
|
||||||
def is_nickname(text):
|
def is_nickname(text):
|
||||||
return '@' in text
|
return '@' in text
|
||||||
|
|
||||||
@ -43,6 +68,26 @@ class CExtractor(Extractor):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def find_timeslot(text):
    """Parse a "<day name> <day number> <hours>" line.

    Runs of spaces are collapsed, then the first three space-separated
    tokens are read as the day name, the day number and the hours.

    Args:
        text: the line to analyse.

    Returns:
        A ``(day_num, hours)`` tuple, where ``day_num`` is an ``int`` and
        ``hours`` is the value returned by ``CExtractor.find_hours``.
        ``None`` is returned when the line has fewer than three tokens,
        when ``Extractor.guess_day_name`` rejects the first token, when
        the second token has no digits, or when no hours are found in the
        third token.
    """
    tokens = re.sub(' +', ' ', text).split(' ')
    # Lines such as "Samedi 12" have no hours token; indexing the third
    # token unconditionally would raise IndexError on them.
    if len(tokens) < 3:
        return None
    day_name, day_num, hours = tokens[:3]

    if not Extractor.guess_day_name(day_name):
        return None

    # keep the first run of digits of the token (e.g. "16" out of "16:")
    digits = [c for c in re.split(r'\D+', day_num) if c != ""]
    if not digits:
        return None
    day_num = int(digits[0])

    hours = CExtractor.find_hours(hours)
    if hours is None:
        return None

    return (day_num, hours)
|
||||||
|
|
||||||
|
|
||||||
#['Samedi 12', '@Manon', '14:30-18:00', 'Dimanches 13', '@gaeldu63', '14h30 (j utilise la scie a format)']
|
#['Samedi 12', '@Manon', '14:30-18:00', 'Dimanches 13', '@gaeldu63', '14h30 (j utilise la scie a format)']
|
||||||
#['Mercredi 16 :']
|
#['Mercredi 16 :']
|
||||||
@ -52,25 +97,22 @@ class CExtractor(Extractor):
|
|||||||
result = []
|
result = []
|
||||||
|
|
||||||
date = None
|
date = None
|
||||||
tstart = None
|
slot = None
|
||||||
tend = None
|
|
||||||
is_open = False
|
is_open = False
|
||||||
|
|
||||||
# for each element in the paragraph
|
# for each element in the paragraph
|
||||||
for e in p.stripped_strings:
|
for e in p.stripped_strings:
|
||||||
day = CExtractor.find_day_name(e)
|
day = CExtractor.find_timeslot(e)
|
||||||
if not day is None:
|
if not day is None:
|
||||||
if not date is None and is_open:
|
if not date is None and is_open:
|
||||||
# we reach a new day
|
# we reach a new day
|
||||||
result.append((date, tstart, tend))
|
result.append((date, slot))
|
||||||
if isinstance(day, tuple):
|
if isinstance(day, tuple):
|
||||||
date = day[0]
|
date = day[0]
|
||||||
tstart = day[1]
|
slot = day[1]
|
||||||
tend = day[2]
|
|
||||||
else:
|
else:
|
||||||
date = day
|
date = day
|
||||||
tstart = None
|
slot = None
|
||||||
tend = None
|
|
||||||
is_open = False
|
is_open = False
|
||||||
continue
|
continue
|
||||||
elif not is_open:
|
elif not is_open:
|
||||||
@ -83,15 +125,10 @@ class CExtractor(Extractor):
|
|||||||
|
|
||||||
hours = CExtractor.find_hours(e)
|
hours = CExtractor.find_hours(e)
|
||||||
if not hours is None:
|
if not hours is None:
|
||||||
# we found hours
|
if slot is None:
|
||||||
if tstart is None:
|
slot = hours
|
||||||
tstart = hours[0]
|
|
||||||
else:
|
else:
|
||||||
tstart = min(tstart, hours[0])
|
slot.merge(hours)
|
||||||
if tend is None:
|
|
||||||
tend = hours[1]
|
|
||||||
else:
|
|
||||||
tend = max(tend, hours[1])
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if CExtractor.is_canceled(e):
|
if CExtractor.is_canceled(e):
|
||||||
@ -100,7 +137,7 @@ class CExtractor(Extractor):
|
|||||||
|
|
||||||
if not date is None and is_open:
|
if not date is None and is_open:
|
||||||
# we reach a new day
|
# we reach a new day
|
||||||
result.append((date, tstart, tend))
|
result.append((date, slot))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
# [(10, time(14, 0, 0), time(17, 0, 0)), ]
|
# [(10, time(14, 0, 0), time(17, 0, 0)), ]
|
||||||
@ -124,7 +161,7 @@ class CExtractor(Extractor):
|
|||||||
# annule
|
# annule
|
||||||
# menage
|
# menage
|
||||||
for p in description.select('p'):
|
for p in description.select('p'):
|
||||||
CExtractor.find_time_slots(p)
|
CExtractor.find_timeslots(p)
|
||||||
|
|
||||||
if not '@' in p.text:
|
if not '@' in p.text:
|
||||||
continue
|
continue
|
||||||
|
Loading…
Reference in New Issue
Block a user