On intègre les nouvelles catégories aux outils d'import

This commit is contained in:
Jean-Marie Favreau 2024-11-02 15:20:12 +01:00
parent 40ce9a9cba
commit 4186b70e7e
11 changed files with 78 additions and 40 deletions

View File

@ -33,7 +33,7 @@ if __name__ == "__main__":
url_human = "https://www.lacoope.org/concerts-calendrier/"
try:
events = u2e.process(url, url_human, cache = "cache-lacoope.html", default_values = {"category": "Concert", "location": "La Coopérative"}, published = True)
events = u2e.process(url, url_human, cache = "cache-lacoope.html", default_values = {"category": "Fêtes & Concerts", "location": "La Coopérative"}, published = True)
exportfile = "events-lacoope.json"
print("Saving events to file {}".format(exportfile))

View File

@ -69,13 +69,16 @@ class CExtractor(TwoStepsExtractorNoPause):
first_cat = Extractor.remove_accents(category.split(",")[0].lower())
tags = []
if first_cat in ["grand spectacle"]:
category = "Danse"
category = "Spectacles"
tags.append("danse")
elif first_cat in ["theatre", "humour / one man show"]:
category = "Theatre"
category = "Spectacles"
tags.append("théâtre")
elif first_cat in ["chanson francaise", "musique du monde", "pop / rock", "rap", "rnb", "raggae", "variete"]:
category = "Concert"
category = "Fêtes & Concerts"
tags.append("concert")
elif first_cat in ["comedie musicale", "spectacle equestre"]:
category = "Art du spectacle"
category = "Spectacles"
elif first_cat in ["spectacle pour enfant"]:
tags = ["jeune public"]
category = None

View File

@ -10,11 +10,12 @@ class CExtractor(TwoStepsExtractor):
def category_c3c2agenda(self, category):
    """Translate a source category label into an agenda (category, tag) pair.

    Args:
        category: Label scraped from the source page; may be empty or None.

    Returns:
        A 2-tuple ``(category, tag)``. Both items are None when the label is
        empty or unknown; ``tag`` alone is None when the label has no
        associated tag.
    """
    # One table holds both values so a label's category and tag cannot
    # get out of sync.
    mapping = {
        "Théâtre": ("Spectacles", "théâtre"),
        "Concert": ("Fêtes & Concerts", "concert"),
        "Projection": ("Cinéma", None),
    }
    if not category:
        # Callers unpack two values, so the empty case must return a pair
        # as well (a bare None would raise TypeError on unpacking).
        return None, None
    return mapping.get(category, (None, None))
def build_event_url_list(self, content):
soup = BeautifulSoup(content, "html.parser")
@ -49,20 +50,23 @@ class CExtractor(TwoStepsExtractor):
description = soup.select_one(".presentation").get_text()
duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
if duration is not None:
if not duration is None:
duration = Extractor.parse_french_time(duration.text)
location = self.nom_lieu
categories = []
tags = []
for t in soup.select(".sous-titre span"):
classes = t.get("class")
if classes and len(classes) > 0:
if classes[0].startswith("LIEU-"):
location = t.text
elif classes[0].startswith("THEMATIQUE-"):
cat = self.category_c3c2agenda(t.text)
if cat is not None:
cat, tag = self.category_c3c2agenda(t.text)
if cat:
categories.append(cat)
if tag:
tags.append(tag)
# TODO: parser les dates, récupérer les heures ()
dates = [o.get("value") for o in soup.select("select.datedleb_resa option")]
@ -120,7 +124,7 @@ class CExtractor(TwoStepsExtractor):
dt[0],
location,
description,
[],
tags,
recurrences=None,
uuids=[event_url],
url_human=url_human,

View File

@ -11,16 +11,23 @@ class CExtractor(TwoStepsExtractor):
def category_comedie2agenda(self, category):
    """Translate a source category label into an agenda (category, tag) pair.

    Args:
        category: Label read from the source listing.

    Returns:
        A 2-tuple ``(category, tag)``. Both items are None for an unknown
        label; ``tag`` alone is None when the label has no associated tag.
    """
    # One table holds both values so a label's category and tag cannot
    # get out of sync.
    mapping = {
        "Théâtre": ("Spectacles", "théâtre"),
        "Danse": ("Spectacles", "danse"),
        "Rencontre": ("Rencontres & Débats", None),
        "Sortie de résidence": ("Sans catégorie", "sortie de résidence"),
        "PopCorn Live": ("Sans catégorie", None),
    }
    return mapping.get(category, (None, None))
def build_event_url_list(self, content):
dates = json5.loads(content)["data"][0]
@ -55,11 +62,13 @@ class CExtractor(TwoStepsExtractor):
self.add_event_title(e_url, title)
category = e.select("div#lieuevtcal span")
if len(category) > 0:
category = self.category_comedie2agenda(
category, tag = self.category_comedie2agenda(
category[-1].contents[0]
)
if category is not None:
if category:
self.add_event_category(e_url, category)
if tag:
self.add_event_tag(e_url, tag)
location = (
e.select("div#lieuevtcal")[0]
.contents[-1]

View File

@ -38,7 +38,7 @@ class CExtractor(TwoStepsExtractor):
soup = BeautifulSoup(event_content, "html.parser")
title = soup.find("h1").contents[0]
category = "Concert"
category = "Fêtes & Concerts"
image = soup.find("meta", property="og:image")
if image:
image = image["content"]
@ -53,7 +53,7 @@ class CExtractor(TwoStepsExtractor):
if description is None:
description = ""
tags = []
tags = ["concert"]
link_calendar = soup.select('a[href^="https://calendar.google.com/calendar/"]')
if len(link_calendar) == 0:

View File

@ -58,7 +58,7 @@ class CExtractor(TwoStepsExtractor):
end_day = Extractor.guess_end_day(start_day, start_time, end_time)
url_human = event_url
tags = []
tags = ["concert"]
image = soup.select("wow-image img[fetchpriority=high]")
if image:
@ -79,7 +79,7 @@ class CExtractor(TwoStepsExtractor):
default_values,
event_url,
title,
"Concert",
"Fêtes & Concerts",
start_day,
location,
description,

View File

@ -9,11 +9,12 @@ class CExtractor(TwoStepsExtractor):
def category_fotomat2agenda(self, category):
    """Translate a source category label into an agenda (category, tag) pair.

    Args:
        category: Label read from the feed entry; may be empty or None.

    Returns:
        A 2-tuple ``(category, tag)``. Both items are None when the label is
        empty or unknown.
    """
    # One table holds both values, so the tag returned is always the one
    # belonging to the label (never the lookup table itself).
    mapping = {"Concerts": ("Fêtes & Concerts", "concert")}
    if not category:
        # Callers unpack two values, so the empty case must return a pair
        # as well (a bare None would raise TypeError on unpacking).
        return None, None
    return mapping.get(category, (None, None))
def build_event_url_list(self, content):
soup = BeautifulSoup(content, "xml")
@ -26,9 +27,11 @@ class CExtractor(TwoStepsExtractor):
title = e.find("title").contents[0]
self.add_event_title(e_url, title)
category = self.category_fotomat2agenda(e.find("category").contents[0])
category, tag = self.category_fotomat2agenda(e.find("category").contents[0])
if category:
self.add_event_category(e_url, category)
if tag:
self.add_event_tag(e_url, tag)
def add_event_from_content(
self,

View File

@ -8,16 +8,26 @@ class CExtractor(TwoStepsExtractor):
def local2agendaCategory(self, category):
    """Translate a source category label into an agenda (category, tag) pair.

    Args:
        category: Label read from the source listing.

    Returns:
        A 2-tuple ``(category, tag)``. Both items are None for an unknown
        label; ``tag`` alone is None when the label has no associated tag.
    """
    # One table holds both values so a label's category and tag cannot
    # get out of sync.
    mapping = {
        "Musique": ("Fêtes & Concerts", "concert"),
        "CONCERT": ("Fêtes & Concerts", "concert"),
        "VISITE": ("Visites & Expositions", None),
        "Spectacle": ("Spectacles", "théâtre"),
        "Rencontre": ("Rencontres & Débats", None),
        "Atelier": ("Animations & Ateliers", "atelier"),
        "Projection": ("Cinéma", None),
    }
    # Callers unpack two values, so an unknown label must yield a pair too
    # (a bare None would raise TypeError on unpacking).
    return mapping.get(category, (None, None))
@ -39,9 +49,11 @@ class CExtractor(TwoStepsExtractor):
if len(categories) == 0:
categories = e.select(".mec-category")
if len(categories) > 0:
category = self.local2agendaCategory(categories[0].get_text())
if category is not None:
category, tag = self.local2agendaCategory(categories[0].get_text())
if category:
self.add_event_category(url, category)
if tag:
self.add_event_tag(url, tag)
def add_event_from_content(

View File

@ -49,7 +49,7 @@ class GoogleCalendarLinkEventExtractor(Extractor):
start_day=start_day,
location=location,
description=description,
tags=None,
tags=[],
uuids=[url],
recurrences=None,
url_human=url_human,

View File

@ -144,7 +144,7 @@ def update_database(apps, cats):
if e.category and e.category.name in convert.keys():
cat, tag = convert[e.category.name].get_transfered_to_object(apps, e)
e.category = cat
if not tag is None:
if tag:
if e.tags is None:
e.tags = [tag]
else:

View File

@ -1108,7 +1108,14 @@ class Event(models.Model):
return events[0]
def update(self, other):
# TODO: what about category, tags?
# we do not modify the category (local categories are more important)
# however, we add supplementary tags
if other.tags:
if not self.tags:
self.tags = []
self.tags += [t for t in other.tags if not t in self.tags]
# set attributes
for attr in Event.data_fields():
setattr(self, attr, getattr(other, attr))