On intègre les nouvelles catégories aux outils d'import
This commit is contained in:
parent
40ce9a9cba
commit
4186b70e7e
@ -33,7 +33,7 @@ if __name__ == "__main__":
|
||||
url_human = "https://www.lacoope.org/concerts-calendrier/"
|
||||
|
||||
try:
|
||||
events = u2e.process(url, url_human, cache = "cache-lacoope.html", default_values = {"category": "Concert", "location": "La Coopérative"}, published = True)
|
||||
events = u2e.process(url, url_human, cache = "cache-lacoope.html", default_values = {"category": "Fêtes & Concerts", "location": "La Coopérative"}, published = True)
|
||||
|
||||
exportfile = "events-lacoope.json"
|
||||
print("Saving events to file {}".format(exportfile))
|
||||
|
@ -69,13 +69,16 @@ class CExtractor(TwoStepsExtractorNoPause):
|
||||
first_cat = Extractor.remove_accents(category.split(",")[0].lower())
|
||||
tags = []
|
||||
if first_cat in ["grand spectacle"]:
|
||||
category = "Danse"
|
||||
category = "Spectacles"
|
||||
tags.append("danse")
|
||||
elif first_cat in ["theatre", "humour / one man show"]:
|
||||
category = "Theatre"
|
||||
category = "Spectacles"
|
||||
tags.append("théâtre")
|
||||
elif first_cat in ["chanson francaise", "musique du monde", "pop / rock", "rap", "rnb", "raggae", "variete"]:
|
||||
category = "Concert"
|
||||
category = "Fêtes & Concerts"
|
||||
tags.append("concert")
|
||||
elif first_cat in ["comedie musicale", "humour / one man show", "spectacle equestre"]:
|
||||
category = "Art du spectacle"
|
||||
category = "Spectacles"
|
||||
elif first_cat in ["spectacle pour enfant"]:
|
||||
tags = ["jeune public"]
|
||||
category = None
|
||||
|
@ -10,11 +10,12 @@ class CExtractor(TwoStepsExtractor):
|
||||
def category_c3c2agenda(self, category):
    """Translate a scraped label into an ``(agenda_category, tag)`` pair.

    Args:
        category: Label text to translate; may be empty or ``None``.

    Returns:
        A 2-tuple ``(agenda_category, tag)``. Either element may be
        ``None``: the tag is ``None`` for labels that carry no extra tag,
        and both are ``None`` when the label is empty or unknown.
    """
    if not category:
        # Always answer with a pair: callers unpack two values, so a bare
        # ``None`` here would raise TypeError on an empty label.
        return None, None
    mapping = {
        "Théâtre": "Spectacles",
        "Concert": "Fêtes & Concerts",
        "Projection": "Cinéma",
    }
    mapping_tag = {"Théâtre": "théâtre", "Concert": "concert", "Projection": None}
    if category in mapping:
        return mapping[category], mapping_tag[category]
    return None, None
|
||||
|
||||
def build_event_url_list(self, content):
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
@ -49,20 +50,23 @@ class CExtractor(TwoStepsExtractor):
|
||||
|
||||
description = soup.select_one(".presentation").get_text()
|
||||
duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
|
||||
if duration is not None:
|
||||
if not duration is None:
|
||||
duration = Extractor.parse_french_time(duration.text)
|
||||
|
||||
location = self.nom_lieu
|
||||
categories = []
|
||||
tags = []
|
||||
for t in soup.select(".sous-titre span"):
|
||||
classes = t.get("class")
|
||||
if classes and len(classes) > 0:
|
||||
if classes[0].startswith("LIEU-"):
|
||||
location = t.text
|
||||
elif classes[0].startswith("THEMATIQUE-"):
|
||||
cat = self.category_c3c2agenda(t.text)
|
||||
if cat is not None:
|
||||
cat, tag = self.category_c3c2agenda(t.text)
|
||||
if cat:
|
||||
categories.append(cat)
|
||||
if tag:
|
||||
tags.append(tag)
|
||||
|
||||
# TODO: parser les dates, récupérer les heures ()
|
||||
dates = [o.get("value") for o in soup.select("select.datedleb_resa option")]
|
||||
@ -120,7 +124,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
dt[0],
|
||||
location,
|
||||
description,
|
||||
[],
|
||||
tags,
|
||||
recurrences=None,
|
||||
uuids=[event_url],
|
||||
url_human=url_human,
|
||||
|
@ -11,16 +11,23 @@ class CExtractor(TwoStepsExtractor):
|
||||
|
||||
def category_comedie2agenda(self, category):
    """Translate a scraped label into an ``(agenda_category, tag)`` pair.

    Args:
        category: Label text to translate.

    Returns:
        A 2-tuple ``(agenda_category, tag)``. The tag is ``None`` for
        labels that carry no extra tag; both elements are ``None`` when
        the label is not a known one.
    """
    # One table for both values, so a label can never have a category
    # without a matching tag entry (or the other way round).
    mapping = {
        "Théâtre": ("Spectacles", "théâtre"),
        "Danse": ("Spectacles", "danse"),
        "Rencontre": ("Rencontres & Débats", None),
        "Sortie de résidence": ("Sans catégorie", "sortie de résidence"),
        "PopCorn Live": ("Sans catégorie", None),
    }
    return mapping.get(category, (None, None))
|
||||
|
||||
def build_event_url_list(self, content):
|
||||
dates = json5.loads(content)["data"][0]
|
||||
@ -55,11 +62,13 @@ class CExtractor(TwoStepsExtractor):
|
||||
self.add_event_title(e_url, title)
|
||||
category = e.select("div#lieuevtcal span")
|
||||
if len(category) > 0:
|
||||
category = self.category_comedie2agenda(
|
||||
category, tag = self.category_comedie2agenda(
|
||||
category[-1].contents[0]
|
||||
)
|
||||
if category is not None:
|
||||
if category:
|
||||
self.add_event_category(e_url, category)
|
||||
if tag:
|
||||
self.add_event_tag(e_url, tag)
|
||||
location = (
|
||||
e.select("div#lieuevtcal")[0]
|
||||
.contents[-1]
|
||||
|
@ -38,7 +38,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
soup = BeautifulSoup(event_content, "html.parser")
|
||||
|
||||
title = soup.find("h1").contents[0]
|
||||
category = "Concert"
|
||||
category = "Fêtes & Concerts"
|
||||
image = soup.find("meta", property="og:image")
|
||||
if image:
|
||||
image = image["content"]
|
||||
@ -53,7 +53,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
if description is None:
|
||||
description = ""
|
||||
|
||||
tags = []
|
||||
tags = ["concert"]
|
||||
|
||||
link_calendar = soup.select('a[href^="https://calendar.google.com/calendar/"]')
|
||||
if len(link_calendar) == 0:
|
||||
|
@ -58,7 +58,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
end_day = Extractor.guess_end_day(start_day, start_time, end_time)
|
||||
|
||||
url_human = event_url
|
||||
tags = []
|
||||
tags = ["concert"]
|
||||
|
||||
image = soup.select("wow-image img[fetchpriority=high]")
|
||||
if image:
|
||||
@ -79,7 +79,7 @@ class CExtractor(TwoStepsExtractor):
|
||||
default_values,
|
||||
event_url,
|
||||
title,
|
||||
"Concert",
|
||||
"Fêtes & Concerts",
|
||||
start_day,
|
||||
location,
|
||||
description,
|
||||
|
@ -9,11 +9,12 @@ class CExtractor(TwoStepsExtractor):
|
||||
def category_fotomat2agenda(self, category):
    """Translate a scraped label into an ``(agenda_category, tag)`` pair.

    Args:
        category: Label text to translate; may be empty or ``None``.

    Returns:
        A 2-tuple ``(agenda_category, tag)``; both elements are ``None``
        when the label is empty or not a known one.
    """
    if not category:
        # Always answer with a pair: callers unpack two values, so a bare
        # ``None`` here would raise TypeError on an empty label.
        return None, None
    mapping = {"Concerts": "Fêtes & Concerts"}
    mapping_tag = {"Concerts": "concert"}
    if category in mapping:
        # Return the tag for this label, not the whole tag table.
        return mapping[category], mapping_tag[category]
    return None, None
|
||||
|
||||
def build_event_url_list(self, content):
|
||||
soup = BeautifulSoup(content, "xml")
|
||||
@ -26,9 +27,11 @@ class CExtractor(TwoStepsExtractor):
|
||||
title = e.find("title").contents[0]
|
||||
self.add_event_title(e_url, title)
|
||||
|
||||
category = self.category_fotomat2agenda(e.find("category").contents[0])
|
||||
category, tag = self.category_fotomat2agenda(e.find("category").contents[0])
|
||||
if category:
|
||||
self.add_event_category(e_url, category)
|
||||
if tag:
|
||||
self.add_event_tag(e_url, tag)
|
||||
|
||||
def add_event_from_content(
|
||||
self,
|
||||
|
@ -8,16 +8,26 @@ class CExtractor(TwoStepsExtractor):
|
||||
|
||||
def local2agendaCategory(self, category):
    """Translate a scraped label into an ``(agenda_category, tag)`` pair.

    Args:
        category: Label text to translate.

    Returns:
        A 2-tuple ``(agenda_category, tag)``. The tag is ``None`` for
        labels that carry no extra tag; both elements are ``None`` when
        the label is not a known one.
    """
    mapping = {
        "Musique": "Fêtes & Concerts",
        "CONCERT": "Fêtes & Concerts",
        "VISITE": "Visites & Expositions",
        "Spectacle": "Spectacles",
        "Rencontre": "Rencontres & Débats",
        "Atelier": "Animations & Ateliers",
        "Projection": "Cinéma",
    }
    mapping_tag = {
        "Musique": "concert",
        "CONCERT": "concert",
        "VISITE": None,
        "Spectacle": "théâtre",  # was misspelled "rhéâtre"
        "Rencontre": None,
        "Atelier": "atelier",
        "Projection": None,
    }

    if category in mapping:
        return mapping[category], mapping_tag[category]
    # Always answer with a pair: callers unpack two values, so a bare
    # ``None`` here would raise TypeError on an unknown label.
    return None, None
|
||||
|
||||
@ -39,9 +49,11 @@ class CExtractor(TwoStepsExtractor):
|
||||
if len(categories) == 0:
|
||||
categories = e.select(".mec-category")
|
||||
if len(categories) > 0:
|
||||
category = self.local2agendaCategory(categories[0].get_text())
|
||||
if category is not None:
|
||||
category, tag = self.local2agendaCategory(categories[0].get_text())
|
||||
if category:
|
||||
self.add_event_category(url, category)
|
||||
if tag:
|
||||
self.add_event_category(url, tag)
|
||||
|
||||
|
||||
def add_event_from_content(
|
||||
|
@ -49,7 +49,7 @@ class GoogleCalendarLinkEventExtractor(Extractor):
|
||||
start_day=start_day,
|
||||
location=location,
|
||||
description=description,
|
||||
tags=None,
|
||||
tags=[],
|
||||
uuids=[url],
|
||||
recurrences=None,
|
||||
url_human=url_human,
|
||||
|
@ -144,7 +144,7 @@ def update_database(apps, cats):
|
||||
if e.category and e.category.name in convert.keys():
|
||||
cat, tag = convert[e.category.name].get_transfered_to_object(apps, e)
|
||||
e.category = cat
|
||||
if not tag is None:
|
||||
if tag:
|
||||
if e.tags is None:
|
||||
e.tags = [tag]
|
||||
else:
|
||||
|
@ -1108,7 +1108,14 @@ class Event(models.Model):
|
||||
return events[0]
|
||||
|
||||
def update(self, other):
|
||||
# TODO: what about category, tags?
|
||||
|
||||
# we do not modify the category (local categories are more important)
|
||||
# however, we add supplementary tags
|
||||
if other.tags:
|
||||
if not self.tags:
|
||||
self.tags = []
|
||||
self.tags += [t for t in other.tags if not t in self.tags]
|
||||
|
||||
# set attributes
|
||||
for attr in Event.data_fields():
|
||||
setattr(self, attr, getattr(other, attr))
|
||||
|
Loading…
Reference in New Issue
Block a user