On intègre les nouvelles catégories aux outils d'import
This commit is contained in:
parent
40ce9a9cba
commit
4186b70e7e
@ -33,7 +33,7 @@ if __name__ == "__main__":
|
|||||||
url_human = "https://www.lacoope.org/concerts-calendrier/"
|
url_human = "https://www.lacoope.org/concerts-calendrier/"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
events = u2e.process(url, url_human, cache = "cache-lacoope.html", default_values = {"category": "Concert", "location": "La Coopérative"}, published = True)
|
events = u2e.process(url, url_human, cache = "cache-lacoope.html", default_values = {"category": "Fêtes & Concerts", "location": "La Coopérative"}, published = True)
|
||||||
|
|
||||||
exportfile = "events-lacoope.json"
|
exportfile = "events-lacoope.json"
|
||||||
print("Saving events to file {}".format(exportfile))
|
print("Saving events to file {}".format(exportfile))
|
||||||
|
@ -69,13 +69,16 @@ class CExtractor(TwoStepsExtractorNoPause):
|
|||||||
first_cat = Extractor.remove_accents(category.split(",")[0].lower())
|
first_cat = Extractor.remove_accents(category.split(",")[0].lower())
|
||||||
tags = []
|
tags = []
|
||||||
if first_cat in ["grand spectacle"]:
|
if first_cat in ["grand spectacle"]:
|
||||||
category = "Danse"
|
category = "Spectacles"
|
||||||
|
tags.append("danse")
|
||||||
elif first_cat in ["theatre", "humour / one man show"]:
|
elif first_cat in ["theatre", "humour / one man show"]:
|
||||||
category = "Theatre"
|
category = "Spectacles"
|
||||||
|
tags.append("théâtre")
|
||||||
elif first_cat in ["chanson francaise", "musique du monde", "pop / rock", "rap", "rnb", "raggae", "variete"]:
|
elif first_cat in ["chanson francaise", "musique du monde", "pop / rock", "rap", "rnb", "raggae", "variete"]:
|
||||||
category = "Concert"
|
category = "Fêtes & Concerts"
|
||||||
|
tags.append("concert")
|
||||||
elif first_cat in ["comedie musicale", "humour / one man show", "spectacle equestre"]:
|
elif first_cat in ["comedie musicale", "humour / one man show", "spectacle equestre"]:
|
||||||
category = "Art du spectacle"
|
category = "Spectacles"
|
||||||
elif first_cat in ["spectacle pour enfant"]:
|
elif first_cat in ["spectacle pour enfant"]:
|
||||||
tags = ["jeune public"]
|
tags = ["jeune public"]
|
||||||
category = None
|
category = None
|
||||||
|
@ -10,11 +10,12 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
def category_c3c2agenda(self, category):
|
def category_c3c2agenda(self, category):
|
||||||
if not category:
|
if not category:
|
||||||
return None
|
return None, None
|
||||||
mapping = {"Théâtre": "Théâtre", "Concert": "Concert", "Projection": "Cinéma"}
|
mapping = {"Théâtre": "Spectacles", "Concert": "Fêtes & Concerts", "Projection": "Cinéma"}
|
||||||
|
mapping_tag = {"Théâtre": "théâtre", "Concert": "concert", "Projection": None}
|
||||||
if category in mapping:
|
if category in mapping:
|
||||||
return mapping[category]
|
return mapping[category], mapping_tag[category]
|
||||||
else:
|
else:
|
||||||
return None
|
return None, None
|
||||||
|
|
||||||
def build_event_url_list(self, content):
|
def build_event_url_list(self, content):
|
||||||
soup = BeautifulSoup(content, "html.parser")
|
soup = BeautifulSoup(content, "html.parser")
|
||||||
@ -49,20 +50,23 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
|
|
||||||
description = soup.select_one(".presentation").get_text()
|
description = soup.select_one(".presentation").get_text()
|
||||||
duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
|
duration = soup.select_one("#criteres .DUREE-V .valeur-critere li")
|
||||||
if duration is not None:
|
if duration is not None:
|
||||||
duration = Extractor.parse_french_time(duration.text)
|
duration = Extractor.parse_french_time(duration.text)
|
||||||
|
|
||||||
location = self.nom_lieu
|
location = self.nom_lieu
|
||||||
categories = []
|
categories = []
|
||||||
|
tags = []
|
||||||
for t in soup.select(".sous-titre span"):
|
for t in soup.select(".sous-titre span"):
|
||||||
classes = t.get("class")
|
classes = t.get("class")
|
||||||
if classes and len(classes) > 0:
|
if classes and len(classes) > 0:
|
||||||
if classes[0].startswith("LIEU-"):
|
if classes[0].startswith("LIEU-"):
|
||||||
location = t.text
|
location = t.text
|
||||||
elif classes[0].startswith("THEMATIQUE-"):
|
elif classes[0].startswith("THEMATIQUE-"):
|
||||||
cat = self.category_c3c2agenda(t.text)
|
cat, tag = self.category_c3c2agenda(t.text)
|
||||||
if cat is not None:
|
if cat:
|
||||||
categories.append(cat)
|
categories.append(cat)
|
||||||
|
if tag:
|
||||||
|
tags.append(tag)
|
||||||
|
|
||||||
# TODO: parser les dates, récupérer les heures ()
|
# TODO: parser les dates, récupérer les heures ()
|
||||||
dates = [o.get("value") for o in soup.select("select.datedleb_resa option")]
|
dates = [o.get("value") for o in soup.select("select.datedleb_resa option")]
|
||||||
@ -120,7 +124,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
dt[0],
|
dt[0],
|
||||||
location,
|
location,
|
||||||
description,
|
description,
|
||||||
[],
|
tags,
|
||||||
recurrences=None,
|
recurrences=None,
|
||||||
uuids=[event_url],
|
uuids=[event_url],
|
||||||
url_human=url_human,
|
url_human=url_human,
|
||||||
|
@ -11,16 +11,23 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
|
|
||||||
def category_comedie2agenda(self, category):
|
def category_comedie2agenda(self, category):
|
||||||
mapping = {
|
mapping = {
|
||||||
"Théâtre": "Théâtre",
|
"Théâtre": "Spectacles",
|
||||||
"Danse": "Danse",
|
"Danse": "Spectacles",
|
||||||
"Rencontre": "Sans catégorie",
|
"Rencontre": "Rencontres & Débats",
|
||||||
"Sortie de résidence": "Sans catégorie",
|
"Sortie de résidence": "Sans catégorie",
|
||||||
"PopCorn Live": "Sans catégorie",
|
"PopCorn Live": "Sans catégorie",
|
||||||
}
|
}
|
||||||
|
mapping_tag = {
|
||||||
|
"Théâtre": "théâtre",
|
||||||
|
"Danse": "danse",
|
||||||
|
"Rencontre": None,
|
||||||
|
"Sortie de résidence": "sortie de résidence",
|
||||||
|
"PopCorn Live": None,
|
||||||
|
}
|
||||||
if category in mapping:
|
if category in mapping:
|
||||||
return mapping[category]
|
return mapping[category], mapping_tag[category]
|
||||||
else:
|
else:
|
||||||
return None
|
return None, None
|
||||||
|
|
||||||
def build_event_url_list(self, content):
|
def build_event_url_list(self, content):
|
||||||
dates = json5.loads(content)["data"][0]
|
dates = json5.loads(content)["data"][0]
|
||||||
@ -55,11 +62,13 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
self.add_event_title(e_url, title)
|
self.add_event_title(e_url, title)
|
||||||
category = e.select("div#lieuevtcal span")
|
category = e.select("div#lieuevtcal span")
|
||||||
if len(category) > 0:
|
if len(category) > 0:
|
||||||
category = self.category_comedie2agenda(
|
category, tag = self.category_comedie2agenda(
|
||||||
category[-1].contents[0]
|
category[-1].contents[0]
|
||||||
)
|
)
|
||||||
if category is not None:
|
if category:
|
||||||
self.add_event_category(e_url, category)
|
self.add_event_category(e_url, category)
|
||||||
|
if tag:
|
||||||
|
self.add_event_tag(e_url, tag)
|
||||||
location = (
|
location = (
|
||||||
e.select("div#lieuevtcal")[0]
|
e.select("div#lieuevtcal")[0]
|
||||||
.contents[-1]
|
.contents[-1]
|
||||||
|
@ -38,7 +38,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
soup = BeautifulSoup(event_content, "html.parser")
|
soup = BeautifulSoup(event_content, "html.parser")
|
||||||
|
|
||||||
title = soup.find("h1").contents[0]
|
title = soup.find("h1").contents[0]
|
||||||
category = "Concert"
|
category = "Fêtes & Concerts"
|
||||||
image = soup.find("meta", property="og:image")
|
image = soup.find("meta", property="og:image")
|
||||||
if image:
|
if image:
|
||||||
image = image["content"]
|
image = image["content"]
|
||||||
@ -53,7 +53,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
if description is None:
|
if description is None:
|
||||||
description = ""
|
description = ""
|
||||||
|
|
||||||
tags = []
|
tags = ["concert"]
|
||||||
|
|
||||||
link_calendar = soup.select('a[href^="https://calendar.google.com/calendar/"]')
|
link_calendar = soup.select('a[href^="https://calendar.google.com/calendar/"]')
|
||||||
if len(link_calendar) == 0:
|
if len(link_calendar) == 0:
|
||||||
|
@ -58,7 +58,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
end_day = Extractor.guess_end_day(start_day, start_time, end_time)
|
end_day = Extractor.guess_end_day(start_day, start_time, end_time)
|
||||||
|
|
||||||
url_human = event_url
|
url_human = event_url
|
||||||
tags = []
|
tags = ["concert"]
|
||||||
|
|
||||||
image = soup.select("wow-image img[fetchpriority=high]")
|
image = soup.select("wow-image img[fetchpriority=high]")
|
||||||
if image:
|
if image:
|
||||||
@ -79,7 +79,7 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
default_values,
|
default_values,
|
||||||
event_url,
|
event_url,
|
||||||
title,
|
title,
|
||||||
"Concert",
|
"Fêtes & Concerts",
|
||||||
start_day,
|
start_day,
|
||||||
location,
|
location,
|
||||||
description,
|
description,
|
||||||
|
@ -9,11 +9,12 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
def category_fotomat2agenda(self, category):
|
def category_fotomat2agenda(self, category):
|
||||||
if not category:
|
if not category:
|
||||||
return None
|
return None, None
|
||||||
mapping = {"Concerts": "Concert"}
|
mapping = {"Concerts": "Fêtes & Concerts"}
|
||||||
|
mapping_tag = {"Concerts": "concert"}
|
||||||
if category in mapping:
|
if category in mapping:
|
||||||
return mapping[category]
|
return mapping[category], mapping_tag[category]
|
||||||
else:
|
else:
|
||||||
return None
|
return None, None
|
||||||
|
|
||||||
def build_event_url_list(self, content):
|
def build_event_url_list(self, content):
|
||||||
soup = BeautifulSoup(content, "xml")
|
soup = BeautifulSoup(content, "xml")
|
||||||
@ -26,9 +27,11 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
title = e.find("title").contents[0]
|
title = e.find("title").contents[0]
|
||||||
self.add_event_title(e_url, title)
|
self.add_event_title(e_url, title)
|
||||||
|
|
||||||
category = self.category_fotomat2agenda(e.find("category").contents[0])
|
category, tag = self.category_fotomat2agenda(e.find("category").contents[0])
|
||||||
if category:
|
if category:
|
||||||
self.add_event_category(e_url, category)
|
self.add_event_category(e_url, category)
|
||||||
|
if tag:
|
||||||
|
self.add_event_tag(e_url, tag)
|
||||||
|
|
||||||
def add_event_from_content(
|
def add_event_from_content(
|
||||||
self,
|
self,
|
||||||
|
@ -8,16 +8,26 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
|
|
||||||
def local2agendaCategory(self, category):
|
def local2agendaCategory(self, category):
|
||||||
mapping = {
|
mapping = {
|
||||||
"Musique": "Concert",
|
"Musique": "Fêtes & Concerts",
|
||||||
"CONCERT": "Concert",
|
"CONCERT": "Fêtes & Concerts",
|
||||||
"VISITE": "Sans catégorie",
|
"VISITE": "Visites & Expositions",
|
||||||
"Spectacle": "Théâtre",
|
"Spectacle": "Spectacles",
|
||||||
"Rencontre": "Sans catégorie",
|
"Rencontre": "Rencontres & Débats",
|
||||||
"Atelier": "Sans catégorie",
|
"Atelier": "Animations & Ateliers",
|
||||||
"Projection": "Sans catégorie",
|
"Projection": "Cinéma",
|
||||||
}
|
}
|
||||||
|
mapping_tag = {
|
||||||
|
"Musique": "concert",
|
||||||
|
"CONCERT": "concert",
|
||||||
|
"VISITE": None,
|
||||||
|
"Spectacle": "rhéâtre",
|
||||||
|
"Rencontre": None,
|
||||||
|
"Atelier": "atelier",
|
||||||
|
"Projection": None,
|
||||||
|
}
|
||||||
|
|
||||||
if category in mapping:
|
if category in mapping:
|
||||||
return mapping[category]
|
return mapping[category], mapping_tag[category]
|
||||||
else:
|
else:
|
||||||
return None
|
return None, None
|
||||||
|
|
||||||
@ -39,9 +49,11 @@ class CExtractor(TwoStepsExtractor):
|
|||||||
if len(categories) == 0:
|
if len(categories) == 0:
|
||||||
categories = e.select(".mec-category")
|
categories = e.select(".mec-category")
|
||||||
if len(categories) > 0:
|
if len(categories) > 0:
|
||||||
category = self.local2agendaCategory(categories[0].get_text())
|
category, tag = self.local2agendaCategory(categories[0].get_text())
|
||||||
if category is not None:
|
if category:
|
||||||
self.add_event_category(url, category)
|
self.add_event_category(url, category)
|
||||||
|
if tag:
|
||||||
|
self.add_event_tag(url, tag)
|
||||||
|
|
||||||
|
|
||||||
def add_event_from_content(
|
def add_event_from_content(
|
||||||
|
@ -49,7 +49,7 @@ class GoogleCalendarLinkEventExtractor(Extractor):
|
|||||||
start_day=start_day,
|
start_day=start_day,
|
||||||
location=location,
|
location=location,
|
||||||
description=description,
|
description=description,
|
||||||
tags=None,
|
tags=[],
|
||||||
uuids=[url],
|
uuids=[url],
|
||||||
recurrences=None,
|
recurrences=None,
|
||||||
url_human=url_human,
|
url_human=url_human,
|
||||||
|
@ -144,7 +144,7 @@ def update_database(apps, cats):
|
|||||||
if e.category and e.category.name in convert.keys():
|
if e.category and e.category.name in convert.keys():
|
||||||
cat, tag = convert[e.category.name].get_transfered_to_object(apps, e)
|
cat, tag = convert[e.category.name].get_transfered_to_object(apps, e)
|
||||||
e.category = cat
|
e.category = cat
|
||||||
if not tag is None:
|
if tag:
|
||||||
if e.tags is None:
|
if e.tags is None:
|
||||||
e.tags = [tag]
|
e.tags = [tag]
|
||||||
else:
|
else:
|
||||||
|
@ -1108,7 +1108,14 @@ class Event(models.Model):
|
|||||||
return events[0]
|
return events[0]
|
||||||
|
|
||||||
def update(self, other):
|
def update(self, other):
|
||||||
# TODO: what about category, tags?
|
|
||||||
|
# we do not modify the category (local categories are more important)
|
||||||
|
# however, we add supplementary tags
|
||||||
|
if other.tags:
|
||||||
|
if not self.tags:
|
||||||
|
self.tags = []
|
||||||
|
self.tags += [t for t in other.tags if t not in self.tags]
|
||||||
|
|
||||||
# set attributes
|
# set attributes
|
||||||
for attr in Event.data_fields():
|
for attr in Event.data_fields():
|
||||||
setattr(self, attr, getattr(other, attr))
|
setattr(self, attr, getattr(other, attr))
|
||||||
|
Loading…
Reference in New Issue
Block a user