agenda_culturel/experimentations/get_arachnee_events.py

41 lines
1.4 KiB
Python
Raw Normal View History

#!/usr/bin/python3
# coding: utf-8
import os
import json
import sys
# Append the parent of this script's directory to sys.path; presumably this
# is what lets the `src.agenda_culturel...` imports that follow resolve when
# the script is launched directly (verify against the repository layout).
current = os.path.split(os.path.realpath(__file__))[0]  # directory holding this file (symlinks resolved)
parent = os.path.split(current)[0]  # one level above that directory
sys.path.append(parent)
from src.agenda_culturel.import_tasks.downloader import *
from src.agenda_culturel.import_tasks.extractor import *
from src.agenda_culturel.import_tasks.importer import *
from src.agenda_culturel.import_tasks.custom_extractors import *
if __name__ == "__main__":
    # Run the Arachnée extraction once and dump the result as JSON in the
    # current working directory. Any failure is reported on stdout and the
    # script then ends normally.

    # AJAX endpoint handed to the downloader; the query string asks for the
    # "Clermont-Fd" theatres with per_page=9999.
    ajax_url = "https://www.arachnee-concerts.com/wp-admin/admin-ajax.php?action=movies-filter&per_page=9999&date=NaN.NaN.NaN&theatres=Clermont-Fd&cat=&sorting=&list_all_events=&current_page="
    # Public agenda page passed as the second argument of process();
    # presumably recorded as the human-readable source URL -- TODO confirm.
    agenda_page = "https://www.arachnee-concerts.com/agenda-des-concerts/Clermont-Fd/"
    output_path = "events-arachnee.json"

    pipeline = URL2Events(ChromiumHeadlessDownloader(), arachnee.CExtractor())

    try:
        extracted = pipeline.process(
            ajax_url,
            agenda_page,
            cache="cache-arachnee.html",
            default_values={},
            published=True,
        )
        print("Saving events to file {}".format(output_path))
        with open(output_path, "w") as out:
            # default=str stringifies any value json cannot encode natively.
            json.dump(extracted, out, indent=4, default=str)
    except Exception as err:
        print("Exception: " + str(err))