#!/usr/bin/python3
"""
Extractor for iSpindel JSON data stored by Little Bock inside HTML pages.

Every ``*.html`` file found below the current working directory is searched
for the element whose id is ``fermentation_log_chart``.  The chart series held
in that element's ``data-chart-options`` attribute are stripped of
presentation-only keys and written next to the HTML file, using the same
name with a ``.json`` suffix.
"""
import json
import pathlib
import sys
from typing import Optional

try:
    import lxml.html as LX
except ModuleNotFoundError:
    # Do not kill the interpreter at import time: the failure is reported by
    # main() (script usage) or hproc() (library usage) instead, so that
    # jsproc() stays usable without lxml.
    LX = None

# Message shown when lxml is not installed (user-facing, kept in French).
_LXML_MISSING = "Le module 'lxml' est nécessaire.\n\thttp://pypi.org/lxml"

# Series keys that only describe how the chart is drawn.
_UNWANTED_KEYS = ("color", "opacity", "yAxis", "dashStyle", "tooltip")


def jsproc(data: dict) -> Optional[dict]:
    """Remove the presentation-only keys from one chart series.

    The series is modified in place.  When its ``tooltip`` entry carries a
    non-empty ``valueSuffix``, that suffix is appended to the series ``name``
    (separated by one space) before the tooltip is dropped.

    Args:
        data: One series mapping taken from the chart options.

    Returns:
        The same ``data`` object once cleaned, or ``None`` when the series
        has no (or an empty) ``data`` entry.

    Raises:
        KeyError: If a suffix has to be appended but the series has no
            ``name`` key.
    """
    if not data.get("data"):
        return None

    tooltip = data.get("tooltip")
    # The tooltip may be present but null / not a mapping; only a mapping can
    # carry a value suffix.
    if isinstance(tooltip, dict):
        suffix = tooltip.get("valueSuffix")
        if suffix:
            data["name"] = " ".join([data["name"], suffix])

    for key in _UNWANTED_KEYS:
        data.pop(key, None)
    return data


def hproc(path_in: pathlib.Path) -> None:
    """Process one HTML document and write its series as JSON.

    Nothing is written when the document has no ``fermentation_log_chart``
    element, when that element has no ``data-chart-options`` attribute, or
    when no series survives :func:`jsproc`.

    Args:
        path_in: Path of the HTML file.  The output goes to the same path
            with its suffix replaced by ``.json``.

    Raises:
        ModuleNotFoundError: If ``lxml`` is not installed.
    """
    if LX is None:
        raise ModuleNotFoundError(_LXML_MISSING)

    tree = LX.parse(str(path_in))
    nodes = tree.xpath("//*[@id='fermentation_log_chart']")
    options = nodes[0].get("data-chart-options") if nodes else None
    if not options:
        return

    # A missing or null "series" entry simply means there is nothing to export.
    raw_series = json.loads(options).get("series") or []
    cleaned = [series for series in map(jsproc, raw_series) if series]
    if not cleaned:
        return

    path_out = path_in.with_suffix(".json")
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned to UTF-8 rather than left to the platform default.
    with open(path_out, "w", encoding="utf-8") as f:
        f.write(json.dumps(cleaned, sort_keys=False, ensure_ascii=False, indent=2))
    print(f"INFO: {path_in.name} >> {path_out.name}")


def main() -> None:
    """Convert every non-empty HTML file found below the current directory."""
    if LX is None:
        sys.exit(_LXML_MISSING)

    hdocs = tuple(pathlib.Path.cwd().rglob("*.html"))
    if not hdocs:
        print("Aucun fichier HTML ('.html') trouvé.")
    for doc in hdocs:
        # is_file() also rejects directories that happen to end in ".html".
        if doc.is_file() and doc.stat().st_size > 0:
            hproc(doc)


if __name__ == "__main__":
    main()