From 496e95f19192cf714e786dadad700882872e9b0e Mon Sep 17 00:00:00 2001 From: frabad Date: Fri, 6 May 2022 20:13:22 +0200 Subject: [PATCH] =?UTF-8?q?nettoyage=20r=C3=A9cursif=20des=20donn=C3=A9es?= =?UTF-8?q?=20JSON?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- LittleBock/extract.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/LittleBock/extract.py b/LittleBock/extract.py index 2c00a5c..6af5874 100755 --- a/LittleBock/extract.py +++ b/LittleBock/extract.py @@ -7,21 +7,33 @@ try: import lxml.html as LX except ModuleNotFoundError: import sys - sys.exit("Le module 'lxml' est nécessaire.\n http://pypi.org/lxml") + sys.exit("Le module 'lxml' est nécessaire.\n\thttp://pypi.org/lxml") import pathlib -def proc(path_in): - s, js = None, None +def jsproc(data): + """nettoie des données récursivement""" + unwanted = ["color","opacity","yAxis","dashStyle"] + for u in unwanted: + if u in data.keys(): + del(data[u]) + for k,v in data.items(): + if isinstance(v,dict): + jsproc(v) + return data + +def hproc(path_in): + """traite un document HTML""" + data1, data2 = None, [] h = LX.parse(str(path_in)) x = h.xpath("//*[@id='fermentation_log_chart']") - if x: s = x[0].get('data-chart-options') - if s: - js = json.dumps(json.loads(s).pop('series'), indent=4, sort_keys=True - ) or None - if js: + if x: data1 = x[0].get('data-chart-options') + if data1: + for i in json.loads(data1).pop('series'): + data2.append(jsproc(i)) + if len(data2) > 0: path_out = path_in.with_suffix('.json') with open(path_out,'w') as f: - f.write(js) + f.write(json.dumps(data2, indent=2, sort_keys=False)) print(f"INFO: {path_in.name} >> {path_out.name}") if __name__ == "__main__": @@ -31,6 +43,6 @@ if __name__ == "__main__": print("Aucun fichier HTML ('.html') trouvé.") for i in hdocs: if i.exists() and i.stat().st_size > 0: - proc(i) + hproc(i)