nettoyage récursif des données JSON

This commit is contained in:
frabad 2022-05-06 20:13:22 +02:00
parent 09209b3b3e
commit 496e95f191

View File

@ -7,21 +7,33 @@ try:
import lxml.html as LX import lxml.html as LX
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.exit("Le module 'lxml' est nécessaire.\n http://pypi.org/lxml") sys.exit("Le module 'lxml' est nécessaire.\n\thttp://pypi.org/lxml")
import pathlib import pathlib
def jsproc(data):
    """Recursively remove unwanted keys from decoded JSON data.

    The keys "color", "opacity", "yAxis" and "dashStyle" are deleted from
    *data* and from every dict nested below it, including dicts stored
    inside lists. Dicts and lists are modified in place.

    Args:
        data: A decoded JSON value (dict, list, or scalar).

    Returns:
        The same object that was passed in; non-container values are
        returned untouched.
    """
    unwanted = ("color", "opacity", "yAxis", "dashStyle")
    if isinstance(data, dict):
        for key in unwanted:
            # pop() with a default removes the key without a prior lookup.
            data.pop(key, None)
        for value in data.values():
            jsproc(value)
    elif isinstance(data, list):
        # Lists may hold further dicts that must be cleaned as well.
        for item in data:
            jsproc(item)
    return data
def hproc(path_in):
    """Export the chart series embedded in an HTML document to a JSON file.

    The document is parsed and the element whose id is
    'fermentation_log_chart' is looked up. The JSON held in its
    'data-chart-options' attribute is decoded, each entry of its 'series'
    list is cleaned with jsproc(), and, when at least one entry was
    collected, the result is written next to the input file with a
    '.json' suffix.

    Args:
        path_in: pathlib path of the HTML document to process.

    Returns:
        None. Nothing is written when the element, the attribute or the
        'series' entry is missing or empty.
    """
    h = LX.parse(str(path_in))
    nodes = h.xpath("//*[@id='fermentation_log_chart']")
    if not nodes:
        return
    raw = nodes[0].get('data-chart-options')
    if not raw:
        return
    options = json.loads(raw)
    # Chart options without a 'series' entry have nothing to export;
    # treat that as an empty result instead of raising KeyError.
    series = options.get('series') if isinstance(options, dict) else None
    cleaned = [jsproc(item) for item in series or []]
    if not cleaned:
        return
    path_out = path_in.with_suffix('.json')
    with open(path_out, 'w', encoding='utf-8') as f:
        f.write(json.dumps(cleaned, indent=2, sort_keys=False))
    print(f"INFO: {path_in.name} >> {path_out.name}")
if __name__ == "__main__": if __name__ == "__main__":
@ -31,6 +43,6 @@ if __name__ == "__main__":
print("Aucun fichier HTML ('.html') trouvé.") print("Aucun fichier HTML ('.html') trouvé.")
for i in hdocs: for i in hdocs:
if i.exists() and i.stat().st_size > 0: if i.exists() and i.stat().st_size > 0:
proc(i) hproc(i)