Compare commits

..

No commits in common. "3eea3854dccb47b5da78085c71a39f081bd3a378" and "009b60b8db555f3b703dc2b4c679a75ad5362ef7" have entirely different histories.

2 changed files with 17 additions and 29 deletions

1
.gitignore vendored
View File

@@ -1,4 +1,3 @@
local/ local/
*.tar.gz *.tar.gz
*.json

View File

@@ -5,44 +5,33 @@ extracteur de données iSpindel JSON entreposées par Little Bock en HTML
import json import json
try: try:
import lxml.html as LX import lxml.html as LX
except ModuleNotFoundError: except ModuleNotFoundError as e:
import sys import sys
sys.exit("Le module 'lxml' est nécessaire.\n\thttp://pypi.org/lxml") print("Le module 'lxml' est nécessaire.\n http://pypi.org/lxml")
sys.exit()
import pathlib import pathlib
def jsproc(data): def proc(path):
"""nettoie des données récursivement""" s, js = None, None
unwanted = ["color","opacity","yAxis","dashStyle"] h = LX.parse(path.name)
for u in unwanted:
if u in data.keys():
del(data[u])
for k,v in data.items():
if isinstance(v,dict):
jsproc(v)
return data
def hproc(path_in):
"""traite un document HTML"""
data1, data2 = None, []
h = LX.parse(str(path_in))
x = h.xpath("//*[@id='fermentation_log_chart']") x = h.xpath("//*[@id='fermentation_log_chart']")
if x: data1 = x[0].get('data-chart-options') if x: s = x[0].get('data-chart-options')
if data1: if s:
for i in json.loads(data1).pop('series'): js = json.dumps(
data2.append(jsproc(i)) json.loads(s).pop('series'), indent=4, sort_keys=True) or None
if len(data2) > 0: if js:
path_out = path_in.with_suffix('.json') p = path.with_suffix('.json')
with open(path_out,'w') as f: with open(p,'w') as f:
f.write(json.dumps(data2, indent=2, sort_keys=False)) f.write(js)
print(f"INFO: {path_in.name} >> {path_out.name}") print(f"INFO: extraction des données dans {p.name}.")
if __name__ == "__main__": if __name__ == "__main__":
here = pathlib.Path.cwd() here = pathlib.Path.cwd()
hdocs = tuple(here.rglob("*.html")) hdocs = tuple(here.glob("*.html"))
if len(hdocs) == 0: if len(hdocs) == 0:
print("Aucun fichier HTML ('.html') trouvé.") print("Aucun fichier HTML ('.html') trouvé.")
for i in hdocs: for i in hdocs:
if i.exists() and i.stat().st_size > 0: if i.exists() and i.stat().st_size > 0:
hproc(i) proc(i)