mySpindel/LittleBock/extract.py

37 lines
1013 B
Python
Raw Normal View History

2022-05-06 17:33:17 +02:00
#!/usr/bin/python3
"""
extracteur de données iSpindel JSON entreposées par Little Bock en HTML
"""
import json
try:
import lxml.html as LX
except ModuleNotFoundError:
2022-05-06 17:33:17 +02:00
import sys
sys.exit("Le module 'lxml' est nécessaire.\n http://pypi.org/lxml")
2022-05-06 17:33:17 +02:00
import pathlib
def proc(path_in):
    """Extract the chart series embedded in one HTML page and save it as JSON.

    Looks up the element whose id is ``fermentation_log_chart``, reads its
    ``data-chart-options`` attribute, decodes it as JSON and writes the value
    stored under the ``series`` key (indented, keys sorted) to a sibling file
    carrying the ``.json`` suffix.

    Pages lacking the element, the attribute, or the ``series`` key are
    skipped without writing anything. An attribute that is not valid JSON is
    reported and skipped, so that one bad page does not abort a whole batch.

    Args:
        path_in: ``pathlib.Path`` of the HTML document to read.
    """
    doc = LX.parse(str(path_in))
    nodes = doc.xpath("//*[@id='fermentation_log_chart']")
    if not nodes:
        return
    raw = nodes[0].get('data-chart-options')
    if not raw:
        return

    try:
        options = json.loads(raw)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        print(f"WARN: {path_in.name}: 'data-chart-options' n'est pas du JSON valide.")
        return
    # Only a JSON object can carry a 'series' entry; anything else is skipped
    # instead of failing on a missing key or a non-dict value.
    if not isinstance(options, dict) or 'series' not in options:
        return

    js = json.dumps(options['series'], indent=4, sort_keys=True)
    path_out = path_in.with_suffix('.json')
    # json.dumps escapes non-ASCII by default; the explicit encoding just makes
    # the output independent of the platform's locale.
    with open(path_out, 'w', encoding='utf-8') as f:
        f.write(js)
    print(f"INFO: {path_in.name} >> {path_out.name}")
2022-05-06 17:33:17 +02:00
if __name__ == "__main__":
    # Collect every '.html' entry below the current working directory.
    here = pathlib.Path.cwd()
    hdocs = tuple(here.rglob("*.html"))
    if not hdocs:
        print("Aucun fichier HTML ('.html') trouvé.")
    for page in hdocs:
        # is_file() rejects directories that merely happen to be named
        # '*.html' (as well as dangling symlinks); those cannot be parsed.
        # Empty files are skipped too.
        if page.is_file() and page.stat().st_size > 0:
            proc(page)