2022-05-06 17:33:17 +02:00
|
|
|
#!/usr/bin/python3
|
|
|
|
"""
Extract the iSpindel JSON data that Little Bock stores inside HTML pages.
"""
|
2022-05-06 21:54:32 +02:00
|
|
|
import json, pathlib
|
2022-05-06 17:33:17 +02:00
|
|
|
try:
|
|
|
|
import lxml.html as LX
|
2022-05-06 18:23:36 +02:00
|
|
|
except ModuleNotFoundError:
|
2022-05-06 17:33:17 +02:00
|
|
|
import sys
|
2022-05-06 20:13:22 +02:00
|
|
|
sys.exit("Le module 'lxml' est nécessaire.\n\thttp://pypi.org/lxml")
|
2022-05-06 17:33:17 +02:00
|
|
|
|
2022-05-06 20:13:22 +02:00
|
|
|
def jsproc(data):
    """Clean one chart series and return it, or None when it holds no data.

    The series is modified in place: presentation-only keys are removed,
    and when its ``tooltip`` entry carries a ``valueSuffix`` (the unit),
    that suffix is appended to the series ``name`` before the tooltip is
    dropped.

    Args:
        data: one series object decoded from JSON (normally a dict).

    Returns:
        The same dict, cleaned, when its ``"data"`` entry is truthy;
        ``None`` otherwise (including when ``data`` is not a dict).
    """
    if not isinstance(data, dict) or not data.get("data"):
        return None

    # Data clean-up: keys that are not wanted in the exported series.
    unwanted = ("color", "opacity", "yAxis", "dashStyle", "tooltip")
    for key in unwanted:
        if key not in data:
            continue
        if key == "tooltip":
            # Attach the unit to the name of the value it qualifies.
            # The tooltip may be null or malformed in the JSON, and a
            # series may lack a name; neither case should abort the run.
            tooltip = data[key]
            suffix = (
                tooltip.get("valueSuffix")
                if isinstance(tooltip, dict)
                else None
            )
            name = data.get("name")
            if isinstance(suffix, str) and suffix and isinstance(name, str):
                data["name"] = " ".join([name, suffix])
        del data[key]  # drop the unwanted entry
    return data
|
2022-05-06 20:13:22 +02:00
|
|
|
|
|
|
|
def hproc(path_in):
    """Extract the chart series of one HTML document into a JSON file.

    Finds the element whose id is ``fermentation_log_chart``, decodes the
    JSON held in its ``data-chart-options`` attribute, passes every entry
    of its ``series`` list through ``jsproc`` and writes the entries that
    were kept to a file next to ``path_in`` with a ``.json`` suffix.
    No file is written when there is nothing to keep.

    Args:
        path_in: ``pathlib.Path`` of the HTML document to process.
    """
    tree = LX.parse(str(path_in))
    nodes = tree.xpath("//*[@id='fermentation_log_chart']")
    raw = nodes[0].get('data-chart-options') if nodes else None
    if not raw:
        return

    try:
        options = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError; one malformed document
        # must not abort the processing of the remaining files.
        print(f"ERREUR: {path_in.name}: JSON invalide dans 'data-chart-options'")
        return

    series = options.get('series') if isinstance(options, dict) else None
    if not isinstance(series, list):
        return

    kept = []
    for entry in series:
        cleaned = jsproc(entry)
        if cleaned:
            kept.append(cleaned)
    if not kept:
        return

    path_out = path_in.with_suffix('.json')
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # pinned to UTF-8 instead of depending on the platform locale.
    path_out.write_text(
        json.dumps(kept, sort_keys=False, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"INFO: {path_in.name} >> {path_out.name}")
|
2022-05-06 17:33:17 +02:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Recursively collect every '.html' entry below the current directory.
    here = pathlib.Path.cwd()
    hdocs = tuple(here.rglob("*.html"))
    if not hdocs:
        print("Aucun fichier HTML ('.html') trouvé.")
    for doc in hdocs:
        # is_file() rejects directories (and dangling symlinks) whose name
        # happens to end in '.html'; empty files are skipped as well.
        if doc.is_file() and doc.stat().st_size > 0:
            hproc(doc)
|
2022-05-06 17:33:17 +02:00
|
|
|
|
|
|
|
|