recherche récursive des sources de données

This commit is contained in:
frabad 2022-05-06 18:23:36 +02:00
parent 009b60b8db
commit 09209b3b3e

View File

@@ -5,29 +5,28 @@ extracteur de données iSpindel JSON entreposées par Little Bock en HTML
import json import json
try: try:
import lxml.html as LX import lxml.html as LX
except ModuleNotFoundError as e: except ModuleNotFoundError:
import sys import sys
print("Le module 'lxml' est nécessaire.\n http://pypi.org/lxml") sys.exit("Le module 'lxml' est nécessaire.\n http://pypi.org/lxml")
sys.exit()
import pathlib import pathlib
def proc(path): def proc(path_in):
s, js = None, None s, js = None, None
h = LX.parse(path.name) h = LX.parse(str(path_in))
x = h.xpath("//*[@id='fermentation_log_chart']") x = h.xpath("//*[@id='fermentation_log_chart']")
if x: s = x[0].get('data-chart-options') if x: s = x[0].get('data-chart-options')
if s: if s:
js = json.dumps( js = json.dumps(json.loads(s).pop('series'), indent=4, sort_keys=True
json.loads(s).pop('series'), indent=4, sort_keys=True) or None ) or None
if js: if js:
p = path.with_suffix('.json') path_out = path_in.with_suffix('.json')
with open(p,'w') as f: with open(path_out,'w') as f:
f.write(js) f.write(js)
print(f"INFO: extraction des données dans {p.name}.") print(f"INFO: {path_in.name} >> {path_out.name}")
if __name__ == "__main__": if __name__ == "__main__":
here = pathlib.Path.cwd() here = pathlib.Path.cwd()
hdocs = tuple(here.glob("*.html")) hdocs = tuple(here.rglob("*.html"))
if len(hdocs) == 0: if len(hdocs) == 0:
print("Aucun fichier HTML ('.html') trouvé.") print("Aucun fichier HTML ('.html') trouvé.")
for i in hdocs: for i in hdocs: