ignore les JSON de test

nettoyage récursif des données JSON
recherche récursive des sources de données
2022-05-06 20:15:43 +02:00 · 2022-05-06 20:13:22 +02:00 · 2022-05-06 18:23:36 +02:00
2 changed files with 29 additions and 17 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 local/
 *.tar.gz
 *.json
--- a/LittleBock/extract.py
+++ b/LittleBock/extract.py
@@ -5,33 +5,44 @@ extracteur de données iSpindel JSON entreposées par Little Bock en HTML
 import json
 try:
    import lxml.html as LX
-except ModuleNotFoundError as e:
+except ModuleNotFoundError:
    import sys
-    print("Le module 'lxml' est nécessaire.\n  http://pypi.org/lxml")
+    sys.exit("Le module 'lxml' est nécessaire.\n\thttp://pypi.org/lxml")
    sys.exit()
 import pathlib
-def proc(path):
+def jsproc(data):
-    s, js = None, None
+    """nettoie des données récursivement"""
-    h = LX.parse(path.name)
+    unwanted = ["color","opacity","yAxis","dashStyle"]
    for u in unwanted:
        if u in data.keys():
            del(data[u])
    for k,v in data.items():
        if isinstance(v,dict):
            jsproc(v)
    return data
 def hproc(path_in):
    """traite un document HTML"""
    data1, data2 = None, []
    h = LX.parse(str(path_in))
    x = h.xpath("//*[@id='fermentation_log_chart']")
-    if x: s = x[0].get('data-chart-options')
+    if x: data1 = x[0].get('data-chart-options')
-    if s:
+    if data1:
-        js = json.dumps(
+        for i in json.loads(data1).pop('series'):
-            json.loads(s).pop('series'), indent=4, sort_keys=True) or None
+            data2.append(jsproc(i))
-    if js:
+    if len(data2) > 0:
-        p = path.with_suffix('.json')
+        path_out = path_in.with_suffix('.json')
-        with open(p,'w') as f:
+        with open(path_out,'w') as f:
-            f.write(js)
+            f.write(json.dumps(data2, indent=2, sort_keys=False))
-            print(f"INFO: extraction des données dans {p.name}.")
+            print(f"INFO: {path_in.name} >> {path_out.name}")
 if __name__ == "__main__":
    here = pathlib.Path.cwd()
-    hdocs = tuple(here.glob("*.html"))
+    hdocs = tuple(here.rglob("*.html"))
    if len(hdocs) == 0:
        print("Aucun fichier HTML ('.html') trouvé.")
    for i in hdocs:
        if i.exists() and i.stat().st_size > 0:
-            proc(i)
+            hproc(i)
Author	SHA1	Message	Date
frabad	3eea3854dc	ignore les JSON de test	2022-05-06 20:15:43 +02:00
frabad	496e95f191	nettoyage récursif des données JSON	2022-05-06 20:13:22 +02:00
frabad	09209b3b3e	recherche récursive des sources de données	2022-05-06 18:23:36 +02:00