Compare commits
No commits in common. "3eea3854dccb47b5da78085c71a39f081bd3a378" and "009b60b8db555f3b703dc2b4c679a75ad5362ef7" have entirely different histories.
3eea3854dc
...
009b60b8db
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,3 @@
|
|||||||
local/
|
local/
|
||||||
*.tar.gz
|
*.tar.gz
|
||||||
*.json
|
|
||||||
|
|
||||||
|
@@ -5,44 +5,33 @@ extracteur de données iSpindel JSON entreposées par Little Bock en HTML
|
|||||||
import json
|
import json
|
||||||
try:
|
try:
|
||||||
import lxml.html as LX
|
import lxml.html as LX
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError as e:
|
||||||
import sys
|
import sys
|
||||||
sys.exit("Le module 'lxml' est nécessaire.\n\thttp://pypi.org/lxml")
|
print("Le module 'lxml' est nécessaire.\n http://pypi.org/lxml")
|
||||||
|
sys.exit()
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
def jsproc(data):
|
def proc(path):
|
||||||
"""nettoie des données récursivement"""
|
s, js = None, None
|
||||||
unwanted = ["color","opacity","yAxis","dashStyle"]
|
h = LX.parse(path.name)
|
||||||
for u in unwanted:
|
|
||||||
if u in data.keys():
|
|
||||||
del(data[u])
|
|
||||||
for k,v in data.items():
|
|
||||||
if isinstance(v,dict):
|
|
||||||
jsproc(v)
|
|
||||||
return data
|
|
||||||
|
|
||||||
def hproc(path_in):
|
|
||||||
"""traite un document HTML"""
|
|
||||||
data1, data2 = None, []
|
|
||||||
h = LX.parse(str(path_in))
|
|
||||||
x = h.xpath("//*[@id='fermentation_log_chart']")
|
x = h.xpath("//*[@id='fermentation_log_chart']")
|
||||||
if x: data1 = x[0].get('data-chart-options')
|
if x: s = x[0].get('data-chart-options')
|
||||||
if data1:
|
if s:
|
||||||
for i in json.loads(data1).pop('series'):
|
js = json.dumps(
|
||||||
data2.append(jsproc(i))
|
json.loads(s).pop('series'), indent=4, sort_keys=True) or None
|
||||||
if len(data2) > 0:
|
if js:
|
||||||
path_out = path_in.with_suffix('.json')
|
p = path.with_suffix('.json')
|
||||||
with open(path_out,'w') as f:
|
with open(p,'w') as f:
|
||||||
f.write(json.dumps(data2, indent=2, sort_keys=False))
|
f.write(js)
|
||||||
print(f"INFO: {path_in.name} >> {path_out.name}")
|
print(f"INFO: extraction des données dans {p.name}.")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
here = pathlib.Path.cwd()
|
here = pathlib.Path.cwd()
|
||||||
hdocs = tuple(here.rglob("*.html"))
|
hdocs = tuple(here.glob("*.html"))
|
||||||
if len(hdocs) == 0:
|
if len(hdocs) == 0:
|
||||||
print("Aucun fichier HTML ('.html') trouvé.")
|
print("Aucun fichier HTML ('.html') trouvé.")
|
||||||
for i in hdocs:
|
for i in hdocs:
|
||||||
if i.exists() and i.stat().st_size > 0:
|
if i.exists() and i.stat().st_size > 0:
|
||||||
hproc(i)
|
proc(i)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user