# encoding: utf-8
|
|
|
|
import os
|
|
import shutil
|
|
|
|
import lzip
|
|
|
|
# Location of the compressed dump to fetch.
url = "https://parltrack.org/dumps/ep_meps.json.lz"
# Working directory; overridable through the POLITIKORAMA_DATA_ROOT variable.
data_root = os.environ.get("POLITIKORAMA_DATA_ROOT", "../tmp")
# Scratch file the downloaded stream is written to.
data_source = os.path.join(data_root, "ep_meps.json.lz")
# Output folder; it is wiped below on every run.
data_target = os.path.join(data_root, "json")


def _remove_file_if_present(path):
    """Delete the file at *path*, doing nothing when it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # No file to remove
        pass


# Cleaning old data
_remove_file_if_present(data_source)
try:
    shutil.rmtree(data_target)
except FileNotFoundError:
    # No folder to remove
    pass

# Make sure the working directory exists: without it, open() below raises
# FileNotFoundError on a fresh checkout where the data root was never created.
# An empty data_root means "current directory", which needs no creation.
if data_root:
    os.makedirs(data_root, exist_ok=True)

# Download and extract data
# NOTE(review): the chunks written below presumably are already decompressed
# by lzip, yet they land in a path ending in ".lz" that is deleted right after,
# and nothing is ever written under data_target. A processing step between the
# download and the removal looks to be missing -- confirm the intended output.
print("Downloading archive")
try:
    with open(data_source, "wb") as f:
        for chunk in lzip.decompress_url_iter(url):
            f.write(chunk)
finally:
    # Drop the scratch file even when the download fails midway, so a partial
    # file is never left behind.
    _remove_file_if_present(data_source)