tools/parltrack/scrap_representatives.py

32 lines
672 B
Python

# encoding: utf-8
import os
import shutil
import lzip
# Download script: fetches the Parltrack MEP dump and streams it to disk.

# Remote lzip archive (.lz) fetched by this script.
url = "https://parltrack.org/dumps/ep_meps.json.lz"
# Working directory; taken from the POLITIKORAMA_DATA_ROOT environment
# variable when set, otherwise "../tmp" (relative to the current directory).
data_root = os.environ.get("POLITIKORAMA_DATA_ROOT", "../tmp")
data_source = os.path.join(data_root, "ep_meps.json.lz")
data_target = os.path.join(data_root, "json")

# Cleaning old data: a missing file or folder only means nothing is left
# over from a previous run, so FileNotFoundError is deliberately ignored.
try:
    os.remove(data_source)
except FileNotFoundError:
    # No file to remove
    pass
try:
    shutil.rmtree(data_target)
except FileNotFoundError:
    # No folder to remove
    pass

# Make sure the working directory exists before opening a file inside it;
# otherwise the very first run fails with FileNotFoundError on open().
# An empty data_root means "current directory", which needs no creation
# (and os.makedirs("") would raise).
if data_root:
    os.makedirs(data_root, exist_ok=True)

# Download and extract data
print("Downloading archive")
with open(data_source, "wb") as f:
    # Stream the chunks produced by lzip for `url` straight to disk, so the
    # whole dump never has to be held in memory at once.
    for chunk in lzip.decompress_url_iter(url):
        f.write(chunk)
# NOTE(review): the file written just above is deleted immediately, and
# data_target is cleaned earlier but never populated in this script.
# Presumably a processing step that fills data_target is still missing
# between the download and this removal -- confirm before relying on it.
os.remove(data_source)