The blacklist update has been reworked so that it is functional: it no longer relies on a symlink (the symlink has been removed). Instead, the latest blacklist is fetched by the get_artifacts_to_build function. The Snakefile was modified to reflect the renamed build status. A default config file for Snakemake was created, allowing custom configurations to be written to "config.yaml".
This commit is contained in:
parent
513b21754c
commit
9c6fe6700b
@ -1 +0,0 @@
|
||||
blacklists/blacklist.csv
|
|
@ -1,8 +1,7 @@
|
||||
folder_artifacts_nickel: "artifacts/nickel"
|
||||
folder_artifacts_json: "artifacts/json"
|
||||
folder_blacklists: "blacklists"
|
||||
symlink_blacklist: "blacklist.csv"
|
||||
system: "g5k" # can be "local" for local execution
|
||||
system: "local" # can be "local" for local execution
|
||||
prefix: "outputs"
|
||||
analysis_dir: "outputs/analysis"
|
||||
|
||||
|
14
config/default_config.yaml
Normal file
14
config/default_config.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
folder_artifacts_nickel: "artifacts/nickel"
|
||||
folder_artifacts_json: "artifacts/json"
|
||||
folder_blacklists: "blacklists"
|
||||
system: "g5k" # can be "local" for local execution
|
||||
prefix: "outputs"
|
||||
analysis_dir: "outputs/analysis"
|
||||
|
||||
site: "grenoble"
|
||||
cluster: "dahu"
|
||||
max_duration: 60 # 1 hour
|
||||
checkpoint: 1 # 1 minute
|
||||
besteffort: True
|
||||
#sleep_time: 300 # 5 minutes
|
||||
sleep_time: 30 # 0.5 minutes
|
@ -9,13 +9,12 @@ DATE = datetime.datetime.now().strftime("%Y%m%d")
|
||||
ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
|
||||
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
|
||||
BLACKLIST_FOLDER = config["folder_blacklists"]
|
||||
BLACKLIST = config["symlink_blacklist"]
|
||||
EXTENSION = "json"
|
||||
SYSTEM = config["system"]
|
||||
PREFIX = config["prefix"]
|
||||
ANALYSIS_DIR = config["analysis_dir"]
|
||||
|
||||
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST)
|
||||
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
|
||||
|
||||
rule all:
|
||||
input:
|
||||
@ -72,7 +71,6 @@ rule run_ecg:
|
||||
|
||||
rule update_blacklist:
|
||||
input:
|
||||
BLACKLIST,
|
||||
build_status=expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",\
|
||||
artifact=ARTIFACTS
|
||||
)
|
||||
@ -80,7 +78,7 @@ rule update_blacklist:
|
||||
f"{BLACKLIST_FOLDER}/{{date}}.csv"
|
||||
shell:
|
||||
# We need to ignore lines where build is successful:
|
||||
f"cat {{input}} | grep -v ',success' > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}"
|
||||
f"cat {{input}} | grep -v ',success' > {{output}}"
|
||||
|
||||
rule analysis:
|
||||
input:
|
||||
@ -146,7 +144,7 @@ rule analysis_aggregate:
|
||||
# PLOT_HEADERS = {
|
||||
# "sources_stats": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"],
|
||||
# "pkgs_changes": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"],
|
||||
# "build_status": ["success", "package_unavailable", "baseimage_unavailable", "artifact_unavailable", "dockerfile_not_found", "script_crash", "job_time_exceeded", "unknown_error"],
|
||||
# "build_status": ["success", "package_install_failed", "baseimage_unavailable", "artifact_unavailable", "dockerfile_not_found", "script_crash", "job_time_exceeded", "unknown_error"],
|
||||
# "artifact": ["available", "unavailable", "changed"]
|
||||
# }
|
||||
|
||||
|
@ -1,15 +1,29 @@
|
||||
import csv
|
||||
import os
|
||||
|
||||
def get_blacklisted_paths(blacklist_csv_path):
    """Read a blacklist CSV and return the set of values in its first column.

    Each row of the CSV is expected to start with an artifact path; only
    that first field is collected.
    """
    with open(blacklist_csv_path, "r") as handle:
        rows = csv.reader(handle, delimiter=",")
        return {row[0] for row in rows}
|
||||
def find_last_blacklist(blacklist_dir_path):
    """Return the filename of the most recent blacklist in a directory.

    Blacklist files are named after their creation date (e.g. ``20240102.csv``),
    so the most recent one is the file whose numeric stem is largest.

    Returns the placeholder ``"0"`` when the directory contains no
    date-named file (callers must handle that case).
    """
    last_blacklist = "0"
    last_date = 0
    for entry in os.listdir(blacklist_dir_path):
        # Join with the parent directory: os.listdir returns bare names, and
        # testing them directly would resolve against the current working
        # directory (bug in the original version).
        if os.path.isdir(os.path.join(blacklist_dir_path, entry)):
            continue
        stem = os.path.splitext(entry)[0]
        # Ignore files whose stem is not a date stamp (e.g. ".gitkeep")
        # instead of crashing on int() conversion.
        if not stem.isdigit():
            continue
        entry_date = int(stem)
        if entry_date > last_date:
            last_date = entry_date
            last_blacklist = entry
    return last_blacklist
|
||||
|
||||
def get_artifacts_to_build(artifacts_folder, blacklist_csv_path):
    """List artifact names in *artifacts_folder* that are not blacklisted.

    Artifact names are the part of each filename before the first dot;
    subdirectories are skipped. Order of the returned list is unspecified.
    """
    blacklisted = get_blacklisted_paths(blacklist_csv_path)
    artifacts = set()
    for entry in os.listdir(artifacts_folder):
        if os.path.isdir(os.path.join(artifacts_folder, entry)):
            continue
        artifacts.add(entry.split(".")[0])
    return list(artifacts - blacklisted)
|
||||
def get_blacklisted(blacklist_dir_path):
    """Collect the blacklisted artifact paths from the latest blacklist CSV.

    Looks up the most recent blacklist file in *blacklist_dir_path* (via
    find_last_blacklist) and returns the set of values in its first column.
    Returns an empty set when the directory does not exist or holds no
    readable blacklist file.
    """
    blacklisted = set()
    if os.path.exists(blacklist_dir_path):
        blacklist_csv_path = os.path.join(blacklist_dir_path, find_last_blacklist(blacklist_dir_path))
        # An empty blacklist directory makes find_last_blacklist return a
        # placeholder name; guard against opening a non-existent file
        # (the original crashed with FileNotFoundError here).
        if os.path.isfile(blacklist_csv_path):
            with open(blacklist_csv_path, "r") as csv_file:
                spamreader = csv.reader(csv_file, delimiter=",")
                for row in spamreader:
                    # Blank lines yield empty rows; skip them instead of
                    # raising IndexError on row[0].
                    if row:
                        blacklisted.add(row[0])
    return blacklisted
|
||||
|
||||
def get_artifacts_to_build(artifacts_folder, blacklist_dir_path):
    """List artifact names in *artifacts_folder* not present in the blacklist.

    Artifact names are filenames with their extension stripped; entries that
    are subdirectories are skipped. Order of the returned list is unspecified.
    """
    blacklisted = get_blacklisted(blacklist_dir_path)
    artifacts = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(artifacts_folder)
        if not os.path.isdir(os.path.join(artifacts_folder, entry))
    }
    return list(artifacts - blacklisted)
|
||||
|
Loading…
Reference in New Issue
Block a user