diff --git a/blacklist.csv b/blacklist.csv deleted file mode 120000 index 0e88979..0000000 --- a/blacklist.csv +++ /dev/null @@ -1 +0,0 @@ -blacklists/blacklist.csv \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 1d35535..0acad79 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,8 +1,7 @@ folder_artifacts_nickel: "artifacts/nickel" folder_artifacts_json: "artifacts/json" folder_blacklists: "blacklists" -symlink_blacklist: "blacklist.csv" -system: "g5k" # can be "local" for local execution +system: "local" # can be "local" for local execution prefix: "outputs" analysis_dir: "outputs/analysis" diff --git a/config/default_config.yaml b/config/default_config.yaml new file mode 100644 index 0000000..2a8472e --- /dev/null +++ b/config/default_config.yaml @@ -0,0 +1,14 @@ +folder_artifacts_nickel: "artifacts/nickel" +folder_artifacts_json: "artifacts/json" +folder_blacklists: "blacklists" +system: "g5k" # can be "local" for local execution +prefix: "outputs" +analysis_dir: "outputs/analysis" + +site: "grenoble" +cluster: "dahu" +max_duration: 60 # 1 hour +checkpoint: 1 # 1 minute +besteffort: True + #sleep_time: 300 # 5 minutes +sleep_time: 30 # 0.5 minutes diff --git a/workflow/Snakefile b/workflow/Snakefile index d99a1f8..25253f8 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -9,13 +9,12 @@ DATE = datetime.datetime.now().strftime("%Y%m%d") ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"] ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"] BLACKLIST_FOLDER = config["folder_blacklists"] -BLACKLIST = config["symlink_blacklist"] EXTENSION = "json" SYSTEM = config["system"] PREFIX = config["prefix"] ANALYSIS_DIR = config["analysis_dir"] -ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST) +ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER) rule all: input: @@ -72,7 +71,6 @@ rule run_ecg: rule update_blacklist: input: - BLACKLIST, build_status=expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",\ artifact=ARTIFACTS ) @@ -80,7 +78,7 @@ rule update_blacklist: f"{BLACKLIST_FOLDER}/{{date}}.csv" shell: # We need to ignore lines where build is successful: - f"cat {{input}} | grep -v ',success' > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}" + f"cat {{input}} | grep -v ',success' > {{output}}" rule analysis: input: @@ -146,7 +144,7 @@ rule analysis_aggregate: # PLOT_HEADERS = { # "sources_stats": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"], # "pkgs_changes": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"], -# "build_status": ["success", "package_unavailable", "baseimage_unavailable", "artifact_unavailable", "dockerfile_not_found", "script_crash", "job_time_exceeded", "unknown_error"], +# "build_status": ["success", "package_install_failed", "baseimage_unavailable", "artifact_unavailable", "dockerfile_not_found", "script_crash", "job_time_exceeded", "unknown_error"], # "artifact": ["available", "unavailable", "changed"] # } diff --git a/workflow/utils.smk b/workflow/utils.smk index a136bd8..5f4d568 100644 --- a/workflow/utils.smk +++ b/workflow/utils.smk @@ -1,15 +1,29 @@ import csv import os -def get_blacklisted_paths(blacklist_csv_path): - blacklisted = set() - with open(blacklist_csv_path, "r") as csv_file: - spamreader = csv.reader(csv_file, delimiter=",") - for row in spamreader: - blacklisted.add(row[0]) - return blacklisted +def find_last_blacklist(blacklist_dir_path): + last_blacklist = "0" + for blacklist in os.listdir(blacklist_dir_path): + if not os.path.isdir(blacklist): + # We want the latest one, so the one that has the most recent date + # as file name: + curbl_date = int(os.path.splitext(blacklist)[0]) + lastbl_date = int(os.path.splitext(last_blacklist)[0]) + if curbl_date > lastbl_date: + last_blacklist = blacklist + return last_blacklist -def get_artifacts_to_build(artifacts_folder, blacklist_csv_path): - blacklisted = get_blacklisted_paths(blacklist_csv_path) - all_artifacts = set([a.split(".")[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))]) - return list(all_artifacts.difference(blacklisted)) +def get_blacklisted(blacklist_dir_path): + blacklisted = set() + if os.path.exists(blacklist_dir_path): + blacklist_csv_path = os.path.join(blacklist_dir_path, find_last_blacklist(blacklist_dir_path)) + with open(blacklist_csv_path, "r") as csv_file: + spamreader = csv.reader(csv_file, delimiter=",") + for row in spamreader: + blacklisted.add(row[0]) + return blacklisted + +def get_artifacts_to_build(artifacts_folder, blacklist_dir_path): + blacklisted = get_blacklisted(blacklist_dir_path) + all_artifacts = set([os.path.splitext(a)[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))]) + return list(all_artifacts.difference(blacklisted))