The blacklist update has been reworked so that it actually works: it no longer relies on a symlink (this symlink has been removed). Instead, the most recent blacklist is looked up by the get_artifacts_to_build function. Modified the Snakefile to follow the renaming of one of the build statuses. Created a default config file for Snakemake, so that custom configurations can be written to "config.yaml".

This commit is contained in:
antux18 2024-08-20 18:55:12 +02:00
parent 513b21754c
commit 9c6fe6700b
5 changed files with 43 additions and 19 deletions

View File

@ -1 +0,0 @@
blacklists/blacklist.csv
1 blacklists/blacklist.csv

View File

@ -1,8 +1,7 @@
folder_artifacts_nickel: "artifacts/nickel"
folder_artifacts_json: "artifacts/json"
folder_blacklists: "blacklists"
symlink_blacklist: "blacklist.csv"
system: "g5k" # can be "local" for local execution
system: "local" # can be "local" for local execution
prefix: "outputs"
analysis_dir: "outputs/analysis"

View File

@ -0,0 +1,14 @@
folder_artifacts_nickel: "artifacts/nickel"
folder_artifacts_json: "artifacts/json"
folder_blacklists: "blacklists"
system: "g5k" # can be "local" for local execution
prefix: "outputs"
analysis_dir: "outputs/analysis"
site: "grenoble"
cluster: "dahu"
max_duration: 60 # 1 hour
checkpoint: 1 # 1 minute
besteffort: True
#sleep_time: 300 # 5 minutes
sleep_time: 30 # 0.5 minutes

View File

@ -9,13 +9,12 @@ DATE = datetime.datetime.now().strftime("%Y%m%d")
ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
BLACKLIST_FOLDER = config["folder_blacklists"]
BLACKLIST = config["symlink_blacklist"]
EXTENSION = "json"
SYSTEM = config["system"]
PREFIX = config["prefix"]
ANALYSIS_DIR = config["analysis_dir"]
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST)
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
rule all:
input:
@ -72,7 +71,6 @@ rule run_ecg:
rule update_blacklist:
input:
BLACKLIST,
build_status=expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",\
artifact=ARTIFACTS
)
@ -80,7 +78,7 @@ rule update_blacklist:
f"{BLACKLIST_FOLDER}/{{date}}.csv"
shell:
# We need to ignore lines where build is successful:
f"cat {{input}} | grep -v ',success' > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}"
f"cat {{input}} | grep -v ',success' > {{output}}"
rule analysis:
input:
@ -146,7 +144,7 @@ rule analysis_aggregate:
# PLOT_HEADERS = {
# "sources_stats": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"],
# "pkgs_changes": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"],
# "build_status": ["success", "package_unavailable", "baseimage_unavailable", "artifact_unavailable", "dockerfile_not_found", "script_crash", "job_time_exceeded", "unknown_error"],
# "build_status": ["success", "package_install_failed", "baseimage_unavailable", "artifact_unavailable", "dockerfile_not_found", "script_crash", "job_time_exceeded", "unknown_error"],
# "artifact": ["available", "unavailable", "changed"]
# }

View File

@ -1,15 +1,29 @@
import csv
import os
def get_blacklisted_paths(blacklist_csv_path):
    """Return the blacklisted entries stored in a blacklist CSV file.

    Only the first column of each row is kept; the remaining columns are
    ignored.

    Args:
        blacklist_csv_path: Path to the comma-separated blacklist file.

    Returns:
        A set containing the first field of every non-empty row.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline="" lets the csv module handle line endings itself.
    with open(blacklist_csv_path, "r", newline="") as csv_file:
        # csv.reader yields an empty list for blank lines; skip those rows
        # instead of failing on row[0].
        return {row[0] for row in csv.reader(csv_file, delimiter=",") if row}
def find_last_blacklist(blacklist_dir_path):
    """Return the file name of the most recent blacklist in a directory.

    Blacklist files are named after a date written as an integer (for
    example ``20240820.csv``), so the most recent one is the file whose
    name, without extension, is the greatest number.

    Args:
        blacklist_dir_path: Directory containing the dated blacklist files.

    Returns:
        The name (not the full path) of the latest blacklist file, or the
        placeholder ``"0"`` when the directory holds no dated file.

    Raises:
        OSError: If the directory cannot be listed.
    """
    last_blacklist = "0"
    last_date = 0
    for blacklist in os.listdir(blacklist_dir_path):
        # os.listdir returns bare names, so the entry must be joined with
        # its directory before being tested; otherwise the check is made
        # relative to the current working directory.
        if os.path.isdir(os.path.join(blacklist_dir_path, blacklist)):
            continue
        try:
            current_date = int(os.path.splitext(blacklist)[0])
        except ValueError:
            # Not a dated blacklist (README, hidden file, ...): ignore it.
            continue
        # We want the latest one, i.e. the one with the most recent date
        # as file name.
        if current_date > last_date:
            last_blacklist = blacklist
            last_date = current_date
    return last_blacklist
def get_artifacts_to_build(artifacts_folder, blacklist_csv_path):
    """List the artifacts that are not blacklisted.

    Artifact names are the names of the regular entries found directly in
    ``artifacts_folder``, with their extension removed.

    Args:
        artifacts_folder: Directory containing one file per artifact.
        blacklist_csv_path: Path to the blacklist CSV file.

    Returns:
        A sorted list of artifact names absent from the blacklist.
    """
    blacklisted = get_blacklisted_paths(blacklist_csv_path)
    # splitext only strips the last extension, so an artifact whose name
    # contains a dot is not truncated at the first one.
    all_artifacts = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(artifacts_folder)
        if not os.path.isdir(os.path.join(artifacts_folder, entry))
    }
    # Sorting makes the result independent from set iteration order.
    return sorted(all_artifacts - blacklisted)
def get_blacklisted(blacklist_dir_path):
    """Return the entries of the most recent blacklist of a directory.

    The latest blacklist is located with ``find_last_blacklist`` and only
    the first column of each of its rows is kept.

    Args:
        blacklist_dir_path: Directory containing the blacklist CSV files.

    Returns:
        A set containing the first field of every non-empty row of the
        latest blacklist. The set is empty when the directory does not
        exist or does not contain any blacklist yet.
    """
    if not os.path.isdir(blacklist_dir_path):
        return set()
    blacklist_csv_path = os.path.join(
        blacklist_dir_path, find_last_blacklist(blacklist_dir_path)
    )
    # find_last_blacklist falls back to a placeholder name when it finds
    # nothing, in which case there is no file to read.
    if not os.path.isfile(blacklist_csv_path):
        return set()
    # newline="" lets the csv module handle line endings itself.
    with open(blacklist_csv_path, "r", newline="") as csv_file:
        # csv.reader yields an empty list for blank lines; skip those rows
        # instead of failing on row[0].
        return {row[0] for row in csv.reader(csv_file, delimiter=",") if row}
def get_artifacts_to_build(artifacts_folder, blacklist_dir_path):
    """List the artifacts that are not in the most recent blacklist.

    Artifact names are the names of the regular entries found directly in
    ``artifacts_folder``, with their extension removed.

    Args:
        artifacts_folder: Directory containing one file per artifact.
        blacklist_dir_path: Directory containing the blacklist CSV files.

    Returns:
        A sorted list of artifact names absent from the latest blacklist.
    """
    blacklisted = get_blacklisted(blacklist_dir_path)
    all_artifacts = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(artifacts_folder)
        if not os.path.isdir(os.path.join(artifacts_folder, entry))
    }
    # Sorting makes the result independent from set iteration order, which
    # varies between runs for strings.
    return sorted(all_artifacts - blacklisted)