2024-07-19 16:33:27 +02:00
|
|
|
# Workflow settings are loaded from this YAML file into the `config` mapping.
configfile: "config/config.yaml"


# Shared helper code for this workflow.
# NOTE(review): get_artifacts_to_build() and get_analysis_dates(), used
# below, are presumably defined in this file -- confirm.
include: "utils.smk"
|
|
|
|
|
2024-07-21 16:13:52 +02:00
|
|
|
import os
|
2024-07-11 15:17:16 +02:00
|
|
|
import datetime
|
|
|
|
# Date stamp (YYYYMMDD) computed once, when the workflow file is evaluated.
DATE = datetime.datetime.now().strftime("%Y%m%d")

# Folders taken from the configuration file.
ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
BLACKLIST_FOLDER = config["folder_blacklists"]

# Extension of the exported artifact descriptions consumed by `run_ecg`.
EXTENSION = "json"

# Execution backend; `run_ecg` wraps its command when this equals "g5k".
SYSTEM = config["system"]

# Root folder of the logs and per-artifact CSV outputs written by `run_ecg`.
PREFIX = config["prefix"]

# Sub-folders of PREFIX holding the CSV outputs of `run_ecg`.
ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]

# Root folder and category names of the analysis results.
ANALYSIS_DIR = config["analysis_dir"]
ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
ANALYSIS_TYPES = ["moment", "long_term"]

# Artifact names to process, as returned by the helper for the Nickel folder
# and the blacklist folder.
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
|
2024-07-11 15:17:16 +02:00
|
|
|
|
|
|
|
rule all:
    """Default target: today's aggregated analysis files and today's blacklist."""
    input:
        expand(
            f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
            cat=ANALYSIS_CATS,
            date=DATE,
        ),
        # Targets that are currently disabled:
        # expand(f"{PREFIX}/{{folder}}/{{artifact}}/{{date}}.csv",
        #     folder=["pkgs", "build_status", "artifact_hash"],
        #     artifact=ARTIFACTS,
        #     date=DATE
        # ),
        # expand(f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
        #     artifact=ARTIFACTS,
        #     date=DATE
        # ),
        f"{BLACKLIST_FOLDER}/{DATE}.csv",
|
|
|
|
|
2024-07-19 16:33:27 +02:00
|
|
|
rule check_all:
    """Target rule requesting the exported JSON description of every artifact."""
    input:
        expand(
            f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json",
            artifact=ARTIFACTS,
        ),
|
2024-07-26 19:00:25 +02:00
|
|
|
|
2024-07-16 13:59:44 +02:00
|
|
|
rule check_artifact:
    """
    Export one Nickel artifact description to JSON.

    The artifact file is imported with the `Artifact` contract from the
    contract file applied to it, and the result is written with
    `nickel export`.
    """
    input:
        "flake.nix",
        "flake.lock",
        contract="workflow/nickel/artifact_contract.ncl",
        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{artifact}}.ncl",
    output:
        f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json",
    shell:
        # The doubled braces around `Artifact, ..` are an escape: Snakemake
        # turns them into single braces before running the command.
        """
        nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
        """
|
|
|
|
|
2024-07-20 15:41:56 +02:00
|
|
|
# Shell command templates used by `run_ecg`.
# Values inside single braces are filled in right here (f-string); doubled
# braces are left as `{input...}`, `{output...}` and `{wildcards...}`
# placeholders for Snakemake to substitute when the rule runs.
SHELLS_ECG = {
    # Runs the ECG wrapper script directly.
    "local": (
        f"./{{input.ecg_wrapper}} {{input.ecg}} "
        f"{ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION} "
        f"{{output.pkg}} {{output.build_status}} "
        f"{{output.artifact_hash}} {{output.log}}"
    ),
    # Prefix for the submission script. It ends with an opening single quote:
    # `run_ecg` appends the "local" command and the closing quote.
    "g5k": (
        f"python3 {{input.execo_wrapper}} "
        f"--path {os.getcwd()} "
        f"--script {{input.oar_wrapper}} "
        f"--site {config['site']} "
        f"--cluster {config['cluster']} "
        f"--max-duration {config['max_duration']} "
        f"--checkpoint {config['checkpoint']} "
        f"{'--besteffort' if config['besteffort'] else ''} "
        f"--sleep_time {config['sleep_time']} "
        f"--build_status_file {{output.build_status}} "
        f"--artifact {{wildcards.artifact}} -- '"
    ),
}
|
|
|
|
|
2024-07-11 15:17:16 +02:00
|
|
|
rule run_ecg:
    """
    Run ECG on one artifact for one date.

    Produces a log file and three CSV files (package list, build status,
    artifact hash) under PREFIX.
    """
    input:
        "flake.nix",
        "flake.lock",
        ecg="ecg.py",
        ecg_wrapper="workflow/scripts/ecg_wrapper.sh",
        execo_wrapper="workflow/scripts/submission_g5k.py",
        oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}",
    output:
        log=f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
        pkg=f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
        build_status=f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
        artifact_hash=f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",
    shell:
        # With the "g5k" system the local command is passed, single-quoted,
        # as the trailing argument of the submission command; otherwise the
        # local command is run as is.
        (
            SHELLS_ECG["local"]
            if SYSTEM != "g5k"
            else SHELLS_ECG["g5k"] + SHELLS_ECG["local"] + "'"
        )
|
2024-07-11 15:17:16 +02:00
|
|
|
|
|
|
|
rule update_blacklist:
    """
    Build the blacklist of a given date.

    Concatenates the build-status files of every artifact for that date and
    keeps only the lines that do not contain ',success'.
    """
    input:
        # Only `artifact` is expanded here; the quadruple braces leave
        # `{date}` behind as a wildcard of this rule.
        build_status=expand(
            f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
            artifact=ARTIFACTS,
        ),
    output:
        f"{BLACKLIST_FOLDER}/{{date}}.csv",
    shell:
        # We need to ignore lines where build is successful.
        # grep exits with status 1 when it selects no line (every build
        # succeeded). Snakemake runs shell commands in bash strict mode, so
        # that status would fail the rule; accept it (and only it) so that an
        # empty blacklist is written instead.
        "cat {input} | (grep -v ',success' || [ $? -eq 1 ]) > {output}"
|
2024-08-15 12:23:34 +02:00
|
|
|
|
2024-08-19 16:37:51 +02:00
|
|
|
rule analysis:
    """
    Target rule requesting the ECG output CSVs of every artifact, for every
    date reported by get_analysis_dates() for each output folder.
    """
    input:
        # The dates are looked up per output folder. The previous code passed
        # the un-interpolated text "{PREFIX}/{wildcards.output_dir}" to the
        # helper: it was not an f-string, and no `wildcards` object exists
        # while the workflow file is being parsed.
        # NOTE(review): assumes get_analysis_dates() takes a folder path and
        # returns an iterable of date strings -- confirm against utils.smk.
        [
            f"{PREFIX}/{output_dir}/{artifact}/{date}.csv"
            for output_dir in ECG_OUTPUTS
            # Single-element list: binds `dates` once per output folder.
            for dates in [get_analysis_dates(f"{PREFIX}/{output_dir}")]
            for artifact in ARTIFACTS
            for date in dates
        ]
|
|
|
|
|
|
|
|
rule softenv_analysis:
    """
    Run analysis/softenv_analysis.py on the package lists written by ECG.

    The script is called twice: once with `-t sources-stats` and once with
    `-t pkgs-changes`, each call writing its own CSV for the requested date.
    """
    wildcard_constraints:
        # Raw string: "\d" is not a valid escape sequence in a normal string.
        date=r"\d+",
    input:
        # Package lists of the current run date (DATE) only.
        sources_stats=expand(
            f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
            artifact=ARTIFACTS,
            date=DATE,
        ),
        # Package lists for every date reported by get_analysis_dates().
        # The folder must be an f-string, otherwise the helper receives the
        # literal text "{PREFIX}/pkgs".
        pkgs_changes=expand(
            f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
            artifact=ARTIFACTS,
            date=get_analysis_dates(f"{PREFIX}/pkgs"),
        ),
    output:
        sources_stats=f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
        pkgs_changes=f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv",
    shell:
        """
        python3 analysis/softenv_analysis.py -t sources-stats -i {input.sources_stats} -o {output.sources_stats}
        python3 analysis/softenv_analysis.py -t pkgs-changes -i {input.pkgs_changes} -o {output.pkgs_changes}
        """
|
2024-08-19 16:37:51 +02:00
|
|
|
|
2024-08-19 19:02:23 +02:00
|
|
|
rule buildstatus_analysis:
    """
    Run analysis/buildstatus_analysis.py on the build-status files of every
    artifact for the current run date (DATE), writing one CSV for the
    requested date.
    """
    wildcard_constraints:
        # Raw string: "\d" is not a valid escape sequence in a normal string.
        date=r"\d+",
    input:
        expand(
            f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
            artifact=ARTIFACTS,
            date=DATE,
        ),
    output:
        f"{ANALYSIS_DIR}/build_status/{{date}}.csv",
    shell:
        """
        python3 analysis/buildstatus_analysis.py -i {input} -o {output}
        """
|
2024-08-19 16:37:51 +02:00
|
|
|
|
2024-08-19 19:02:23 +02:00
|
|
|
rule artifact_analysis:
    """
    Run analysis/artifact_analysis.py on the artifact-hash files of every
    artifact, for every date reported by get_analysis_dates(), writing one
    CSV for the requested date.
    """
    wildcard_constraints:
        # Raw string: "\d" is not a valid escape sequence in a normal string.
        date=r"\d+",
    input:
        # The folder must be an f-string, otherwise the helper receives the
        # literal text "{PREFIX}/artifact_hash".
        expand(
            f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",
            artifact=ARTIFACTS,
            date=get_analysis_dates(f"{PREFIX}/artifact_hash"),
        ),
    output:
        f"{ANALYSIS_DIR}/artifact/{{date}}.csv",
    shell:
        """
        python3 analysis/artifact_analysis.py -i {input} -o {output}
        """
|
2024-08-19 19:02:23 +02:00
|
|
|
|
|
|
|
rule analysis_aggregate:
    """
    Write the aggregated CSV of every analysis category for the requested
    date.

    NOTE(review): as in the previous version, the command only copies the
    file of the requested date into `aggregated/`. If the files of all dates
    are meant to be concatenated, the command has to be extended.
    """
    input:
        # Per-category analysis files for every date reported by
        # get_analysis_dates(). The previous code passed the un-interpolated
        # text "{ANALYSIS_DIR}/{wildcards.cat}" to the helper; this rule has
        # no `cat` wildcard and the string was not an f-string.
        # NOTE(review): assumes get_analysis_dates() takes a folder path and
        # returns an iterable of date strings -- confirm against utils.smk.
        [
            f"{ANALYSIS_DIR}/{cat}/{date}.csv"
            for cat in ANALYSIS_CATS
            for date in get_analysis_dates(f"{ANALYSIS_DIR}/{cat}")
        ]
    output:
        # Only the category is expanded; `{date}` stays a wildcard.
        expand(
            f"{ANALYSIS_DIR}/{{output_cat}}/aggregated/{{{{date}}}}.csv",
            output_cat=ANALYSIS_CATS,
        ),
    shell:
        # `shell:` needs a single command string, so the per-category commands
        # are joined with newlines, and the date has to be addressed as
        # `{wildcards.date}` inside a shell command.
        "\n".join(
            expand(
                f"cat {ANALYSIS_DIR}/{{cat}}/{{{{wildcards.date}}}}.csv > {ANALYSIS_DIR}/{{cat}}/aggregated/{{{{wildcards.date}}}}.csv",
                cat=ANALYSIS_CATS,
            )
        )
|
|
|
|
|
|
|
|
# Header names per analysis category, referenced by the plotting rules.
# The two package-related categories use the same names; each gets its own
# list object.
_PACKAGE_SOURCES = ("dpkg", "rpm", "pacman", "pip", "conda", "git", "misc")

PLOT_HEADERS = {
    "sources_stats": list(_PACKAGE_SOURCES),
    "pkgs_changes": list(_PACKAGE_SOURCES),
    "build_status": [
        "success",
        "package_install_failed",
        "baseimage_unavailable",
        "artifact_unavailable",
        "dockerfile_not_found",
        "script_crash",
        "job_time_exceeded",
        "unknown_error",
    ],
    "artifact": ["available", "unavailable", "changed"],
}
|
|
|
|
|
|
|
|
rule all_plot:
    """Target rule requesting today's aggregated CSV of every analysis category."""
    input:
        # ANALYSIS_CATS holds the same four category names that were
        # previously repeated here, so this stays in sync with `rule all`.
        expand(
            f"{ANALYSIS_DIR}/{{folder}}/aggregated/{{date}}.csv",
            folder=ANALYSIS_CATS,
            date=DATE,
        ),
|
|
|
|
|
|
|
|
rule line_plot:
    """
    Call `plot.r line` once per analysis category on today's aggregated CSV.

    Each call receives the input file, the output file and the header names
    of the category from PLOT_HEADERS.
    """
    input:
        sources_stats=f"{ANALYSIS_DIR}/sources_stats/aggregated/{DATE}.csv",
        pkgs_changes=f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{DATE}.csv",
        build_status=f"{ANALYSIS_DIR}/build_status/aggregated/{DATE}.csv",
        artifact=f"{ANALYSIS_DIR}/artifact/aggregated/{DATE}.csv",
    output:
        sources_stats=f"{ANALYSIS_DIR}/sources_stats/plot/line/{DATE}.csv",
        pkgs_changes=f"{ANALYSIS_DIR}/pkgs_changes/plot/line/{DATE}.csv",
        build_status=f"{ANALYSIS_DIR}/build_status/plot/line/{DATE}.csv",
        artifact=f"{ANALYSIS_DIR}/artifact/plot/line/{DATE}.csv",
    params:
        # Header names as space-separated command-line arguments. The previous
        # quadruple-brace form reached the shell as the literal text
        # `{PLOT_HEADERS["..."]}` instead of the names themselves.
        # NOTE(review): assumes plot.r takes the names as separate trailing
        # arguments -- confirm against the script.
        sources_stats=" ".join(PLOT_HEADERS["sources_stats"]),
        pkgs_changes=" ".join(PLOT_HEADERS["pkgs_changes"]),
        build_status=" ".join(PLOT_HEADERS["build_status"]),
        artifact=" ".join(PLOT_HEADERS["artifact"]),
    shell:
        """
        Rscript plot.r line {input.sources_stats} {output.sources_stats} {params.sources_stats}
        Rscript plot.r line {input.pkgs_changes} {output.pkgs_changes} {params.pkgs_changes}
        Rscript plot.r line {input.build_status} {output.build_status} {params.build_status}
        Rscript plot.r line {input.artifact} {output.artifact} {params.artifact}
        """
|
|
|
|
|
|
|
|
rule bar_plot:
    """
    Call `plot.r bar` on today's aggregated CSV of the sources_stats,
    build_status and artifact categories.

    Each call receives the input file, the output file and the header names
    of the category from PLOT_HEADERS.
    """
    input:
        sources_stats=f"{ANALYSIS_DIR}/sources_stats/aggregated/{DATE}.csv",
        build_status=f"{ANALYSIS_DIR}/build_status/aggregated/{DATE}.csv",
        artifact=f"{ANALYSIS_DIR}/artifact/aggregated/{DATE}.csv",
    output:
        sources_stats=f"{ANALYSIS_DIR}/sources_stats/plot/bar/{DATE}.csv",
        build_status=f"{ANALYSIS_DIR}/build_status/plot/bar/{DATE}.csv",
        artifact=f"{ANALYSIS_DIR}/artifact/plot/bar/{DATE}.csv",
    params:
        # Header names as space-separated command-line arguments. The previous
        # quadruple-brace form reached the shell as the literal text
        # `{PLOT_HEADERS["..."]}` instead of the names themselves.
        # NOTE(review): assumes plot.r takes the names as separate trailing
        # arguments -- confirm against the script.
        sources_stats=" ".join(PLOT_HEADERS["sources_stats"]),
        build_status=" ".join(PLOT_HEADERS["build_status"]),
        artifact=" ".join(PLOT_HEADERS["artifact"]),
    shell:
        """
        Rscript plot.r bar {input.sources_stats} {output.sources_stats} {params.sources_stats}
        Rscript plot.r bar {input.build_status} {output.build_status} {params.build_status}
        Rscript plot.r bar {input.artifact} {output.artifact} {params.artifact}
        """
|