study-docker-repro-longevity/workflow/Snakefile

238 lines
8.3 KiB
Plaintext
Raw Normal View History

2024-07-19 16:33:27 +02:00
configfile: "config/config.yaml"
2024-07-11 15:17:16 +02:00
include: "utils.smk"
import os
2024-07-11 15:17:16 +02:00
import datetime
# Date of the current run formatted as YYYYMMDD. It is evaluated once, when the
# Snakefile is parsed, and is used as the {date} value of the targets requested
# by the aggregate rules below (e.g. "all").
DATE = datetime.datetime.now().strftime("%Y%m%d")
2024-07-19 16:33:27 +02:00
# Folder locations, all read from the configuration file.
# Nickel descriptions of the artifacts (input of rule check_artifact):
ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
# JSON exports of those descriptions (output of check_artifact, input of run_ecg):
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
# Blacklist CSV files (written by update_blacklist, passed to get_artifacts_to_build):
BLACKLIST_FOLDER = config["folder_blacklists"]
2024-07-16 13:59:44 +02:00
# File extension of the exported artifact descriptions that rule run_ecg consumes.
EXTENSION = "json"
2024-07-20 15:41:56 +02:00
# Execution backend for rule run_ecg: "g5k" wraps the local command in a job
# submission, any other value runs the local command directly.
SYSTEM = config["system"]
# Root folder of the per-artifact outputs of run_ecg (logs, pkgs, build_status,
# artifact_hash).
PREFIX = config["prefix"]
# Names of the ECG output categories (not referenced elsewhere in this part of the file).
ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]
# Command templates used by rule run_ecg. Single-brace fields are substituted
# here, when the Snakefile is parsed; double-brace fields are left as
# {input...}/{output...}/{wildcards...} placeholders for Snakemake to fill in
# when the rule runs.
# The "g5k" template deliberately ends with an opening single quote: run_ecg
# appends the "local" command and then the closing quote, so that the whole
# local command is passed as one argument after "--".
SHELLS_ECG = {
"local": f"./{{input.ecg_wrapper}} {{input.ecg}} {ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION} {{output.pkg}} {{output.build_status}} {{output.artifact_hash}} {{output.log}}",
"g5k": f"python3 {{input.execo_wrapper}} --path {os.getcwd()} --script {{input.oar_wrapper}} --site {config['site']} --cluster {config['cluster']} --max-duration {config['max_duration']} --checkpoint {config['checkpoint']} {'--besteffort' if config['besteffort'] else ''} --sleep_time {config['sleep_time']} --build_status_file {{output.build_status}} --artifact {{wildcards.artifact}} -- '"
}
# Root folder of the analysis results (one sub-folder per analysis category).
ANALYSIS_DIR = config["analysis_dir"]
# Analysis categories; each one is a sub-folder of ANALYSIS_DIR.
ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
# Folder containing the Python analysis scripts.
ANALYSIS_SCRIPTS_DIR = "analysis"
# Shell wrappers invoked by the analysis and aggregation rules.
ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh"
AGGREGATE_WRAPPER = "workflow/scripts/aggregate_wrapper.sh"
# Folder for plots, read from the configuration (not referenced elsewhere in
# this part of the file).
PLOT_DIR = config["plot_dir"]
# R script called by the plotting rules.
PLOT_SCRIPT = "plot/plot.r"
# Space-separated words appended to the plot script command line for each kind
# of table (presumably the CSV column names -- TODO confirm against plot.r).
PLOT_HEADERS = {
"softenv": "dpkg rpm pacman pip conda git misc",
"build_status": "success package_install_failed baseimage_unavailable artifact_unavailable dockerfile_not_found script_crash job_time_exceeded unknown_error",
"artifact": "available unavailable changed"
}
2024-07-11 15:17:16 +02:00
# Names of the artifacts handled by this run, computed from the Nickel folder
# and the blacklist folder. get_artifacts_to_build is not defined in this file
# (presumably it comes from the included utils.smk -- TODO confirm).
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
2024-07-11 15:17:16 +02:00
# Default target: today's line plots for every analysis category, today's bar
# plots for the categories that have one, and today's blacklist.
rule all:
    input:
        # Line plots, one per analysis category.
        expand(
            f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/line/{{date}}.pdf",
            analysis_cat = ANALYSIS_CATS,
            date = DATE
        ),
        # Bar plots (there is none for "pkgs_changes").
        expand(
            f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/bar/{{date}}.pdf",
            analysis_cat = ["sources_stats", "build_status", "artifact"],
            date = DATE
        ),
        # Blacklist produced from today's build statuses.
        f"{BLACKLIST_FOLDER}/{DATE}.csv"
# Artifacts configuration files:
2024-07-19 16:33:27 +02:00
# Target rule: export the description of every artifact to check.
rule check_all:
    input:
        # Use EXTENSION rather than a hard-coded ".json" so that this target
        # names the same files as the ones run_ecg consumes.
        expand(
            f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}",
            artifact = ARTIFACTS
        )
2024-07-16 13:59:44 +02:00
# Validate one Nickel artifact description against the Artifact contract and
# export it to the JSON folder.
rule check_artifact:
    input:
        "flake.nix",
        "flake.lock",
        contract = "workflow/nickel/artifact_contract.ncl",
        artifact = f"{ARTIFACTS_FOLDER_NICKEL}/{{artifact}}.ncl"
    output:
        # Same extension constant as the input of run_ecg, so that the file
        # produced here is always the one requested there.
        f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}"
    shell:
        # The doubled braces are Snakemake escapes for the literal braces of
        # the Nickel destructuring pattern.
        """
        nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
        """
# ECG:
2024-07-11 15:17:16 +02:00
# Run ECG on one artifact for one date, either locally or through a Grid'5000
# submission depending on SYSTEM.
rule run_ecg:
    input:
        "flake.nix",
        "flake.lock",
        ecg = "ecg.py",
        ecg_wrapper = "workflow/scripts/ecg_wrapper.sh",
        execo_wrapper = "workflow/scripts/submission_g5k.py",
        oar_wrapper = "workflow/scripts/ecg_oar_wrapper.oar.bash",
        artifact = f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}"
    output:
        log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
        pkg = f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
        build_status = f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
        artifact_hash = f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv"
    shell:
        # On "g5k" the local command is enclosed in the single quotes opened at
        # the end of the g5k template; otherwise it is run as is.
        (SHELLS_ECG["g5k"] if SYSTEM == "g5k" else "") + SHELLS_ECG["local"] + ("'" if SYSTEM == "g5k" else "")
2024-07-11 15:17:16 +02:00
# Build the blacklist of a given date from the build status files of all
# artifacts for that date.
rule update_blacklist:
    input:
        build_status = expand(
            f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
            artifact = ARTIFACTS
        )
    output:
        f"{BLACKLIST_FOLDER}/{{date}}.csv"
    shell:
        # Lines of successful builds are filtered out. grep exits with a
        # non-zero status when it selects no line, hence the "|| true".
        "cat {input} | grep -v ',success' > {output} || true"
# Analysis:
# Software environment analysis for one date: source statistics computed from
# that date's package lists, and package changes computed from the package
# folders of all artifacts.
rule softenv_analysis:
    wildcard_constraints:
        # Raw string: "\d" in a plain string literal is an invalid escape
        # sequence, which recent Python versions report as a SyntaxWarning.
        date = r"\d+"
    input:
        today_files = expand(
            f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv",
            artifact = ARTIFACTS
        ),
        dirs = expand(
            f"{PREFIX}/pkgs/{{artifact}}",
            artifact = ARTIFACTS
        ),
    output:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
    shell:
        f"""
        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py sources-stats {{output.sources_stats}} {{input.today_files}}
        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py pkgs-changes {{output.pkgs_changes}} {{input.dirs}}
        """
# Build status analysis for one date, computed from that date's build status
# files of all artifacts.
rule buildstatus_analysis:
    wildcard_constraints:
        # Raw string: "\d" in a plain string literal is an invalid escape
        # sequence, which recent Python versions report as a SyntaxWarning.
        date = r"\d+"
    input:
        expand(
            f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
            artifact = ARTIFACTS
        ),
    output:
        f"{ANALYSIS_DIR}/build_status/{{date}}.csv",
    shell:
        f"""
        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
        """
# Artifact availability analysis for one date, computed from the artifact hash
# folders of all artifacts.
rule artifact_analysis:
    wildcard_constraints:
        # Raw string: "\d" in a plain string literal is an invalid escape
        # sequence, which recent Python versions report as a SyntaxWarning.
        date = r"\d+"
    input:
        # Not passed to the command: listed so that today's hashes are
        # produced before the folders are analysed.
        today_files = expand(
            f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
            artifact = ARTIFACTS
        ),
        dirs = expand(
            f"{PREFIX}/artifact_hash/{{artifact}}",
            artifact = ARTIFACTS
        ),
    output:
        f"{ANALYSIS_DIR}/artifact/{{date}}.csv",
    shell:
        f"""
        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {{input.dirs}}
        """
# Analysis aggregate:
# Target rule: today's aggregated table for every analysis category.
rule analysis_aggregate:
    input:
        expand(
            f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
            cat = ANALYSIS_CATS,
            date = DATE
        )
# Aggregate the content of the package changes folder into one table.
rule pkgschgs_aggregate:
    input:
        dir = f"{ANALYSIS_DIR}/pkgs_changes",
        # Not passed to the command: listed so that the analysis of the
        # requested date exists before aggregating.
        today_file = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
    output:
        f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv"
    shell:
        f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
# Aggregate the content of the source statistics folder into one table.
rule srcsstats_aggregate:
    input:
        dir = f"{ANALYSIS_DIR}/sources_stats",
        # Not passed to the command: listed so that the analysis of the
        # requested date exists before aggregating.
        today_file = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv"
    output:
        f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv"
    shell:
        f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
# Aggregate the content of the artifact analysis folder into one table.
rule artifact_aggregate:
    input:
        dir = f"{ANALYSIS_DIR}/artifact",
        # Not passed to the command: listed so that the analysis of the
        # requested date exists before aggregating.
        today_file = f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
    output:
        f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
    shell:
        f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
# Aggregate the content of the build status analysis folder into one table.
rule buildstatus_aggregate:
    input:
        dir = f"{ANALYSIS_DIR}/build_status",
        # Not passed to the command: listed so that the analysis of the
        # requested date exists before aggregating.
        today_file = f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
    output:
        f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv"
    shell:
        f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
# Plot:
# Target rule: every plot for today's date.
# The previous input list was a copy of the one of analysis_aggregate (the
# aggregated CSV tables), so requesting this target never produced any plot.
# It now requests the PDF files written by line_plot and bar_plot.
rule all_plot:
    input:
        # Line plots, one per analysis category.
        expand(
            f"{ANALYSIS_DIR}/{{cat}}/plot/line/{{date}}.pdf",
            cat = ANALYSIS_CATS,
            date = DATE
        ),
        # Bar plots: bar_plot has no output for "pkgs_changes".
        expand(
            f"{ANALYSIS_DIR}/{{cat}}/plot/bar/{{date}}.pdf",
            cat = ["sources_stats", "build_status", "artifact"],
            date = DATE
        )
# Draw the line plot of each aggregated table for a given date. One Rscript
# call per table; the two software environment tables share the same headers.
rule line_plot:
    input:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv",
        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv",
        build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv",
        artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
    output:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/plot/line/{{date}}.pdf",
        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/plot/line/{{date}}.pdf",
        build_status = f"{ANALYSIS_DIR}/build_status/plot/line/{{date}}.pdf",
        artifact = f"{ANALYSIS_DIR}/artifact/plot/line/{{date}}.pdf"
    shell:
        f"""
        Rscript {PLOT_SCRIPT} line {{input.sources_stats}} {{output.sources_stats}} {PLOT_HEADERS["softenv"]} timestamp
        Rscript {PLOT_SCRIPT} line {{input.pkgs_changes}} {{output.pkgs_changes}} {PLOT_HEADERS["softenv"]} timestamp
        Rscript {PLOT_SCRIPT} line {{input.build_status}} {{output.build_status}} {PLOT_HEADERS["build_status"]} timestamp
        Rscript {PLOT_SCRIPT} line {{input.artifact}} {{output.artifact}} {PLOT_HEADERS["artifact"]} timestamp
        """
# Draw the bar plot of the aggregated tables that have one (source statistics,
# build status and artifact) for a given date. One Rscript call per table.
rule bar_plot:
    input:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv",
        build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv",
        artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
    output:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/plot/bar/{{date}}.pdf",
        build_status = f"{ANALYSIS_DIR}/build_status/plot/bar/{{date}}.pdf",
        artifact = f"{ANALYSIS_DIR}/artifact/plot/bar/{{date}}.pdf"
    shell:
        f"""
        Rscript {PLOT_SCRIPT} bar {{input.sources_stats}} {{output.sources_stats}} {PLOT_HEADERS["softenv"]} timestamp
        Rscript {PLOT_SCRIPT} bar {{input.build_status}} {{output.build_status}} {PLOT_HEADERS["build_status"]} timestamp
        Rscript {PLOT_SCRIPT} bar {{input.artifact}} {{output.artifact}} {PLOT_HEADERS["artifact"]} timestamp
        """