# study-docker-repro-longevity/workflow/Snakefile

configfile: "config/config.yaml"
include: "utils.smk"
import os
import datetime
DATE = datetime.datetime.now().strftime("%Y%m%d")
ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
SYSTEM = config["system"]
CONFERENCE = config["conference"]
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL + "/" + CONFERENCE)
PREFIX = config["prefix"]
ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]
ANALYSIS_DIR = config["analysis_dir"]
ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
ANALYSIS_SCRIPTS_DIR = "analysis"
ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh"
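# Space-separated lists of per-artifact result directories, passed to the analysis
# wrapper by the (currently disabled) analysis rules at the end of this file.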
ARTIFACT_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/artifact_hash/{{artifact}}",
    artifact = ARTIFACTS
))
SOFTENV_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/pkgs/{{artifact}}",
    artifact = ARTIFACTS
))
PLOT_DIR = config["plot_dir"]
PLOT_HEADERS = {
#"softenv": "dpkg rpm pacman pip conda git misc",
"sources_stats": "dpkg rpm pacman pip conda git misc",
"pkgs_changes": "dpkg rpm pacman pip conda git misc",
"build_status": "success package_install_failed baseimage_unavailable artifact_unavailable dockerfile_not_found script_crash job_time_exceeded unknown_error",
"artifact": "available unavailable changed"
}
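
# Default target: today's ECG build-status results for every artifact of the selected conference.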
rule all:
    input:
        expand(f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",
            conference = config['conference'],
            artifact = ARTIFACTS,
            date = DATE
        )

# Artifacts configuration files:
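# check_all: export every Nickel artifact description of the selected conference to JSON.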
rule check_all:
    input:
        expand(f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json", artifact=ARTIFACTS, conference=config['conference'])
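
# check_artifact: validate one Nickel artifact description against the artifact contract
# and export it to JSON with Nickel, inside the project's Nix dev shell.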
rule check_artifact:
    input:
        "flake.nix",
        "flake.lock",
        contract="workflow/nickel/artifact_contract.ncl",
        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{conference}}/{{artifact}}.ncl"
    output:
        f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
    shell:
        """
        nix develop .#nickel --command nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
        """

# ECG:
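# run_ecg: run ECG on one artifact description, recording its packages, build status and
# artifact hash; when SYSTEM == "g5k", the run is submitted as an OAR job via the execo wrapper.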
rule run_ecg:
    input:
        "flake.nix",
        "flake.lock",
        ecg="ecg/app/ecg.py",
        execo_wrapper="workflow/scripts/submission_g5k.py",
        oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
    output:
        log = f"{PREFIX}/{{conference}}/logs/{{artifact}}/{{date}}.txt",
        pkg = f"{PREFIX}/{{conference}}/pkgs/{{artifact}}/{{date}}.csv",
        build_status = f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",
        artifact_hash = f"{PREFIX}/{{conference}}/artifact_hash/{{artifact}}/{{date}}.csv",
    shell:
        (f"python3 {{input.execo_wrapper}} --path {os.getcwd()} \
        --script {{input.oar_wrapper}} \
        --site {config['site']} \
        --cluster {config['cluster']} \
        --max-duration {config['max_duration']} \
        --checkpoint {config['checkpoint']} \
        {'--besteffort' if config['besteffort'] else ''} \
        --sleep_time {config['sleep_time']} \
        --build_status_file {{output.build_status}} \
        --artifact {{wildcards.artifact}} -- '" if SYSTEM == "g5k" else "") + \
        """
        nix shell .#ecg --command ecg -p {output.pkg} -b {output.build_status} -a {output.artifact_hash} {input.artifact} &> {output.log} || echo "{input.artifact}, `date +%s.%N`, script_crash" > {output.build_status}
        """ + \
        ("'" if SYSTEM == "g5k" else "")

# Analysis:
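# The analysis, aggregation and plot rules below are currently disabled; some of their
# paths have not yet been updated to the per-conference layout used by the rules above.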
#rule softenv_analysis:
#    wildcard_constraints:
#        date="\d+"
#    input:
#        expand(f"{PREFIX}{{conference}}/pkgs/{{artifact}}/{{{{date}}}}.csv",
#            artifact = ARTIFACTS
#        )
#    output:
#        sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
#        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
#    shell:
#        f"""
#        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input}}
#        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {SOFTENV_ANALYSIS_DIRS}
#        """
#
#rule buildstatus_analysis:
#    wildcard_constraints:
#        date="\d+"
#    input:
#        expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
#            artifact = ARTIFACTS
#        ),
#    output:
#        f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
#    shell:
#        f"""
#        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
#        """
#
#rule artifact_analysis:
#    wildcard_constraints:
#        date="\d+"
#    input:
#        expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
#            artifact = ARTIFACTS
#        )
#    output:
#        f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
#    shell:
#        f"""
#        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {ARTIFACT_ANALYSIS_DIRS}
#        """
#
## Analysis aggregate:
#
#rule analysis_aggregate:
#    input:
#        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
#            cat = ANALYSIS_CATS,
#            date = DATE
#        )
#
#rule aggregate_by_type:
#    input:
#        data=f"{ANALYSIS_DIR}/{{type}}/{{date}}.csv",
#        script="workflow/scripts/aggregate_wrapper.sh"
#    output:
#        f"{ANALYSIS_DIR}/{{type}}/aggregated/{{date}}.csv"
#    shell:
#        f"{{input.script}} {ANALYSIS_DIR}/{{type}} {{output}}"
#
## Plot:
#
#rule plot:
#    input:
#        script = "plot/plot.r",
#        data = f"{ANALYSIS_DIR}/{{type}}/aggregated/{{date}}.csv",
#    output:
#        f"{ANALYSIS_DIR}/{{type}}/{{plot}}/{{date}}.pdf"
#    params:
#        header = lambda w: PLOT_HEADERS[w.type]
#    shell:
#        "Rscript {input.script} {wildcards.plot} {input.data} {output} {params.header} timestamp"