# Snakemake workflow.
#
# Active rules:
#   * check_artifact / check_all: export each Nickel artifact description to
#     JSON after applying the Artifact contract.
#   * run_ecg / all: run `ecg` on each exported JSON description and collect
#     the package list, build status, artifact hash and log, one file per
#     artifact and per date.
#
# The analysis, aggregation and plot rules at the bottom are commented out.

configfile: "config/config.yaml"

include: "utils.smk"

import os
import datetime

# Date stamp (YYYYMMDD) computed when the workflow is parsed; it is the value
# given to the `date` wildcard of the default targets.
DATE = datetime.datetime.now().strftime("%Y%m%d")

# Values read from the configuration file.
ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
SYSTEM = config["system"]
CONFERENCE = config["conference"]
PREFIX = config["prefix"]
ANALYSIS_DIR = config["analysis_dir"]
PLOT_DIR = config["plot_dir"]

# Artifacts to process for the configured conference.
# `get_artifacts_to_build` is not defined in this file; presumably it comes
# from the included "utils.smk" -- confirm there.
ARTIFACTS = get_artifacts_to_build("/".join([ARTIFACTS_FOLDER_NICKEL, CONFERENCE]))

ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]

ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
ANALYSIS_SCRIPTS_DIR = "analysis"
ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh"

# Space-separated per-artifact directories, referenced only by the
# commented-out analysis rules below.
# NOTE(review): these paths have no conference component, whereas `run_ecg`
# writes under {PREFIX}/{conference}/... -- confirm before re-enabling the
# analysis rules.
ARTIFACT_ANALYSIS_DIRS = " ".join(
    expand(f"{PREFIX}/artifact_hash/{{artifact}}", artifact=ARTIFACTS)
)
SOFTENV_ANALYSIS_DIRS = " ".join(
    expand(f"{PREFIX}/pkgs/{{artifact}}", artifact=ARTIFACTS)
)

# Header strings handed to the plot script, keyed by analysis category
# (looked up by the commented-out `plot` rule).
_PKG_SOURCES_HEADER = "dpkg rpm pacman pip conda git misc"
_BUILD_STATUS_HEADER = " ".join([
    "success",
    "package_install_failed",
    "baseimage_unavailable",
    "artifact_unavailable",
    "dockerfile_not_found",
    "script_crash",
    "job_time_exceeded",
    "unknown_error",
])
PLOT_HEADERS = {
    #"softenv": "dpkg rpm pacman pip conda git misc",
    "sources_stats": _PKG_SOURCES_HEADER,
    "pkgs_changes": _PKG_SOURCES_HEADER,
    "build_status": _BUILD_STATUS_HEADER,
    "artifact": "available unavailable changed"
}

# When the configured system is "g5k", the ECG command is not run directly:
# it is passed as a single-quoted string, after "--", to the submission
# wrapper script. The g5k-only configuration keys are read only in that case.
if SYSTEM == "g5k":
    _ECG_CMD_PREFIX = f"python3 {{input.execo_wrapper}} --path {os.getcwd()} \
        --script {{input.oar_wrapper}} \
        --site {config['site']} \
        --cluster {config['cluster']} \
        --max-duration {config['max_duration']} \
        --checkpoint {config['checkpoint']} \
        {'--besteffort' if config['besteffort'] else ''} \
        --sleep_time {config['sleep_time']} \
        --build_status_file {{output.build_status}} \
        --artifact {{wildcards.artifact}} -- '"
    _ECG_CMD_SUFFIX = "'"
else:
    _ECG_CMD_PREFIX = ""
    _ECG_CMD_SUFFIX = ""

# Default target: today's build-status file for every artifact.
rule all:
    input:
        expand(
            f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",
            conference=CONFERENCE,
            artifact=ARTIFACTS,
            date=DATE,
        )

# Artifacts configuration files:

# Aggregate target: the exported JSON description of every artifact.
rule check_all:
    input:
        expand(
            f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json",
            artifact=ARTIFACTS,
            conference=CONFERENCE,
        )

# Export one Nickel artifact description to JSON, applying the `Artifact`
# contract imported from the contract file. The Nickel program is fed to
# `nickel export` through a here-string.
rule check_artifact:
    input:
        "flake.nix",
        "flake.lock",
        contract="workflow/nickel/artifact_contract.ncl",
        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{conference}}/{{artifact}}.ncl"
    output:
        f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
    shell:
        """
        nix develop .#nickel --command nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
        """

# ECG:

# Run `ecg` on one exported artifact description. Both stdout and stderr go
# to the log file. If the command exits with a non-zero status, a row
# "<artifact json path>, <epoch timestamp>, script_crash" is written to the
# build-status file instead.
# NOTE(review): that fallback writes only the build-status file; whether the
# other declared outputs exist after a crash depends on `ecg` -- confirm.
rule run_ecg:
    input:
        "flake.nix",
        "flake.lock",
        ecg="ecg/app/ecg.py",
        execo_wrapper="workflow/scripts/submission_g5k.py",
        oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
    output:
        log = f"{PREFIX}/{{conference}}/logs/{{artifact}}/{{date}}.txt",
        pkg = f"{PREFIX}/{{conference}}/pkgs/{{artifact}}/{{date}}.csv",
        build_status = f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",
        artifact_hash = f"{PREFIX}/{{conference}}/artifact_hash/{{artifact}}/{{date}}.csv",
    shell:
        _ECG_CMD_PREFIX + \
        """
        nix shell .#ecg --command ecg -p {output.pkg} -b {output.build_status} -a {output.artifact_hash} {input.artifact} &> {output.log} || echo "{input.artifact}, `date +%s.%N`, script_crash" > {output.build_status}
        """ + \
        _ECG_CMD_SUFFIX

# Analysis:

#rule softenv_analysis:
#    wildcard_constraints:
#        date="\d+"
#    input:
#        expand(f"{PREFIX}{{conference}}/pkgs/{{artifact}}/{{{{date}}}}.csv",
#            artifact = ARTIFACTS
#        )
#    output:
#        sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
#        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
#    shell:
#        f"""
#        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input}}
#        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {SOFTENV_ANALYSIS_DIRS}
#        """
#
#rule buildstatus_analysis:
#    wildcard_constraints:
#        date="\d+"
#    input:
#        expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
#            artifact = ARTIFACTS
#        ),
#    output:
#        f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
#    shell:
#        f"""
#        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
#        """
#
#rule artifact_analysis:
#    wildcard_constraints:
#        date="\d+"
#    input:
#        expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
#            artifact = ARTIFACTS
#        )
#    output:
#        f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
#    shell:
#        f"""
#        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {ARTIFACT_ANALYSIS_DIRS}
#        """
#
## Analysis aggregate:
#
#rule analysis_aggregate:
#    input:
#        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
#            cat = ANALYSIS_CATS,
#            date = DATE
#        )
#
#rule aggregate_by_type:
#    input:
#        data=f"{ANALYSIS_DIR}/{{type}}/{{date}}.csv",
#        script="workflow/scripts/aggregate_wrapper.sh"
#    output:
#        f"{ANALYSIS_DIR}/{{type}}/aggregated/{{date}}.csv"
#    shell:
#        f"{{input.script}} {ANALYSIS_DIR}/{{type}} {{output}}"
#
## Plot:
#
#rule plot:
#    input:
#        script = "plot/plot.r",
#        data = f"{ANALYSIS_DIR}/{{type}}/aggregated/{{date}}.csv",
#    output:
#        f"{ANALYSIS_DIR}/{{type}}/{{plot}}/{{date}}.pdf"
#    params:
#        header = lambda w: PLOT_HEADERS[w.type]
#    shell:
#        "Rscript {input.script} {wildcards.plot} {input.data} {output} {params.header} timestamp"