# Snakemake workflow: validate artifact descriptions, run ECG on each artifact,
# update the blacklist, then analyse, aggregate and plot the results.

configfile: "config/config.yaml"

# NOTE(review): get_artifacts_to_build() and get_analysis_dates() are not
# defined in this file; presumably they come from utils.smk -- confirm.
include: "utils.smk"

import os
import datetime

# Date stamp (YYYYMMDD) computed once, when the workflow is parsed.
DATE = datetime.datetime.now().strftime("%Y%m%d")

ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
BLACKLIST_FOLDER = config["folder_blacklists"]
EXTENSION = "json"
SYSTEM = config["system"]
PREFIX = config["prefix"]

ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]

# Shell command templates used by `run_ecg`.
# Doubled braces survive the f-string as single braces, so Snakemake fills in
# {input...}/{output...}/{wildcards...} later, at job time.
# The "g5k" template deliberately ends with an opening single quote: `run_ecg`
# appends the "local" command and then the closing quote, so the local command
# is passed as one quoted argument after `--`.
SHELLS_ECG = {
    "local": f"./{{input.ecg_wrapper}} {{input.ecg}} {ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION} {{output.pkg}} {{output.build_status}} {{output.artifact_hash}} {{output.log}}",
    "g5k": f"python3 {{input.execo_wrapper}} --path {os.getcwd()} --script {{input.oar_wrapper}} --site {config['site']} --cluster {config['cluster']} --max-duration {config['max_duration']} --checkpoint {config['checkpoint']} {'--besteffort' if config['besteffort'] else ''} --sleep_time {config['sleep_time']} --build_status_file {{output.build_status}} --artifact {{wildcards.artifact}} -- '"
}

ANALYSIS_DIR = config["analysis_dir"]
ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]

PLOT_DIR = config["plot_dir"]
PLOT_SCRIPT = "plot/plot.r"
# Space-separated column names passed on the command line to the plot script.
PLOT_HEADERS = {
    "softenv": "dpkg rpm pacman pip conda git misc",
    "build_status": "success package_install_failed baseimage_unavailable artifact_unavailable dockerfile_not_found script_crash job_time_exceeded unknown_error",
    "artifact": "available unavailable changed"
}

# Evaluated at parse time from the Nickel artifact folder and the blacklist folder.
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)

# Default target: today's line plots for every analysis category, today's bar
# plots for the three categories that have one, and today's blacklist file.
rule all:
    input:
        expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/line/{{date}}.pdf",
            analysis_cat = ANALYSIS_CATS,
            date = DATE
        ),
        expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/bar/{{date}}.pdf",
            analysis_cat = ["sources_stats", "build_status", "artifact"],
            date = DATE
        ),
        # expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
        #     cat = ANALYSIS_CATS,
        #     date = DATE
        # ),
        # expand(f"{PREFIX}/{{folder}}/{{artifact}}/{{date}}.csv",
        #     folder=["pkgs", "build_status", "artifact_hash"],
        #     artifact=ARTIFACTS,
        #     date=DATE
        # ),
        # expand(f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
        #     artifact=ARTIFACTS,
        #     date=DATE
        # ),
        f"{BLACKLIST_FOLDER}/{DATE}.csv"

# Target rule: export every artifact in ARTIFACTS to JSON (see `check_artifact`).
rule check_all:
    input:
        expand(f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json", artifact=ARTIFACTS)

# Apply the `Artifact` contract to one Nickel artifact file and export it as JSON.
rule check_artifact:
    input:
        "flake.nix",
        "flake.lock",
        contract="workflow/nickel/artifact_contract.ncl",
        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{artifact}}.ncl"
    output:
        f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json"
    shell:
        # Plain (non-f) string: the doubled braces around `Artifact, ..` are
        # Snakemake escapes for literal braces in the Nickel expression.
        """
        nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
        """

# Run ECG on one artifact for one date, producing a log plus the pkgs,
# build_status and artifact_hash CSV files.
rule run_ecg:
    input:
        "flake.nix",
        "flake.lock",
        ecg="ecg.py",
        ecg_wrapper="workflow/scripts/ecg_wrapper.sh",
        execo_wrapper="workflow/scripts/submission_g5k.py",
        oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}"
    output:
        log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
        pkg = f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
        build_status = f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
        artifact_hash = f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv"
    shell:
        # When SYSTEM is "g5k", the local command is wrapped in the submission
        # command and enclosed in the single quotes it opens; otherwise the
        # local command runs directly.
        (SHELLS_ECG["g5k"] if SYSTEM == "g5k" else "") + SHELLS_ECG["local"] + ("'" if SYSTEM == "g5k" else "")

# Concatenate the build-status files of all artifacts for a date, keeping only
# the lines that do not contain ",success".
rule update_blacklist:
    input:
        # Quadruple braces keep {date} as a wildcard through both the f-string
        # and expand(); only {artifact} is expanded here.
        build_status=expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
            artifact=ARTIFACTS
        )
    output:
        f"{BLACKLIST_FOLDER}/{{date}}.csv"
    shell:
        # We need to ignore lines where build is successful:
        # (`|| true` because grep exits non-zero when it selects no line.)
        f"cat {{input}} | grep -v ',success' > {{output}} || true"

# rule analysis:
#     input:
#         expand(f"{PREFIX}/{{output_dir}}/{{artifact}}/{{date}}.csv",
#             output_dir = ECG_OUTPUTS,
#             artifact = ARTIFACTS,
#             # date = get_analysis_dates("{PREFIX}/{output_dir}")
#             date = glob_wildcards("{PREFIX}/{output_dir}/{artifact}/{date}.csv").date
#         )

# Software-environment analysis: produces the sources_stats and pkgs_changes
# CSV files from the per-artifact pkgs files.
# NOTE(review): `sources_stats` inputs are pinned to DATE (parse-time date) and
# not to the rule's {date} wildcard -- confirm this is intended.
rule softenv_analysis:
    wildcard_constraints:
        # Digits only; presumably keeps {date} from matching other path
        # components -- confirm. Raw string avoids the invalid "\d" escape.
        date=r"\d+"
    input:
        sources_stats = expand(f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
            artifact = ARTIFACTS,
            date = DATE
        ),
        pkgs_changes = expand(f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
            artifact = ARTIFACTS,
            date = get_analysis_dates(f"{PREFIX}/pkgs")
        )
    output:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
    shell:
        f"""
        python3 analysis/softenv_analysis.py -t sources-stats -i {{input.sources_stats}} -o {{output.sources_stats}}
        python3 analysis/softenv_analysis.py -t pkgs-changes -i {{input.pkgs_changes}} -o {{output.pkgs_changes}}
        """

# Build-status analysis over the build_status files of all artifacts.
# NOTE(review): inputs are pinned to DATE, not the {date} wildcard -- confirm.
rule buildstatus_analysis:
    wildcard_constraints:
        # Digits only; raw string avoids the invalid "\d" escape.
        date=r"\d+"
    input:
        expand(f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
            artifact = ARTIFACTS,
            date = DATE
        )
    output:
        f"{ANALYSIS_DIR}/build_status/{{date}}.csv",
    shell:
        f"""
        python3 analysis/buildstatus_analysis.py -i {{input}} -o {{output}}
        """

# Artifact analysis over the artifact_hash files of all artifacts, for every
# date returned by get_analysis_dates().
rule artifact_analysis:
    wildcard_constraints:
        # Digits only; raw string avoids the invalid "\d" escape.
        date=r"\d+"
    input:
        expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",
            artifact = ARTIFACTS,
            date = get_analysis_dates(f"{PREFIX}/artifact_hash")
        )
    output:
        f"{ANALYSIS_DIR}/artifact/{{date}}.csv",
    shell:
        f"""
        python3 analysis/artifact_analysis.py -i {{input}} -o {{output}}
        """

# Target rule: today's aggregated CSV for every analysis category.
rule analysis_aggregate:
    input:
        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
            cat = ANALYSIS_CATS,
            date = DATE
        )

# rule single_aggregate:
#     input:
#         expand(f"{ANALYSIS_DIR}/{{{{cat}}}}/{{date}}.csv",
#             date = get_analysis_dates(f"{ANALYSIS_DIR}/{{wildcards.cat}}")
#             # date = glob_wildcards("{ANALYSIS_DIR}/{cat}/{date}.csv").date
#         )
#     output:
#         f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv"
#     shell:
#         f"cat {{input}} > {{output}}"

# The four rules below each concatenate the per-date CSV files of one analysis
# category into a single aggregated CSV.

rule pkgschgs_aggregate:
    input:
        expand(f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv",
            date = get_analysis_dates(f"{ANALYSIS_DIR}/pkgs_changes")
        )
    output:
        f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv"
    shell:
        f"cat {{input}} > {{output}}"

rule srcsstats_aggregate:
    input:
        expand(f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
            date = get_analysis_dates(f"{ANALYSIS_DIR}/sources_stats")
        )
    output:
        f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv"
    shell:
        f"cat {{input}} > {{output}}"

rule artifact_aggregate:
    input:
        expand(f"{ANALYSIS_DIR}/artifact/{{date}}.csv",
            date = get_analysis_dates(f"{ANALYSIS_DIR}/artifact")
        )
    output:
        f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
    shell:
        f"cat {{input}} > {{output}}"

rule buildstatus_aggregate:
    input:
        expand(f"{ANALYSIS_DIR}/build_status/{{date}}.csv",
            date = get_analysis_dates(f"{ANALYSIS_DIR}/build_status")
        )
    output:
        f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv"
    shell:
        f"cat {{input}} > {{output}}"

# NOTE(review): despite its name, this target requests the aggregated CSV files
# (same inputs as `analysis_aggregate`), not the plot PDFs -- confirm intent.
rule all_plot:
    input:
        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
            cat = ANALYSIS_CATS,
            date = DATE
        )

# Line plots: one PDF per analysis category, from that category's aggregated CSV.
rule line_plot:
    input:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv",
        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv",
        build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv",
        artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
    output:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/plot/line/{{date}}.pdf",
        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/plot/line/{{date}}.pdf",
        build_status = f"{ANALYSIS_DIR}/build_status/plot/line/{{date}}.pdf",
        artifact = f"{ANALYSIS_DIR}/artifact/plot/line/{{date}}.pdf"
    shell:
        f"""
        Rscript {PLOT_SCRIPT} line {{input.sources_stats}} {{output.sources_stats}} {PLOT_HEADERS["softenv"]} timestamp
        Rscript {PLOT_SCRIPT} line {{input.pkgs_changes}} {{output.pkgs_changes}} {PLOT_HEADERS["softenv"]} timestamp
        Rscript {PLOT_SCRIPT} line {{input.build_status}} {{output.build_status}} {PLOT_HEADERS["build_status"]} timestamp
        Rscript {PLOT_SCRIPT} line {{input.artifact}} {{output.artifact}} {PLOT_HEADERS["artifact"]} timestamp
        """

# Bar plots: same as `line_plot`, for the three categories that have a bar plot.
rule bar_plot:
    input:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv",
        build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv",
        artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
    output:
        sources_stats = f"{ANALYSIS_DIR}/sources_stats/plot/bar/{{date}}.pdf",
        build_status = f"{ANALYSIS_DIR}/build_status/plot/bar/{{date}}.pdf",
        artifact = f"{ANALYSIS_DIR}/artifact/plot/bar/{{date}}.pdf"
    shell:
        f"""
        Rscript {PLOT_SCRIPT} bar {{input.sources_stats}} {{output.sources_stats}} {PLOT_HEADERS["softenv"]} timestamp
        Rscript {PLOT_SCRIPT} bar {{input.build_status}} {{output.build_status}} {PLOT_HEADERS["build_status"]} timestamp
        Rscript {PLOT_SCRIPT} bar {{input.artifact}} {{output.artifact}} {PLOT_HEADERS["artifact"]} timestamp
        """