From 0a8b0c85c68edcd8ab61aed402873a9971fa70f9 Mon Sep 17 00:00:00 2001 From: antux18 Date: Mon, 19 Aug 2024 19:02:23 +0200 Subject: [PATCH] Fixed analysis part in Snakemake again, needs testing. Moved test and template artifacts back to parent folder. Changes analysis output dir. --- .gitignore | 1 + artifacts/nickel/{excluded => }/template.ncl | 0 artifacts/nickel/{excluded => }/test.ncl | 0 blacklists/blacklist.csv | 10 ++ config/config.yaml | 2 +- run.sh | 2 +- workflow/Snakefile | 148 +++++++++++-------- 7 files changed, 96 insertions(+), 67 deletions(-) rename artifacts/nickel/{excluded => }/template.ncl (100%) rename artifacts/nickel/{excluded => }/test.ncl (100%) diff --git a/.gitignore b/.gitignore index ad65c7b..70552b5 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ pkglist.csv log.txt build_status.csv *.pdf +blacklists/* diff --git a/artifacts/nickel/excluded/template.ncl b/artifacts/nickel/template.ncl similarity index 100% rename from artifacts/nickel/excluded/template.ncl rename to artifacts/nickel/template.ncl diff --git a/artifacts/nickel/excluded/test.ncl b/artifacts/nickel/test.ncl similarity index 100% rename from artifacts/nickel/excluded/test.ncl rename to artifacts/nickel/test.ncl diff --git a/blacklists/blacklist.csv b/blacklists/blacklist.csv index e69de29..43f12f2 100644 --- a/blacklists/blacklist.csv +++ b/blacklists/blacklist.csv @@ -0,0 +1,10 @@ +albab_k9db,0,unknown_error +breitweiser_high,0,unknown_error +isakov_taxonomy,0,unknown_error +moses_high,0,unknown_error +parasyris_approximate,0,unknown_error +shi_welder,0,unknown_error +wang_tgopt,0,unknown_error +ziogas_deinsum-cpu,0,unknown_error +ziogas_deinsum-gpu,0,unknown_error +template,0,unknown_error \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 1891ba6..1d35535 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -4,7 +4,7 @@ folder_blacklists: "blacklists" symlink_blacklist: "blacklist.csv" system: "g5k" # can be "local" for local execution prefix: "outputs" -analysis_dir: "analysis" +analysis_dir: "outputs/analysis" site: "grenoble" cluster: "dahu" diff --git a/run.sh b/run.sh index 997baf7..218bbfe 100755 --- a/run.sh +++ b/run.sh @@ -13,4 +13,4 @@ then mkdir $CACHE_DIR fi -./ecg.py $TESTFILE -v -p $OUTPUT_PATH/pkglist.csv -b $OUTPUT_PATH/build_status.csv -a $OUTPUT_PATH/artifact_hash.csv -c $CACHE_DIR --docker-cache \ No newline at end of file +./ecg.py $TESTFILE -p $OUTPUT_PATH/pkglist.csv -b $OUTPUT_PATH/build_status.csv -a $OUTPUT_PATH/artifact_hash.csv -c $CACHE_DIR --docker-cache \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index 4ca822b..d99a1f8 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -82,75 +82,93 @@ rule update_blacklist: # We need to ignore lines where build is successful: f"cat {{input}} | grep -v ',success' > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}" -ANALYSIS_SCRIPTS = { - "sources_stats": f"python3 {ANALYSIS_DIR}/softenv_analysis.py -t sources-stats", - "pkgs_changes": f"python3 {ANALYSIS_DIR}/softenv_analysis.py -t pkgs-changes", - "build_status": f"python3 {ANALYSIS_DIR}/buildstatus_analysis.py", - "artifact": f"python3 {ANALYSIS_DIR}/artifact_analysis.py" -} - rule analysis: input: - { - "sources_stats": expand(f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",\ - artifact = ARTIFACTS,\ - date = DATE - ), - "pkgs_changes": expand(f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",\ - artifact = ARTIFACTS,\ - date = DATE - ), - "build_status": expand(f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",\ - artifact = ARTIFACTS,\ - date = DATE - ), - "artifact_hash": expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",\ - artifact = ARTIFACTS,\ - date = DATE - ), - } + expand(f"{PREFIX}/{{folder}}/{{artifact}}/{{{{date}}}}.csv",\ + folder = ["pkgs", "build_status", "artifact_hash"],\ + artifact = ARTIFACTS + ) output: - { - "sources_stats": expand(f"{ANALYSIS_DIR}/sources_stats/{{artifact}}/{{date}}.csv",\ - artifact = ARTIFACTS,\ - date = DATE - ), - "pkgs_changes": expand(f"{ANALYSIS_DIR}/pkgs_changes/{{artifact}}/{{date}}.csv",\ - artifact = ARTIFACTS,\ - date = DATE - ), - "build_status": expand(f"{ANALYSIS_DIR}/build_status/{{artifact}}/{{date}}.csv",\ - artifact = ARTIFACTS,\ - date = DATE - ), - "artifact_hash": expand(f"{ANALYSIS_DIR}/artifact_hash/{{artifact}}/{{date}}.csv",\ - artifact = ARTIFACTS,\ - date = DATE - ), - } - shell: - f"{{{{ANALYSIS_SCRIPTS[wildcards.folder]}}}} -i {{input[wildcards.folder]}} -o {{output[wildcards.folder]}}" + expand(f"{ANALYSIS_DIR}/{{folder}}/{{date}}.csv",\ + folder = ["sources_stats", "pkgs_changes", "build_status", "artifact_hash"],\ + date = DATE + ) -PLOT_HEADERS = { - "sources_stats": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"], - "pkgs_changes": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"], - "build_status": ["success", "package_unavailable", "baseimage_unavailable", "artifact_unavailable", "dockerfile_not_found", "script_crash", "job_time_exceeded", "unknown_error"], - "artifact": ["available", "unavailable", "changed"] -} - -rule plot: +rule softenv_analysis: input: - expand(f"{ANALYSIS_DIR}/{{folder}}/{{artifact}}/{{date}}.csv",\ - folder = ["sources_stats", "pkgs_changes", "build_status", "artifact"],\ - artifact = ARTIFACTS,\ - date = DATE - ), + expand(f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv",\ + artifact = ARTIFACTS + ) output: - expand(f"{ANALYSIS_DIR}/{{folder}}/{{plot_type}}.pdf",\ - folder = ["sources_stats", "pkgs_changes", "build_status", "artifact"],\ - plot_type = ["line", "bar"],\ - artifact = ARTIFACTS,\ - date = DATE - ), + sources_stats = f"{ANALYSIS_DIR}/sources_stats/{DATE}.csv",\ + pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{DATE}.csv" shell: - f"Rscript plot.r {{{{wildcards.plot_type}}}} {{{{PLOT_HEADERS[wildcards.folder]}}}}" \ No newline at end of file + f"python3 analysis/softenv_analysis.py -t sources-stats -i {{input}} -o {{output.sources_stats}}" + f"python3 analysis/softenv_analysis.py -t pkgs-changes -i {{input}} -o {{output.pkgs_changes}}" + +rule buildstatus_analysis: + input: + expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",\ + artifact = ARTIFACTS + ) + output: + f"{ANALYSIS_DIR}/build_status/{DATE}.csv" + shell: + f"python3 analysis/buildstatus_analysis.py -i {{input}} -o {{output}}" + +rule artifact_analysis: + input: + expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",\ + artifact = ARTIFACTS + ) + output: + f"{ANALYSIS_DIR}/artifact/{DATE}.csv" + shell: + f"python3 analysis/artifact_analysis.py -i {{input}} -o {{output}}" + +rule analysis_aggregate: + input: + sources_stats = expand(f"{ANALYSIS_DIR}/sources_stats/{{{{date}}}}.csv"), + pkgs_changes = expand(f"{ANALYSIS_DIR}/pkgs_changes/{{{{date}}}}.csv"), + build_status = expand(f"{ANALYSIS_DIR}/build_status/{{{{date}}}}.csv"), + artifact = expand(f"{ANALYSIS_DIR}/artifact/{{{{date}}}}.csv") + output: + sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{DATE}.csv", + pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{DATE}.csv", + build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{DATE}.csv", + artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{DATE}.csv" + shell: + f"cat {{input.sources_stats}} > {{output.sources_stats}}" + f"cat {{input.pkgs_changes}} > {{output.pkgs_changes}}" + f"cat {{input.build_status}} > {{output.build_status}}" + f"cat {{input.artifact}} > {{output.artifact}}" + +# PLOT_HEADERS = { +# "sources_stats": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"], +# "pkgs_changes": ["dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"], +# "build_status": ["success", "package_unavailable", "baseimage_unavailable", "artifact_unavailable", "dockerfile_not_found", "script_crash", "job_time_exceeded", "unknown_error"], +# "artifact": ["available", "unavailable", "changed"] +# } + +# rule plot_all: +# input: +# expand(f"{ANALYSIS_DIR}/{{folder}}/aggregated/{{date}}.csv",\ +# folder = ["sources_stats", "pkgs_changes", "build_status", "artifact"],\ +# date = DATE +# ) + +# rule line_plot: +# input: +# expand(f"{ANALYSIS_DIR}/{{folder}}/{{artifact}}/{{date}}.csv",\ +# folder = ["sources_stats", "pkgs_changes", "build_status", "artifact"],\ +# artifact = ARTIFACTS,\ +# date = DATE +# ), +# output: +# expand(f"{ANALYSIS_DIR}/{{folder}}/line.pdf",\ +# folder = ["sources_stats", "pkgs_changes", "build_status", "artifact"],\ +# artifact = ARTIFACTS,\ +# date = DATE +# ), +# shell: +# f"Rscript plot.r line {{{{PLOT_HEADERS[wildcards.folder]}}}}" \ No newline at end of file