Since the analysis and aggregation rules had directories in their inputs, Snakemake complained that these directories were missing, even though it was supposed to create them itself. So I removed the directories from the inputs of the rules and used global variables instead (and for some rules, I just wrote the directory path directly in the shell command).

Fixed the missing option specifier (`-t`) in the analysis wrapper calls for the softenv analysis. Also fixed the way the wrapper parses its arguments.
This commit is contained in:
antux18 2024-08-26 14:40:43 +02:00
parent 97447e59a1
commit 9c6ce2d150
2 changed files with 29 additions and 36 deletions

View File

@ -12,6 +12,8 @@ BLACKLIST_FOLDER = config["folder_blacklists"]
EXTENSION = "json" EXTENSION = "json"
SYSTEM = config["system"] SYSTEM = config["system"]
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
PREFIX = config["prefix"] PREFIX = config["prefix"]
ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"] ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]
SHELLS_ECG = { SHELLS_ECG = {
@ -24,6 +26,12 @@ ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
ANALYSIS_SCRIPTS_DIR = "analysis" ANALYSIS_SCRIPTS_DIR = "analysis"
ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh" ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh"
AGGREGATE_WRAPPER = "workflow/scripts/aggregate_wrapper.sh" AGGREGATE_WRAPPER = "workflow/scripts/aggregate_wrapper.sh"
ARTIFACT_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/artifact_hash/{{artifact}}",
artifact = ARTIFACTS
))
SOFTENV_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/pkgs/{{artifact}}",
artifact = ARTIFACTS
))
PLOT_DIR = config["plot_dir"] PLOT_DIR = config["plot_dir"]
PLOT_SCRIPT = "plot/plot.r" PLOT_SCRIPT = "plot/plot.r"
@ -33,8 +41,6 @@ PLOT_HEADERS = {
"artifact": "available unavailable changed" "artifact": "available unavailable changed"
} }
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
rule all: rule all:
input: input:
expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/line/{{date}}.pdf", expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/line/{{date}}.pdf",
@ -81,7 +87,7 @@ rule run_ecg:
log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt", log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
pkg = f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv", pkg = f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
build_status = f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv", build_status = f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
artifact_hash = f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv" artifact_hash = f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",
shell: shell:
(SHELLS_ECG["g5k"] if SYSTEM == "g5k" else "") + SHELLS_ECG["local"] + ("'" if SYSTEM == "g5k" else "") (SHELLS_ECG["g5k"] if SYSTEM == "g5k" else "") + SHELLS_ECG["local"] + ("'" if SYSTEM == "g5k" else "")
@ -102,19 +108,16 @@ rule softenv_analysis:
wildcard_constraints: wildcard_constraints:
date="\d+" date="\d+"
input: input:
today_files = expand(f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv", expand(f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv",
artifact = ARTIFACTS artifact = ARTIFACTS
), )
dirs = expand(f"{PREFIX}/pkgs/{{artifact}}",
artifact = ARTIFACTS
),
output: output:
sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv", sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv" pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
shell: shell:
f""" f"""
{ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py sources-stats {{output.sources_stats}} {{input.today_files}} {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input}}
{ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py pkgs-changes {{output.pkgs_changes}} {{input.dirs}} {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {SOFTENV_ANALYSIS_DIRS}
""" """
rule buildstatus_analysis: rule buildstatus_analysis:
@ -125,7 +128,7 @@ rule buildstatus_analysis:
artifact = ARTIFACTS artifact = ARTIFACTS
), ),
output: output:
f"{ANALYSIS_DIR}/build_status/{{date}}.csv", f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
shell: shell:
f""" f"""
{ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}} {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
@ -135,17 +138,14 @@ rule artifact_analysis:
wildcard_constraints: wildcard_constraints:
date="\d+" date="\d+"
input: input:
today_files = expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv", expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
artifact = ARTIFACTS artifact = ARTIFACTS
), )
dirs = expand(f"{PREFIX}/artifact_hash/{{artifact}}",
artifact = ARTIFACTS
),
output: output:
f"{ANALYSIS_DIR}/artifact/{{date}}.csv", f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
shell: shell:
f""" f"""
{ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {{input.dirs}} {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {ARTIFACT_ANALYSIS_DIRS}
""" """
# Analysis aggregate: # Analysis aggregate:
@ -159,39 +159,35 @@ rule analysis_aggregate:
rule pkgschgs_aggregate: rule pkgschgs_aggregate:
input: input:
dir = f"{ANALYSIS_DIR}/pkgs_changes", f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
today_file = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
output: output:
f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv" f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv"
shell: shell:
f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}" f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/pkgs_changes {{output}}"
rule srcsstats_aggregate: rule srcsstats_aggregate:
input: input:
dir = f"{ANALYSIS_DIR}/sources_stats", f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv"
today_file = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv"
output: output:
f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv" f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv"
shell: shell:
f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}" f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/sources_stats {{output}}"
rule artifact_aggregate: rule artifact_aggregate:
input: input:
dir = f"{ANALYSIS_DIR}/artifact", f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
today_file = f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
output: output:
f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv" f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
shell: shell:
f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}" f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/artifact {{output}}"
rule buildstatus_aggregate: rule buildstatus_aggregate:
input: input:
dir = f"{ANALYSIS_DIR}/build_status", f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
today_file = f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
output: output:
f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv" f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv"
shell: shell:
f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}" f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/build_status {{output}}"
# Plot: # Plot:

View File

@ -1,7 +1,5 @@
#!/bin/bash #!/bin/bash
echo "$@"
MODE=$1 # Either "dirs" or "files", depending on the type of input MODE=$1 # Either "dirs" or "files", depending on the type of input
shift shift
SCRIPT=$1 SCRIPT=$1
@ -11,16 +9,16 @@ if [ $1 = "-t" ]
then then
TYPE=$2 # Used if softenv analysis TYPE=$2 # Used if softenv analysis
shift shift
OUTPUT=$2
shift
else else
OUTPUT=$1 OUTPUT=$1
fi fi
shift shift
INPUT="$@" INPUT="$@"
echo $OUTPUT
# Adding option prefix: # Adding option prefix:
if [ $TYPE != "" ] if [ "$TYPE" != "" ]
then then
TYPE="-t $TYPE" TYPE="-t $TYPE"
fi fi
@ -37,6 +35,5 @@ then
INPUT_FILES="$INPUT_FILES $(find $dir/*.csv -maxdepth 1 -type f)" INPUT_FILES="$INPUT_FILES $(find $dir/*.csv -maxdepth 1 -type f)"
done done
fi fi
echo $INPUT_FILES
python3 $SCRIPT $TYPE -i $INPUT_FILES -o $OUTPUT python3 $SCRIPT $TYPE -i $INPUT_FILES -o $OUTPUT