Since the analysis and aggregation rules had directories in their inputs, Snakemake complained that these directories were missing, even though it was supposed to create them. So I removed the directories from the inputs of the rules and used global variables instead (and for some, just wrote the directory path directly in the shell command).

Fixed the missing option specifier (`-t`) in the analysis wrapper calls for the softenv analysis. Also fixed the way arguments are parsed by the wrapper.
This commit is contained in:
antux18 2024-08-26 14:40:43 +02:00
parent 97447e59a1
commit 9c6ce2d150
2 changed files with 29 additions and 36 deletions

View File

@ -12,6 +12,8 @@ BLACKLIST_FOLDER = config["folder_blacklists"]
EXTENSION = "json"
SYSTEM = config["system"]
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
PREFIX = config["prefix"]
ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]
SHELLS_ECG = {
@ -24,6 +26,12 @@ ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
ANALYSIS_SCRIPTS_DIR = "analysis"
ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh"
AGGREGATE_WRAPPER = "workflow/scripts/aggregate_wrapper.sh"
ARTIFACT_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/artifact_hash/{{artifact}}",
artifact = ARTIFACTS
))
SOFTENV_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/pkgs/{{artifact}}",
artifact = ARTIFACTS
))
PLOT_DIR = config["plot_dir"]
PLOT_SCRIPT = "plot/plot.r"
@ -33,8 +41,6 @@ PLOT_HEADERS = {
"artifact": "available unavailable changed"
}
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
rule all:
input:
expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/line/{{date}}.pdf",
@ -81,7 +87,7 @@ rule run_ecg:
log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
pkg = f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
build_status = f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
artifact_hash = f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv"
artifact_hash = f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",
shell:
(SHELLS_ECG["g5k"] if SYSTEM == "g5k" else "") + SHELLS_ECG["local"] + ("'" if SYSTEM == "g5k" else "")
@ -102,19 +108,16 @@ rule softenv_analysis:
wildcard_constraints:
date="\d+"
input:
today_files = expand(f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv",
expand(f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv",
artifact = ARTIFACTS
),
dirs = expand(f"{PREFIX}/pkgs/{{artifact}}",
artifact = ARTIFACTS
),
)
output:
sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
shell:
f"""
{ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py sources-stats {{output.sources_stats}} {{input.today_files}}
{ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py pkgs-changes {{output.pkgs_changes}} {{input.dirs}}
{ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input}}
{ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {SOFTENV_ANALYSIS_DIRS}
"""
rule buildstatus_analysis:
@ -125,7 +128,7 @@ rule buildstatus_analysis:
artifact = ARTIFACTS
),
output:
f"{ANALYSIS_DIR}/build_status/{{date}}.csv",
f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
shell:
f"""
{ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
@ -135,17 +138,14 @@ rule artifact_analysis:
wildcard_constraints:
date="\d+"
input:
today_files = expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
artifact = ARTIFACTS
),
dirs = expand(f"{PREFIX}/artifact_hash/{{artifact}}",
artifact = ARTIFACTS
),
)
output:
f"{ANALYSIS_DIR}/artifact/{{date}}.csv",
f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
shell:
f"""
{ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {{input.dirs}}
{ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {ARTIFACT_ANALYSIS_DIRS}
"""
# Analysis aggregate:
@ -159,39 +159,35 @@ rule analysis_aggregate:
rule pkgschgs_aggregate:
input:
dir = f"{ANALYSIS_DIR}/pkgs_changes",
today_file = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
output:
f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv"
shell:
f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/pkgs_changes {{output}}"
rule srcsstats_aggregate:
input:
dir = f"{ANALYSIS_DIR}/sources_stats",
today_file = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv"
f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv"
output:
f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv"
shell:
f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/sources_stats {{output}}"
rule artifact_aggregate:
input:
dir = f"{ANALYSIS_DIR}/artifact",
today_file = f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
output:
f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
shell:
f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/artifact {{output}}"
rule buildstatus_aggregate:
input:
dir = f"{ANALYSIS_DIR}/build_status",
today_file = f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
output:
f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv"
shell:
f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/build_status {{output}}"
# Plot:

View File

@ -1,7 +1,5 @@
#!/bin/bash
echo "$@"
MODE=$1 # Either "dirs" or "files", depending on the type of input
shift
SCRIPT=$1
@ -11,16 +9,16 @@ if [ $1 = "-t" ]
then
TYPE=$2 # Used if softenv analysis
shift
OUTPUT=$2
shift
else
OUTPUT=$1
fi
shift
INPUT="$@"
echo $OUTPUT
# Adding option prefix:
if [ $TYPE != "" ]
if [ "$TYPE" != "" ]
then
TYPE="-t $TYPE"
fi
@ -37,6 +35,5 @@ then
INPUT_FILES="$INPUT_FILES $(find $dir/*.csv -maxdepth 1 -type f)"
done
fi
echo $INPUT_FILES
python3 $SCRIPT $TYPE -i $INPUT_FILES -o $OUTPUT