diff --git a/analysis/analysis_wrapper.sh b/analysis/analysis_wrapper.sh
deleted file mode 100755
index 005d56c..0000000
--- a/analysis/analysis_wrapper.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-ANALYSIS_TYPE=$1
-OUTPUT=$2
-shift; shift
-
-ARGS=$@
-INPUT=("${ARGS[@]/#/-i }")
-SCRIPT=""
-OPT=""
-
-case ANALYSIS_TYPE in
-    "softenv")
-        SCRIPT="softenv_analysis.py"
-        OPT="-t sources-stats"
-        ;;
-
-python3 softenv_analysis.py -t sources-stats $INPUT -o
\ No newline at end of file
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 12b2da5..e9dbab3 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -21,6 +21,9 @@ SHELLS_ECG = {
 ANALYSIS_DIR = config["analysis_dir"]
 ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
+ANALYSIS_SCRIPTS_DIR = "analysis"
+ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh"
+AGGREGATE_WRAPPER = "workflow/scripts/aggregate_wrapper.sh"
 PLOT_DIR = config["plot_dir"]
 PLOT_SCRIPT = "plot/plot.r"
@@ -42,21 +45,10 @@ rule all:
             analysis_cat = ["sources_stats", "build_status", "artifact"],
             date = DATE
         ),
-        # expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
-        #     cat = ANALYSIS_CATS,
-        #     date = DATE
-        # ),
-        # expand(f"{PREFIX}/{{folder}}/{{artifact}}/{{date}}.csv",
-        #     folder=["pkgs", "build_status", "artifact_hash"],
-        #     artifact=ARTIFACTS,
-        #     date=DATE
-        # ),
-        # expand(f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
-        #     artifact=ARTIFACTS,
-        #     date=DATE
-        # ),
         f"{BLACKLIST_FOLDER}/{DATE}.csv"
 
+# Artifacts configuration files:
+
 rule check_all:
     input:
         expand(f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json", artifact=ARTIFACTS)
@@ -74,6 +66,8 @@ rule check_artifact:
         nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
         """
 
+# ECG:
+
 rule run_ecg:
     input:
         "flake.nix",
@@ -102,66 +96,60 @@ rule update_blacklist:
         # We need to ignore lines where build is successful:
         f"cat {{input}} | grep -v ',success' > {{output}} || true"
 
-# rule analysis:
-#     input:
-#         expand(f"{PREFIX}/{{output_dir}}/{{artifact}}/{{date}}.csv",
-#             output_dir = ECG_OUTPUTS,
-#             artifact = ARTIFACTS,
-#             # date = get_analysis_dates("{PREFIX}/{output_dir}")
-#             date = glob_wildcards("{PREFIX}/{output_dir}/{artifact}/{date}.csv").date
-#         )
+# Analysis:
 
 rule softenv_analysis:
     wildcard_constraints:
         date="\d+"
     input:
-        sources_stats = expand(f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
-            artifact = ARTIFACTS,
-            date = DATE
+        today_files = expand(f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv",
+            artifact = ARTIFACTS
+        ),
+        dirs = expand(f"{PREFIX}/pkgs/{{artifact}}",
+            artifact = ARTIFACTS
         ),
-        pkgs_changes = expand(f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
-            artifact = ARTIFACTS,
-            date = get_analysis_dates(f"{PREFIX}/pkgs")
-        )
     output:
         sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
     shell:
         f"""
-        python3 analysis/softenv_analysis.py -t sources-stats -i {{input.sources_stats}} -o {{output.sources_stats}}
-        python3 analysis/softenv_analysis.py -t pkgs-changes -i {{input.pkgs_changes}} -o {{output.pkgs_changes}}
+        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input.today_files}}
+        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {{input.dirs}}
         """
 
 rule buildstatus_analysis:
     wildcard_constraints:
         date="\d+"
     input:
-        expand(f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
-            artifact = ARTIFACTS,
-            date = DATE
-        )
+        expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
+            artifact = ARTIFACTS
+        ),
     output:
         f"{ANALYSIS_DIR}/build_status/{{date}}.csv",
     shell:
         f"""
-        python3 analysis/buildstatus_analysis.py -i {{input}} -o {{output}}
+        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
         """
 
 rule artifact_analysis:
     wildcard_constraints:
         date="\d+"
     input:
-        expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",
-            artifact = ARTIFACTS,
-            date = get_analysis_dates(f"{PREFIX}/artifact_hash")
-        )
+        today_files = expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
+            artifact = ARTIFACTS
+        ),
+        dirs = expand(f"{PREFIX}/artifact_hash/{{artifact}}",
+            artifact = ARTIFACTS
+        ),
     output:
         f"{ANALYSIS_DIR}/artifact/{{date}}.csv",
     shell:
         f"""
-        python3 analysis/artifact_analysis.py -i {{input}} -o {{output}}
+        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {{input.dirs}}
         """
 
+# Analysis aggregate:
+
 rule analysis_aggregate:
     input:
         expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
@@ -169,56 +157,43 @@ rule analysis_aggregate:
             date = DATE
         )
 
-# rule single_aggregate:
-#     input:
-#         expand(f"{ANALYSIS_DIR}/{{{{cat}}}}/{{date}}.csv",
-#             date = get_analysis_dates(f"{ANALYSIS_DIR}/{{wildcards.cat}}")
-#             # date = glob_wildcards("{ANALYSIS_DIR}/{cat}/{date}.csv").date
-#         )
-#     output:
-#         f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv"
-#     shell:
-#         f"cat {{input}} > {{output}}"
-
 rule pkgschgs_aggregate:
     input:
-        expand(f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv",
-            date = get_analysis_dates(f"{ANALYSIS_DIR}/pkgs_changes")
-        )
+        dir = f"{ANALYSIS_DIR}/pkgs_changes",
+        today_file = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
     output:
         f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv"
     shell:
-        f"cat {{input}} > {{output}}"
+        f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
 
 rule srcsstats_aggregate:
     input:
-        expand(f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
-            date = get_analysis_dates(f"{ANALYSIS_DIR}/sources_stats")
-        )
+        dir = f"{ANALYSIS_DIR}/sources_stats",
+        today_file = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv"
    output:
         f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv"
     shell:
-        f"cat {{input}} > {{output}}"
+        f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
 
 rule artifact_aggregate:
     input:
-        expand(f"{ANALYSIS_DIR}/artifact/{{date}}.csv",
-            date = get_analysis_dates(f"{ANALYSIS_DIR}/artifact")
-        )
+        dir = f"{ANALYSIS_DIR}/artifact",
+        today_file = f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
     output:
         f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
     shell:
-        f"cat {{input}} > {{output}}"
+        f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
 
 rule buildstatus_aggregate:
     input:
-        expand(f"{ANALYSIS_DIR}/build_status/{{date}}.csv",
-            date = get_analysis_dates(f"{ANALYSIS_DIR}/build_status")
-        )
+        dir = f"{ANALYSIS_DIR}/build_status",
+        today_file = f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
     output:
         f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv"
     shell:
-        f"cat {{input}} > {{output}}"
+        f"{AGGREGATE_WRAPPER} {{input.dir}} {{output}}"
+
+# Plot:
 
 rule all_plot:
     input:
diff --git a/workflow/scripts/aggregate_wrapper.sh b/workflow/scripts/aggregate_wrapper.sh
new file mode 100755
index 0000000..fb69d97
--- /dev/null
+++ b/workflow/scripts/aggregate_wrapper.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+INPUT_DIR=$1
+OUTPUT=$2
+
+# Concatenate every CSV file found at the top level of the input directory:
+INPUT=$(find "$INPUT_DIR" -maxdepth 1 -type f -name '*.csv')
+
+cat $INPUT > $OUTPUT
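A quick usage sketch for the aggregate wrapper (the `output/analysis/...` paths and the date are hypothetical, chosen only for illustration):

    workflow/scripts/aggregate_wrapper.sh output/analysis/build_status output/analysis/build_status/aggregated/20240601.csv

This concatenates every top-level CSV of the given directory into the aggregated file, which is what the *_aggregate rules above invoke.
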
diff --git a/workflow/scripts/analysis_wrapper.sh b/workflow/scripts/analysis_wrapper.sh
new file mode 100755
index 0000000..59a5407
--- /dev/null
+++ b/workflow/scripts/analysis_wrapper.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Usage: analysis_wrapper.sh MODE SCRIPT [-t TYPE] OUTPUT INPUT...
+echo "$@"
+
+MODE=$1 # Either "dirs" or "files", depending on the type of input
+shift
+SCRIPT=$1
+shift
+TYPE=""
+if [ "$1" = "-t" ]
+then
+    TYPE=$2 # Analysis type, only used by the softenv analysis
+    shift 2
+fi
+OUTPUT=$1
+shift
+INPUT="$@"
+
+echo $OUTPUT
+
+# Adding option prefix:
+if [ "$TYPE" != "" ]
+then
+    TYPE="-t $TYPE"
+fi
+
+# If inputs are files, then we just use them as input for the script:
+INPUT_FILES=$INPUT
+# If inputs are directories, we need to explore every single one of them
+# to find the input files to pass to the script:
+if [ "$MODE" = "dirs" ]
+then
+    INPUT_FILES=""
+    for dir in $INPUT
+    do
+        INPUT_FILES="$INPUT_FILES $(find "$dir" -maxdepth 1 -type f -name '*.csv')"
+    done
+fi
+echo $INPUT_FILES
+
+python3 $SCRIPT $TYPE -i $INPUT_FILES -o $OUTPUT
diff --git a/workflow/utils.smk b/workflow/utils.smk
index 9b71ab0..f695f08 100644
--- a/workflow/utils.smk
+++ b/workflow/utils.smk
@@ -1,29 +1,6 @@
 import csv
 import os
 
-def get_analysis_dates(directory):
-    outputs = []
-    if os.path.exists(directory):
-        for file in os.listdir(directory):
-            if not os.path.isdir(os.path.join(directory, file)):
-                outputs.append(os.path.splitext(file)[0])
-    today = datetime.datetime.now().strftime("%Y%m%d")
-    if today not in outputs:
-        outputs.append(today)
-    return outputs
-
-# def find_last_blacklist(blacklist_dir_path):
-#     last_blacklist = "0.csv"
-#     for blacklist in os.listdir(blacklist_dir_path):
-#         if not os.path.isdir(blacklist):
-#             # We want the latest one, so the one that has the most recent date
-#             # as file name:
-#             curbl_date = int(os.path.splitext(blacklist)[0])
-#             lastbl_date = int(os.path.splitext(last_blacklist)[0])
-#             if curbl_date > lastbl_date:
-#                 last_blacklist = blacklist
-#             return last_blacklist
-
 def get_blacklisted(blacklist_dir_path):
     blacklisted = set()
     if os.path.exists(blacklist_dir_path):
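A matching sketch for the analysis wrapper (paths and dates again hypothetical; the analysis scripts and their -i/-o/-t flags are the ones used in the Snakefile above):

    # "files" mode: the listed CSVs are handed to the analysis script as-is.
    workflow/scripts/analysis_wrapper.sh files analysis/buildstatus_analysis.py output/analysis/build_status/20240601.csv output/build_status/some-artifact/20240601.csv

    # "dirs" mode: each listed directory is scanned for its top-level CSVs first.
    workflow/scripts/analysis_wrapper.sh dirs analysis/softenv_analysis.py -t pkgs-changes output/analysis/pkgs_changes/20240601.csv output/pkgs/some-artifact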