Adding missing changes from the previous commit because I wasn't in the root folder...

antux18 2024-08-15 12:23:34 +02:00
parent cb1a62217c
commit 0bbed9d0f5
8 changed files with 101 additions and 69 deletions

.gitignore

@@ -1,5 +1,6 @@
pkglists/*
output/*
outputs/*
cache/*
examples/*
.snakemake/*


@@ -79,18 +79,17 @@ Where:
Run `ecg.py` as follows:
```
python3 ecg.py <config_file> -p <pkglist_path> -l <log_file> -b <build_status_file> -a <artifact_hash_log> -c <cache_directory>
python3 ecg.py <config_file> -p <pkglist_path> -b <build_status_file> -a <artifact_hash_log> -c <cache_directory>
```
Where:
- `<config_file>` is the configuration file of the artifact, in JSON format. WARNING: the file name (without the extension) must comply with the Docker image naming convention: only lowercase letters and digits are allowed, separated by at most one ".", at most two "_", or any number of "-", and the name must be at most 128 characters long.
- `<pkglist_path>` is the path to the file where the package list generated by the program should be written.
- `<log_file>` is the path to the file where the output of the program will be logged.
- `<build_status_file>` is the path to the file where the build status of the Docker image given in the configuration file will be written.
- `<artifact_hash_log>` is the path to the file where the hash of the downloaded artifact will be logged.
- `<cache_directory>` is the path to the cache directory, where downloaded artifacts will be stored for future use. If not specified, caching is disabled.
You can also use `--docker-cache` to enable the cache of the Docker layers, and `-v` to show the full output of the script in your terminal (by default, it is only written to the specified `log_file`).
You can also use `--docker-cache` to enable caching of the Docker layers.
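For example (all file and directory names below are placeholders):
```
python3 ecg.py my-artifact.json -p pkglist.csv -b build_status.csv -a artifact_hash.csv -c cache
```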
##### Outputs
@ -103,10 +102,6 @@ The list of packages installed in the container, depending on the sources (a pac
For Git packages, the hash of the last commit is used as the version number. For miscellaneous packages, the hash of the file used to install the package is used as the version number. The timestamp corresponds to the time when ECG started building the package list, so it is the same for every package logged during the same execution of ECG.
###### Output log
Just a plain text file containing the output of the script.
###### Build status file
The log of the attempts to build the Docker image, in the form of a CSV file, with the following columns in order:
@ -158,8 +153,7 @@ The script `softenv_analysis.py` performs a software environment analysis by par
Depending on the type of analysis, multiple tables can be generated:
- `sources-stats`: Number of packages per source (a package manager, `git` or `misc`).
- `pkg-changes`: Number of packages that changed over time (`0` if only one file is given, since it will only include the package list of a single execution).
- `pkgs-per-container`: Number of packages per container. This analysis hasn't been implemented yet, and may never be.
- `pkgs-changes`: Number of packages that changed over time (`0` if only one file is given, since it will only include the package list of a single execution).
The type of analysis can be specified using the option `-t`.
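For instance, assuming the package-list files are passed as positional arguments (an assumption — the exact way input files are given is not shown in this excerpt):
```
python3 softenv_analysis.py -t sources-stats <pkglist_file>...
```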


@@ -4,6 +4,7 @@ folder_blacklists: "blacklists"
symlink_blacklist: "blacklist.csv"
system: "g5k" # can be "local" for local execution
prefix: "outputs"
analysis_dir: "analysis"
site: "grenoble"
cluster: "dahu"

ecg.py

@@ -384,11 +384,11 @@ def main():
It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
"""
)
parser.add_argument(
'-v', '--verbose',
action = 'store_true',
help = "Shows more details on what is being done."
)
# parser.add_argument(
# '-v', '--verbose',
# action = 'store_true',
# help = "Shows more details on what is being done."
# )
parser.add_argument(
"config",
help = "The path to the configuration file of the artifact's Docker image."
@@ -398,11 +398,11 @@ def main():
help = "Path to the file where the package list generated by the program should be written.",
required = True
)
parser.add_argument(
"-l", "--log-path",
help = "Path to the file where to log the output of the program.",
required = True
)
# parser.add_argument(
# "-l", "--log-path",
# help = "Path to the file where to log the output of the program.",
# required = True
# )
parser.add_argument(
"-b", "--build-status",
help = "Path to the file where to write the build status of the Docker image given in the configuration file.",
@@ -430,62 +430,62 @@ def main():
args = parser.parse_args()
# Setting up the paths of the outputs:
log_path = "log.txt" # Output of the program
pkglist_path = args.pkg_list
log_path = args.log_path
buildstatus_path = args.build_status
arthashlog_path = args.artifact_hash
cache_dir = args.cache_dir
# log_path = "log.txt" # Output of the program
# log_path = args.log_path
# Setting up the log: will be displayed both on stdout and to the specified
# file:
print(f"Output will be stored in {log_path}")
logging.basicConfig(filename = log_path, filemode = "w", format = '%(levelname)s: %(message)s', level = logging.INFO)
if args.verbose:
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
# Setting up the log:
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
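# Note: ecg.py no longer writes its own log file; the wrapper script
# (workflow/scripts/ecg_wrapper.sh) redirects stdout and stderr to the log file instead.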
# # Old version where the script writes its own log to the given file:
# print(f"Output will be stored in {log_path}")
# logging.basicConfig(filename = log_path, filemode = "w", format = '%(levelname)s: %(message)s', level = logging.INFO)
# if args.verbose:
# logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
# Parsing the input file including the configuration of the artifact's
# image:
config_path = args.config
status = ""
try:
config_file = open(config_path, "r")
config = json.loads(config_file.read())
config_file.close()
config_file = open(config_path, "r")
config = json.loads(config_file.read())
config_file.close()
dl_dir = None
# If not using cache, creates a temporary directory:
if cache_dir == None:
tmp_dir = tempfile.TemporaryDirectory()
dl_dir = tmp_dir.name
dl_dir = None
# If not using cache, creates a temporary directory:
if cache_dir == None:
tmp_dir = tempfile.TemporaryDirectory()
dl_dir = tmp_dir.name
else:
use_cache = True
dl_dir = cache_dir
artifact_name = os.path.splitext(os.path.basename(config_path))[0]
artifact_dir = download_sources(config, arthashlog_path, dl_dir, use_cache, artifact_name)
# If download was successful:
if artifact_dir != "":
return_code, build_output = build_image(config, artifact_dir, artifact_name, args.docker_cache)
if return_code == 0:
status = "success"
check_env(config, artifact_dir, artifact_name, pkglist_path)
remove_image(config, artifact_name)
else:
use_cache = True
dl_dir = cache_dir
artifact_name = os.path.splitext(os.path.basename(config_path))[0]
artifact_dir = download_sources(config, arthashlog_path, dl_dir, use_cache, artifact_name)
# If download was successful:
if artifact_dir != "":
return_code, build_output = build_image(config, artifact_dir, artifact_name, args.docker_cache)
if return_code == 0:
status = "success"
check_env(config, artifact_dir, artifact_name, pkglist_path)
remove_image(config, artifact_name)
else:
status = builderror_identifier(build_output)
# Creates file if not already:
pathlib.Path(pkglist_path).touch()
# If download failed, we need to save the error to the build status log:
else:
logging.fatal("Artifact could not be downloaded!")
status = "artifact_unavailable"
except Exception as err:
# Handles any possible script's own crashes:
formatted_err = str(''.join(traceback.format_exception(None, err, err.__traceback__)))
log_file = open(log_path, "a")
log_file.write(formatted_err)
log_file.close()
logging.error(formatted_err)
status = "script_crash"
status = builderror_identifier(build_output)
# Creates file if not already:
pathlib.Path(pkglist_path).touch()
# If download failed, we need to save the error to the build status log:
else:
logging.fatal("Artifact could not be downloaded!")
status = "artifact_unavailable"
# except Exception as err:
# # Handles any possible script's own crashes:
# formatted_err = str(''.join(traceback.format_exception(None, err, err.__traceback__)))
# log_file = open(log_path, "a")
# log_file.write(formatted_err)
# log_file.close()
# logging.error(formatted_err)
# status = "script_crash"
buildresult_saver(status, buildstatus_path, config_path)
if __name__ == "__main__":

run.sh

@@ -13,4 +13,4 @@ then
mkdir $CACHE_DIR
fi
./ecg.py $TESTFILE -v -p $OUTPUT_PATH/pkglist.csv -l $OUTPUT_PATH/log.txt -b $OUTPUT_PATH/build_status.csv -a $OUTPUT_PATH/artifact_hash.csv -c $CACHE_DIR --docker-cache
./ecg.py $TESTFILE -p $OUTPUT_PATH/pkglist.csv -b $OUTPUT_PATH/build_status.csv -a $OUTPUT_PATH/artifact_hash.csv -c $CACHE_DIR --docker-cache


@@ -13,6 +13,7 @@ BLACKLIST = config["symlink_blacklist"]
EXTENSION = "json"
SYSTEM = config["system"]
PREFIX = config["prefix"]
ANALYSIS_DIR = config["analysis_dir"]
ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST)
@@ -22,12 +23,12 @@ rule all:
folder=["pkgs", "build_status", "artifact_hash"],\
artifact=ARTIFACTS,\
date=DATE
),
),
expand(f"{PREFIX}/{{folder}}/{{artifact}}/{{date}}.txt",\
folder=["logs"],\
artifact=ARTIFACTS,\
date=DATE
),
),
f"{BLACKLIST_FOLDER}/{DATE}.csv"
rule check_all:
@@ -49,7 +50,7 @@ rule check_artifact:
"""
SHELLS_ECG = {
"local": f"python3 {{input.ecg}} -l {{output.log}} -p {{output.pkg}} -b {{output.build_status}} -a {{output.artifact_hash}} {ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION}",
"local": f"./{{input.ecg_wrapper}} {{input.ecg}} {ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION} {{output.pkg}} {{output.build_status}} {{output.artifact_hash}} {{output.log}}",
"g5k": f"python3 {{input.execo_wrapper}} --path {os.getcwd()} --script {{input.oar_wrapper}} --site {config['site']} --cluster {config['cluster']} --max-duration {config['max_duration']} --checkpoint {config['checkpoint']} {'--besteffort' if config['besteffort'] else ''} --sleep_time {config['sleep_time']} --build_status_file {{output.build_status}} --artifact {{wildcards.artifact}} -- '"
}
@@ -58,8 +59,9 @@ rule run_ecg:
"flake.nix",
"flake.lock",
ecg="ecg.py",
ecg_wrapper="workflow/scripts/ecg_wrapper.sh",
execo_wrapper="workflow/scripts/submission_g5k.py",
oar_wrapper="workflow/scripts/ecg_wrapper.oar.bash",
oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
artifact=f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}"
output:
log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
@@ -73,9 +75,29 @@ rule update_blacklist:
input:
BLACKLIST,
build_status=expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",\
artifact=ARTIFACTS)
artifact=ARTIFACTS
)
output:
f"{BLACKLIST_FOLDER}/{{date}}.csv"
shell:
# We need to ignore lines where build is successful:
f"cat {{input}} | grep -v ',success' > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}"
rule analysis:
input:
log = f"{PREFIX}/logs/*/*.txt",
pkg = f"{PREFIX}/pkgs/*/*.csv",
build_status = f"{PREFIX}/build_status/*/*.csv",
artifact_hash = f"{PREFIX}/artifact_hash/*/*.csv",
softenv_analysis = "softenv_analysis.py",
buildstatis_analysis = "buildstatis_analysis.py",
artifact_analysis = "artifact_analysis.py",
SOFTENV_TYPES = ["sources-stats", "pkgs-changes"]
output:
expand(f"{ANALYSIS_DIR}/{{folder}}/{{artifact}}/{{date}}.csv",\
folder=["sources_stats", "pkgs_changes", "build_status", "artifact"],\
artifact=ARTIFACTS,\
date=DATE
),
shell:
expand("python3 {{input.softenv_analysis}} -t {{analysis_type}}")

workflow/scripts/ecg_wrapper.sh (executable file)

@@ -0,0 +1,14 @@
#!/bin/bash
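# Usage: ecg_wrapper.sh <ecg_script> <config_file> <pkglist_path> <build_status_file> <artifact_hash_log> <output_log>
# Runs ECG with the given output paths, redirecting its output to <output_log>.
# If ECG exits with a non-zero code, a "script_crash" entry is appended to the build status file.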
ECG=$1
CONFIG=$2
PKGLIST=$3
BUILD_STATUS=$4
ARTHASH_LOG=$5
OUTPUT_LOG=$6
python3 $ECG -p $PKGLIST -b $BUILD_STATUS -a $ARTHASH_LOG $CONFIG > $OUTPUT_LOG 2>&1
if [ $? -ne 0 ]
then
echo "${CONFIG}, `date +%s.%N`, script_crash" >> ${BUILD_STATUS}; exit 0;
fi