Adding missing changes from the previous commit because I wasn't in the root folder...
parent cb1a62217c
commit 0bbed9d0f5

.gitignore (vendored): 1 changed line

@@ -1,5 +1,6 @@
 pkglists/*
 output/*
+outputs/*
 cache/*
 examples/*
 .snakemake/*

README.md: 12 changed lines

@@ -79,18 +79,17 @@ Where:
 Run `ecg.py` as follow:
 
 ```
-python3 ecg.py <config_file> -p <pkglist_path> -l <log_file> -b <build_status_file> -a <artifact_hash_log> -c <cache_directory>
+python3 ecg.py <config_file> -p <pkglist_path> -b <build_status_file> -a <artifact_hash_log> -c <cache_directory>
 ```
 
 Where:
 - `<config_file>` is the configuration file of the artifact in JSON format. WARNING: The name of the file (without the extension) must comply with the Docker image naming convention: only characters allowed are lowercase letters and numbers, separated with either one "." maximum, or two "_" maximum, or an unlimited number of "-", and should be of 128 characters maximum.
 - `<pkglist_path>` is the path to the file where the package list generated by the program should be written.
-- `<log_file>` is the path to the file where to log the output of the program.
 - `<build_status_file>` is the path to the file where to write the build status of the Docker image given in the configuration file.
 - `<artifact_hash_log>` is the path to the file where to log the hash of the downloaded artifact.
 - `<cache_directory>` is the path to the cache directory, where downloaded artifacts will be stored for future usage. If not specified, cache is disabled.
 
-You can also use `--docker-cache` to enable the cache of the Docker layers, and `-v` to show the full output of the script in your terminal (by default, it is only written to the specified `log_file`).
+You can also use `--docker-cache` to enable the cache of the Docker layers.
 
 ##### Outputs
 
@@ -103,10 +102,6 @@ The list of packages installed in the container, depending on the sources (a pac
 
 For Git packages, the hash of the last commit is used as version number. For miscellaneous packages, the hash of the file that has been used to install the package is used as version number. The timestamp corresponds to the time when ECG started building the package list, so it will be the same for each package that has been logged during the same execution of ECG.
 
-###### Output log
-
-Just a plain text file containing the output of the script.
-
 ###### Build status file
 
 The log of the attempts to build the Docker image, in the form of a CSV file, with the following columns in order:
@@ -158,8 +153,7 @@ The script `softenv_analysis.py` performs a software environment analysis by par
 
 Depending on the type of analysis, multiple tables can be generated:
 - `sources-stats`: Number of packages per source (a package manager, `git` or `misc`).
-- `pkg-changes`: Number of packages that changed over time (`0` if only one file is given, since it will only include the package list of a single execution).
-- `pkgs-per-container`: Number of packages per container. This analysis hasn't been implemented yet, and may never be.
+- `pkgs-changes`: Number of packages that changed over time (`0` if only one file is given, since it will only include the package list of a single execution).
 
 The type of analysis can be specified using the option `-t`.
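
For context, a concrete invocation matching the updated README usage could look like the following sketch; every path in it is a made-up example, not something from this commit:

```bash
# Sketch: running ECG on one artifact configuration with the new CLI
# (no more -l flag). All file paths below are hypothetical.
python3 ecg.py artifacts/json/myartifact.json \
    -p output/pkglist.csv \
    -b output/build_status.csv \
    -a output/artifact_hash.csv \
    -c cache
```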

(workflow configuration file)

@@ -4,6 +4,7 @@ folder_blacklists: "blacklists"
 symlink_blacklist: "blacklist.csv"
 system: "g5k" # can be "local" for local execution
+prefix: "outputs"
 analysis_dir: "analysis"
 
 site: "grenoble"
 cluster: "dahu"

ecg.py: 108 changed lines

@@ -384,11 +384,11 @@ def main():
     It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
     """
     )
-    parser.add_argument(
-        '-v', '--verbose',
-        action = 'store_true',
-        help = "Shows more details on what is being done."
-    )
+    # parser.add_argument(
+    #     '-v', '--verbose',
+    #     action = 'store_true',
+    #     help = "Shows more details on what is being done."
+    # )
     parser.add_argument(
         "config",
         help = "The path to the configuration file of the artifact's Docker image."
@@ -398,11 +398,11 @@ def main():
         help = "Path to the file where the package list generated by the program should be written.",
         required = True
     )
-    parser.add_argument(
-        "-l", "--log-path",
-        help = "Path to the file where to log the output of the program.",
-        required = True
-    )
+    # parser.add_argument(
+    #     "-l", "--log-path",
+    #     help = "Path to the file where to log the output of the program.",
+    #     required = True
+    # )
     parser.add_argument(
         "-b", "--build-status",
         help = "Path to the file where to write the build status of the Docker image given in the configuration file.",
@@ -430,62 +430,62 @@ def main():
     args = parser.parse_args()
 
     # Setting up the paths of the outputs:
-    log_path = "log.txt" # Output of the program
     pkglist_path = args.pkg_list
-    log_path = args.log_path
     buildstatus_path = args.build_status
     arthashlog_path = args.artifact_hash
     cache_dir = args.cache_dir
+    # log_path = "log.txt" # Output of the program
+    # log_path = args.log_path
 
-    # Setting up the log: will be displayed both on stdout and to the specified
-    # file:
-    print(f"Output will be stored in {log_path}")
-    logging.basicConfig(filename = log_path, filemode = "w", format = '%(levelname)s: %(message)s', level = logging.INFO)
-    if args.verbose:
-        logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
+    # Setting up the log:
+    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
+    # # Old version where the script writes its own log to the given file:
+    # print(f"Output will be stored in {log_path}")
+    # logging.basicConfig(filename = log_path, filemode = "w", format = '%(levelname)s: %(message)s', level = logging.INFO)
+    # if args.verbose:
+    #     logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
 
     # Parsing the input file including the configuration of the artifact's
     # image:
     config_path = args.config
     status = ""
-    try:
-        config_file = open(config_path, "r")
-        config = json.loads(config_file.read())
-        config_file.close()
+    config_file = open(config_path, "r")
+    config = json.loads(config_file.read())
+    config_file.close()
 
-        dl_dir = None
-        # If not using cache, creates a temporary directory:
-        if cache_dir == None:
-            tmp_dir = tempfile.TemporaryDirectory()
-            dl_dir = tmp_dir.name
+    dl_dir = None
+    # If not using cache, creates a temporary directory:
+    if cache_dir == None:
+        tmp_dir = tempfile.TemporaryDirectory()
+        dl_dir = tmp_dir.name
-        else:
-            use_cache = True
-            dl_dir = cache_dir
-        artifact_name = os.path.splitext(os.path.basename(config_path))[0]
-        artifact_dir = download_sources(config, arthashlog_path, dl_dir, use_cache, artifact_name)
-        # If download was successful:
-        if artifact_dir != "":
-            return_code, build_output = build_image(config, artifact_dir, artifact_name, args.docker_cache)
-            if return_code == 0:
-                status = "success"
-                check_env(config, artifact_dir, artifact_name, pkglist_path)
-                remove_image(config, artifact_name)
+    else:
+        use_cache = True
+        dl_dir = cache_dir
+    artifact_name = os.path.splitext(os.path.basename(config_path))[0]
+    artifact_dir = download_sources(config, arthashlog_path, dl_dir, use_cache, artifact_name)
+    # If download was successful:
+    if artifact_dir != "":
+        return_code, build_output = build_image(config, artifact_dir, artifact_name, args.docker_cache)
+        if return_code == 0:
+            status = "success"
+            check_env(config, artifact_dir, artifact_name, pkglist_path)
+            remove_image(config, artifact_name)
-            else:
-                status = builderror_identifier(build_output)
-                # Creates file if not already:
-                pathlib.Path(pkglist_path).touch()
-        # If download failed, we need to save the error to the build status log:
-        else:
-            logging.fatal("Artifact could not be downloaded!")
-            status = "artifact_unavailable"
-    except Exception as err:
-        # Handles any possible script's own crashes:
-        formatted_err = str(''.join(traceback.format_exception(None, err, err.__traceback__)))
-        log_file = open(log_path, "a")
-        log_file.write(formatted_err)
-        log_file.close()
-        logging.error(formatted_err)
-        status = "script_crash"
+        else:
+            status = builderror_identifier(build_output)
+            # Creates file if not already:
+            pathlib.Path(pkglist_path).touch()
+    # If download failed, we need to save the error to the build status log:
+    else:
+        logging.fatal("Artifact could not be downloaded!")
+        status = "artifact_unavailable"
+    # except Exception as err:
+    #     # Handles any possible script's own crashes:
+    #     formatted_err = str(''.join(traceback.format_exception(None, err, err.__traceback__)))
+    #     log_file = open(log_path, "a")
+    #     log_file.write(formatted_err)
+    #     log_file.close()
+    #     logging.error(formatted_err)
+    #     status = "script_crash"
     buildresult_saver(status, buildstatus_path, config_path)
 
 if __name__ == "__main__":
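
The net effect of the ecg.py changes above is that logging moves from a script-managed file to stdout/stderr, so capturing a log file is now the caller's job. A minimal sketch of that pattern (file names are illustrative):

```bash
# ecg.py now configures logging without a filename, so its output goes to
# the terminal; the caller redirects it, as the new wrapper script does.
python3 ecg.py myartifact.json \
    -p pkglist.csv -b build_status.csv -a artifact_hash.csv \
    > log.txt 2>&1
```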

run.sh: 2 changed lines

@@ -13,4 +13,4 @@ then
     mkdir $CACHE_DIR
 fi
 
-./ecg.py $TESTFILE -v -p $OUTPUT_PATH/pkglist.csv -l $OUTPUT_PATH/log.txt -b $OUTPUT_PATH/build_status.csv -a $OUTPUT_PATH/artifact_hash.csv -c $CACHE_DIR --docker-cache
+./ecg.py $TESTFILE -v -p $OUTPUT_PATH/pkglist.csv -b $OUTPUT_PATH/build_status.csv -a $OUTPUT_PATH/artifact_hash.csv -c $CACHE_DIR --docker-cache

(Snakemake workflow file)

@@ -13,6 +13,7 @@ BLACKLIST = config["symlink_blacklist"]
 EXTENSION = "json"
 SYSTEM = config["system"]
+PREFIX = config["prefix"]
 ANALYSIS_DIR = config["analysis_dir"]
 
 ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST)
 
@@ -22,12 +23,12 @@ rule all:
             folder=["pkgs", "build_status", "artifact_hash"],\
             artifact=ARTIFACTS,\
             date=DATE
         ),
         expand(f"{PREFIX}/{{folder}}/{{artifact}}/{{date}}.txt",\
             folder=["logs"],\
             artifact=ARTIFACTS,\
             date=DATE
         ),
         f"{BLACKLIST_FOLDER}/{DATE}.csv"
 
 rule check_all:
@@ -49,7 +50,7 @@ rule check_artifact:
     """
 
 SHELLS_ECG = {
-    "local": f"python3 {{input.ecg}} -l {{output.log}} -p {{output.pkg}} -b {{output.build_status}} -a {{output.artifact_hash}} {ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION}",
+    "local": f"./{{input.ecg_wrapper}} {{input.ecg}} {ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION} {{output.pkg}} {{output.build_status}} {{output.artifact_hash}} {{output.log}}",
     "g5k": f"python3 {{input.execo_wrapper}} --path {os.getcwd()} --script {{input.oar_wrapper}} --site {config['site']} --cluster {config['cluster']} --max-duration {config['max_duration']} --checkpoint {config['checkpoint']} {'--besteffort' if config['besteffort'] else ''} --sleep_time {config['sleep_time']} --build_status_file {{output.build_status}} --artifact {{wildcards.artifact}} -- '"
 }
 
@@ -58,8 +59,9 @@ rule run_ecg:
         "flake.nix",
         "flake.lock",
         ecg="ecg.py",
+        ecg_wrapper="workflow/scripts/ecg_wrapper.sh",
         execo_wrapper="workflow/scripts/submission_g5k.py",
-        oar_wrapper="workflow/scripts/ecg_wrapper.oar.bash",
+        oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
         artifact=f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}"
     output:
         log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
@@ -73,9 +75,29 @@ rule update_blacklist:
     input:
         BLACKLIST,
         build_status=expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",\
-            artifact=ARTIFACTS)
+            artifact=ARTIFACTS
+        )
     output:
         f"{BLACKLIST_FOLDER}/{{date}}.csv"
     shell:
         # We need to ignore lines where build is successful:
         f"cat {{input}} | grep -v ',success' > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}"
+
+rule analysis:
+    input:
+        log = f"{PREFIX}/logs/*/*.txt",
+        pkg = f"{PREFIX}/pkgs/*/*.csv",
+        build_status = f"{PREFIX}/build_status/*/*.csv",
+        artifact_hash = f"{PREFIX}/artifact_hash/*/*.csv",
+        softenv_analysis = "softenv_analysis.py",
+        buildstatis_analysis = "buildstatis_analysis.py",
+        artifact_analysis = "artifact_analysis.py",
+        SOFTENV_TYPES = ["sources-stats", "pkgs-changes"]
+    output:
+        expand(f"{ANALYSIS_DIR}/{{folder}}/{{artifact}}/{{date}}.csv",\
+            folder=["sources_stats", "pkgs_changes", "build_status", "artifact"],\
+            artifact=ARTIFACTS,\
+            date=DATE
+        ),
+    shell:
+        expand("python3 {{input.softenv_analysis}} -t {{analysis_type}}")
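
For the new `rule analysis`, the shell step boils down to calls like the following sketch; only the `-t` option is documented in the README, so the positional package-list arguments here are an assumption:

```bash
# Hypothetical invocation of the analysis script for one supported type;
# the input file arguments are assumed, not confirmed by this commit.
python3 softenv_analysis.py -t sources-stats outputs/pkgs/myartifact/*.csv
```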

workflow/scripts/ecg_wrapper.sh (new executable file): 14 lines

@@ -0,0 +1,14 @@
+#!/bin/bash
+
+ECG=$1
+CONFIG=$2
+PKGLIST=$3
+BUILD_STATUS=$4
+ARTHASH_LOG=$5
+OUTPUT_LOG=$6
+
+python3 $ECG -p $PKGLIST -b $BUILD_STATUS -a $ARTHASH_LOG $CONFIG > $OUTPUT_LOG 2> $OUTPUT_LOG
+if [ $? -ne 0 ]
+then
+    echo "${CONFIG}, `date +%s.%N`, script_crash" >> ${BUILD_STATUS}; exit 0;
+fi
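
For reference, a call of the new wrapper with its six positional arguments might look like this; the argument order follows the script above, and all paths are made-up examples:

```bash
# ecg_wrapper.sh <ecg_script> <config> <pkglist> <build_status> <artifact_hash_log> <output_log>
./workflow/scripts/ecg_wrapper.sh ecg.py \
    artifacts/json/myartifact.json \
    outputs/pkgs/myartifact/2024-01-01.csv \
    outputs/build_status/myartifact/2024-01-01.csv \
    outputs/artifact_hash/myartifact/2024-01-01.csv \
    outputs/logs/myartifact/2024-01-01.txt
```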