Added build status analysis. Now logging build status even if build is successful, to make analysis easier (as mentioned in #26). Added a new error type.

This commit is contained in:
antux18 2024-07-26 19:00:25 +02:00
parent 4db323fb03
commit e8b25b74a7
4 changed files with 81 additions and 36 deletions

View File

@ -37,7 +37,7 @@ Where:
- `<config_file>` is the configuration file of the artifact in JSON format. An example is given in `artifacts_json/test.json`. WARNING: The name of the file (without the extension) must comply with the Docker image naming convention: only characters allowed are lowercase letters and numbers, separated with either one "." maximum, or two "_" maximum, or an unlimited number of "-", and should be of 128 characters maximum.
- `<pkglist_path>` is the path to the file where the package list generated by the program should be written.
- `<log_file>` is the path to the file where to log the output of the program.
- `<build_status_file>` is the path to the file where to write the build summary of the Docker image given in the configuration file.
- `<build_status_file>` is the path to the file where to write the build status of the Docker image given in the configuration file.
- `<artifact_hash_log>` is the path to the file where to log the hash of the downloaded artifact.
- `<cache_directory>` is the path to the cache directory, where downloaded artifacts will be stored for future usage. If not specified, cache is disabled.
@ -58,16 +58,19 @@ Just a plain text file containing the output of the script.
### Build status file
The log of the failed attempts to build the Docker image, in the form of a CSV file, with the following columns in order:
The log of the attempts to build the Docker image, in the form of a CSV file, with the following columns in order:
| Config file path | Timestamp | Reason category |
| Config file path | Timestamp | Result |
|------------------|-----------|-----------------|
The timestamp corresponds to when the error is being logged, not to when it happened.
The timestamp corresponds to when the result is being logged, not to when it happened.
The following are the categories of reasons explaining why the building failed:
The following are the possible results of the build:
- `success`: The Docker image has been built successfully.
- `package_unavailable`: A command requested the installation of a package that is not available.
- `baseimage_unavailable`: The base image needed for this container is not available.
- `artifact_unavailable`: The artifact could not be downloaded.
- `dockerfile_not_found`: No Dockerfile has been found in the location specified in the configuration file.
- `unknown_error`: Any other error.
### Artifact hash log

View File

@ -25,6 +25,7 @@ def softenv_analysis(input_tables):
Output table of the analysis in the form of a dict with headers as keys.
"""
pkgmgr = {}
i = 0
for table in input_tables:
for row in table:
# Third column is the package source:
@ -107,7 +108,8 @@ def artifact_analysis(input_tables):
def buildstatus_analysis(input_tables):
    """
    Analyzes the given build status tables to count the results of the building
    of the Dockerfile for each category.

    Parameters
    ----------
    input_tables: iterable
        Build status tables. Each table is an iterable of rows, and the third
        column of each row is the result of the build.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
        The headers are the results found in the tables, the values are the
        number of rows carrying each result.
    """
    buildstatus = {}
    for table in input_tables:
        for row in table:
            # A row without a third column (for instance an empty row)
            # carries no result: skip it instead of raising an IndexError.
            if len(row) < 3:
                continue
            # Third column is the result:
            result = row[2]
            buildstatus[result] = buildstatus.get(result, 0) + 1
    return buildstatus
def main():
# Command line arguments parsing:

77
ecg.py
View File

@ -138,16 +138,54 @@ def download_sources(config, arthashlog_path, dl_dir, use_cache):
logging.info(f"Cache found for {url}, skipping download")
return artifact_dir
def buildstatus_saver(output, buildstatus_path, config_path):
def builderror_identifier(output):
    """
    Parses the given 'output' to identify the error.

    Parameters
    ----------
    output: str
        Output of Docker.

    Returns
    -------
    found_error: str
        The category of the error that has been found in the output. If
        messages of more than one category are present, the category whose
        message appears latest in the output is returned. If no known
        message is found, 'unknown_error' is returned.
    """
    # Possible error messages given by 'docker build' and their category.
    # The key is the category, the value is a tuple of error messages
    # belonging to this category. The trailing comma matters: without it the
    # parentheses would only wrap a plain string, not build a tuple.
    build_errors = {
        "package_unavailable": ("Unable to locate package",),
        "baseimage_unavailable": ("manifest unknown: manifest unknown",),
        "dockerfile_not_found": ("Dockerfile: no such file or directory",),
    }
    found_error = "unknown_error"
    latest_position = -1
    for error_cat, messages in build_errors.items():
        for message in messages:
            # 'rfind' gives the position of the last occurrence, or -1 if
            # the message is absent (never greater than 'latest_position').
            position = output.rfind(message)
            if position > latest_position:
                latest_position = position
                found_error = error_cat
    return found_error
def buildresult_saver(result, buildstatus_path, config_path):
    """
    Saves the given result in the 'build_status' file.

    A line of the form '<artifact name>,<timestamp>,<result>' is appended to
    the file, which is created if it does not exist yet.

    Parameters
    ----------
    result: str
        The result of the build. Either a Docker 'build' error
        (see 'builderror_identifier'), another type of error
        (for instance 'artifact_unavailable'), or 'success'
        if build is successful.

    buildstatus_path: str
        Path to the build status file.

    config_path: str
        Path to the configuration file of the artifact. Its base name,
        without the extension, is written as the artifact name.

    Returns
    -------
    None
    """
    # 'splitext' only strips the extension, so a name that itself contains
    # a "." (e.g. 'my.artifact.json') is kept whole instead of being cut at
    # the first dot.
    artifact_name = os.path.splitext(os.path.basename(config_path))[0]
    # The timestamp is the moment the result is logged:
    now = datetime.datetime.now()
    timestamp = str(datetime.datetime.timestamp(now))
    # The context manager closes the file even if the write fails:
    with open(buildstatus_path, "a") as buildstatus_file:
        buildstatus_file.write(f"{artifact_name},{timestamp},{result}\n")
def build_image(config, src_dir, image_name, docker_cache = False):
@ -427,23 +448,25 @@ def main():
use_cache = True
dl_dir = cache_dir
artifact_dir = download_sources(config, arthashlog_path, dl_dir, use_cache)
status = ""
# If download was successful:
if artifact_dir != "":
artifact_name = os.path.splitext(os.path.basename(config_path))[0]
return_code, build_output = build_image(config, artifact_dir, artifact_name, args.docker_cache)
status = ""
if return_code == 0:
status = "success"
check_env(config, artifact_dir, artifact_name, pkglist_path)
remove_image(config, artifact_name)
# Creates file if not already:
pathlib.Path(buildstatus_path).touch()
else:
status = builderror_identifier(build_output)
# Creates file if not already:
pathlib.Path(pkglist_path).touch()
buildstatus_saver(build_output, buildstatus_path, config_path)
# If download failed, we need to save the error to the build status log:
else:
logging.fatal("Artifact could not be downloaded!")
buildstatus_saver("artifact_unavailable", buildstatus_path, config_path)
status = "artifact_unavailable"
buildresult_saver(status, buildstatus_path, config_path)
if __name__ == "__main__":
main()

View File

@ -33,7 +33,7 @@ rule all:
rule check_all:
input:
expand(f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json", artifact=ARTIFACTS)
rule check_artifact:
input:
@ -77,4 +77,5 @@ rule update_blacklist:
output:
f"{BLACKLIST_FOLDER}/{{date}}.csv"
shell:
f"cat {{input}} > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}"
# We need to ignore lines where build is successful:
f"cat {{input}} | grep -v ',success' > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}"