Added build status analysis. Now logging build status even if build is successful, to make analysis easier (as mentioned in #26). Added a new error type.

2024-07-26 19:00:25 +02:00 · 2024-07-26 19:00:25 +02:00 · e8b25b74a7
commit e8b25b74a7
parent 4db323fb03
4 changed files with 81 additions and 36 deletions
--- a/README.md
+++ b/README.md
@ -37,7 +37,7 @@ Where:
 - `<config_file>` is the configuration file of the artifact in JSON format. An example is given in `artifacts_json/test.json`. WARNING: The name of the file (without the extension) must comply with the Docker image naming convention: only characters allowed are lowercase letters and numbers, separated with either one "." maximum, or two "_" maximum, or an unlimited number of "-", and should be of 128 characters maximum.
 - `<pkglist_path>` is the path to the file where the package list generated by the program should be written.
 - `<log_file>` is the path to the file where to log the output of the program.
- `<build_status_file>` is the path to the file where to write the build summary of the Docker image given in the configuration file.
+- `<build_status_file>` is the path to the file where to write the build status of the Docker image given in the configuration file.
 - `<artifact_hash_log>` is the path to the file where to log the hash of the downloaded artifact.
 - `<cache_directory>` is the path to the cache directory, where downloaded artifacts will be stored for future usage. If not specified, cache is disabled.

@ -58,16 +58,19 @@ Just a plain text file containing the output of the script.

 ### Build status file

-The log of the failed attempts to build the Docker image, in the form of a CSV file, with the following columns in order:
+The log of the attempts to build the Docker image, in the form of a CSV file, with the following columns in order:

-| Config file path | Timestamp | Reason category |
+| Config file path | Timestamp | Result          |
 |------------------|-----------|-----------------|

-The timestamp corresponds to when the error is being logged, not to when it happened.
+The timestamp corresponds to when the result is being logged, not to when it happened.

-The following are the categories of reasons explaining why the building failed:
+The following are the possible results of the build:
+- `success`: The Docker image has been built successfully.
 - `package_unavailable`: A command requested the installation of a package that is not available.
 - `baseimage_unavailable`: The base image needed for this container is not available.
+- `artifact_unavailable`: The artifact could not be downloaded.
+- `dockerfile_not_found`: No Dockerfile has been found in the location specified in the configuration file.
 - `unknown_error`: Any other error.

 ### Artifact hash log
--- a/analysis/output_analysis.py
+++ b/analysis/output_analysis.py
@ -25,6 +25,7 @@ def softenv_analysis(input_tables):
        Output table of the analysis in the form of a dict with headers as keys.
    """
    pkgmgr = {}
+    i = 0
    for table in input_tables:
        for row in table:
            # Third column is the package source:
@ -107,7 +108,8 @@ def artifact_analysis(input_tables):

 def buildstatus_analysis(input_tables):
    """
-    Analyzes the given build status tables.
+    Analyzes the given build status tables to count the results of the building
+    of the Dockerfile for each category.

    Parameters
    ----------
@ -119,7 +121,23 @@ def buildstatus_analysis(input_tables):
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
-    return {}
+    buildstatus = {}
+    for table in input_tables:
+        # # There has never been any error:
+        # if table == [[]]:
+        #     if "never_failed" not in buildstatus:
+        #             buildstatus["never_failed"] = 1
+        #     else:
+        #         buildstatus["never_failed"] += 1
+        # # There has been an error at least once:
+        # else:
+        for row in table:
+            # Third column is the result:
+            if row[2] not in buildstatus:
+                buildstatus[row[2]] = 1
+            else:
+                buildstatus[row[2]] += 1
+    return buildstatus

 def main():
    # Command line arguments parsing:
--- a/ecg.py
+++ b/ecg.py
@ -138,16 +138,54 @@ def download_sources(config, arthashlog_path, dl_dir, use_cache):
        logging.info(f"Cache found for {url}, skipping download")
    return artifact_dir

-def buildstatus_saver(output, buildstatus_path, config_path):
+def builderror_identifier(output):
+
    """
-    Parses the given 'output' to indentify the errors, then saves them to the
-    'build_status' file.
+    Parses the given 'output' to identify the error.

    Parameters
    ----------
    output: str
        Output of Docker.

+    Returns
+    -------
+    found_error: str
+        The error that has been found in the output, according to the
+        categories. If more than one category matches, only the last
+        matching one (in the order of 'build_errors') is returned.
+    """
+    # Possible error messages given by 'docker build' and their category.
+    # The key is the category, the value is the error message belonging to
+    # this category (a plain string, searched for as a substring of the output):
+    build_errors = {
+        "package_unavailable":("Unable to locate package"),
+        "baseimage_unavailable":("manifest unknown: manifest unknown"),
+        "dockerfile_not_found":("Dockerfile: no such file or directory")
+    }
+
+    found_error = ""
+    unknown_error = True
+    for error_cat, error in build_errors.items():
+        if error in output:
+            unknown_error = False
+            found_error = error_cat
+    if unknown_error:
+        found_error = "unknown_error"
+    return found_error
+
+def buildresult_saver(result, buildstatus_path, config_path):
+    """
+    Saves the given result in the 'build_status' file.
+
+    Parameters
+    ----------
+    result: str
+        The result of the build. Either a Docker 'build' error
+        (see 'builderror_identifier'), another type of error
+        (for instance 'artifact_unavailable'), or 'success'
+        if build is successful.
+
    buildstatus_path: str
        Path to the build status file.

@ -158,32 +196,15 @@ def buildstatus_saver(output, buildstatus_path, config_path):
    -------
    None
    """
-    # Possible error messages given by 'docker build' and their category.
-    # The key is the category, the value is a tuple of error messages belonging to
-    # to this category:
-    build_errors = {
-        "package_unavailable":("Unable to locate package"),
-        "baseimage_unavailable":("manifest unknown: manifest unknown"),
-        "artifact_unavailable":("artifact_unavailable")
-    }
-
    file_exists = os.path.exists(buildstatus_path)
    buildstatus_file = open(buildstatus_path, "a")
    artifact_name = os.path.basename(config_path).split(".")[0]
    # # Writing header in case file didn't exist:
    # if not file_exists:
    #     buildstatus_file.write("yaml_path,timestamp,error")
-    unknown_error = True
-    for error_cat, error in build_errors.items():
-        if error in output:
-            unknown_error = False
-            now = datetime.datetime.now()
-            timestamp = str(datetime.datetime.timestamp(now))
-            buildstatus_file.write(f"{artifact_name},{timestamp},{error_cat}\n")
-    if unknown_error:
-        now = datetime.datetime.now()
-        timestamp = str(datetime.datetime.timestamp(now))
-        buildstatus_file.write(f"{artifact_name},{timestamp},unknown_error\n")
+    now = datetime.datetime.now()
+    timestamp = str(datetime.datetime.timestamp(now))
+    buildstatus_file.write(f"{artifact_name},{timestamp},{result}\n")
    buildstatus_file.close()

 def build_image(config, src_dir, image_name, docker_cache = False):
@ -427,23 +448,25 @@ def main():
        use_cache = True
        dl_dir = cache_dir
    artifact_dir = download_sources(config, arthashlog_path, dl_dir, use_cache)
+    status = ""
    # If download was successful:
    if artifact_dir != "":
        artifact_name = os.path.splitext(os.path.basename(config_path))[0]
        return_code, build_output = build_image(config, artifact_dir, artifact_name, args.docker_cache)
+        status = ""
        if return_code == 0:
+            status = "success"
            check_env(config, artifact_dir, artifact_name, pkglist_path)
            remove_image(config, artifact_name)
-            # Creates file if not already:
-            pathlib.Path(buildstatus_path).touch()
        else:
+            status = builderror_identifier(build_output)
            # Creates file if not already:
            pathlib.Path(pkglist_path).touch()
-            buildstatus_saver(build_output, buildstatus_path, config_path)
    # If download failed, we need to save the error to the build status log:
    else:
        logging.fatal("Artifact could not be downloaded!")
-        buildstatus_saver("artifact_unavailable", buildstatus_path, config_path)
+        status = "artifact_unavailable"
+    buildresult_saver(status, buildstatus_path, config_path)

 if __name__ == "__main__":
    main()
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@ -33,7 +33,7 @@ rule all:
 rule check_all:
  input:
    expand(f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json", artifact=ARTIFACTS)
-    
+

 rule check_artifact:
  input:
@ -77,4 +77,5 @@ rule update_blacklist:
  output:
    f"{BLACKLIST_FOLDER}/{{date}}.csv"
  shell:
-    f"cat {{input}} > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}"
+    # We need to ignore lines where build is successful:
+    f"cat {{input}} | grep -v ',success' > {{output}} && rm -rf {BLACKLIST} && ln -s {{output}} {BLACKLIST}"