study-docker-repro-longevity/analysis/buildstatus_analysis.py

#!/bin/python3

"""
    This script performs a build status analysis on the outputs of the workflow
    to generate tables that can then be plotted by another program.
"""

import argparse
import csv
import os
import datetime

def analysis(input_table):
    """
    Analyzes the given build status table to count the results of the building
    of the Dockerfile for each category.

    Parameters
    ----------
    input_table: iterable of list of str
        Rows of the build status table to analyse, as produced by
        `csv.reader`. The third column of each row holds the build status.
        Rows with fewer than three columns (such as the empty rows that
        `csv.reader` yields for blank lines) are ignored.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as
        keys. Every known build status is present, even when its count is 0.

    Raises
    ------
    KeyError
        If a row holds a build status that is not one of the known categories.
    """
    # All build status, initialized to 0.
    # This is required to make the column of the result table deterministic,
    # so they can be determined without the header in the CSV file.
    buildstatus = {
        "success": 0,
        "package_install_failed": 0,
        "baseimage_unavailable": 0,
        "artifact_unavailable": 0,
        "dockerfile_not_found": 0,
        "script_crash": 0,
        "job_time_exceeded": 0,
        "unknown_error": 0,
    }
    for row in input_table:
        # Blank lines in a CSV file are parsed as empty rows: there is no
        # status to count in them, so they are skipped instead of crashing.
        if len(row) < 3:
            continue
        # Third column is the result:
        buildstatus[row[2]] += 1
    return buildstatus

def main():
    """
    Entry point of the script.

    Parses the command line, reads every input CSV file into a single table,
    counts the build statuses with `analysis`, adds the current timestamp to
    the result, and writes it as a single row to the output CSV file.
    """
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "buildstatus_analysis",
        description =
        """
        This script performs a build status analysis on the outputs of the
        workflow to generate tables that can then be plotted by another program.
        The generated table gives the amount of images that have been
        built successfully, and the amount of images that failed to build,
        for each category of error.
        """
    )
    # NOTE: the verbose flag is accepted but its value is not read below.
    parser.add_argument(
        "-v", "--verbose",
        action = "store_true",
        help = "Shows more details on what is being done."
    )
    # "append" combined with nargs="+" yields a list of lists of paths:
    # one inner list per occurrence of the option on the command line.
    parser.add_argument(
        "-i", "--input",
        action = "append",
        nargs = "+",
        help =
        """
        The CSV file used as input for the analysis function. Multiple files
        can be specified at once by separating them with a space.
        All the input files must be build status logs generated by ECG.
        """,
        required = True
    )
    parser.add_argument(
        "-o", "--output",
        help =
        """
        Path to the output CSV file that will be created by the analysis function.
        """,
        required = True
    )
    args = parser.parse_args()
    inputs = args.input
    output_path = args.output

    # Parsing the input files:
    input_table = []
    for path_group in inputs:
        for path in path_group:
            # The context manager closes the file even if parsing fails.
            # newline="" lets the csv module handle line endings itself.
            with open(path, newline="") as input_file:
                input_table.extend(csv.reader(input_file))

    # Analyzing the inputs:
    output_dict = analysis(input_table)
    # Adding the current time to every row:
    timestamp = str(datetime.datetime.now().timestamp())
    output_dict["timestamp"] = timestamp

    # Writing analysis to output file:
    # newline="" prevents the row terminator written by the csv module from
    # being translated a second time on platforms where os.linesep is "\r\n".
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=list(output_dict))
        # No header is written on purpose: the column order is fixed by the
        # order in which the keys were inserted in the result dict.
        dict_writer.writerow(output_dict)

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()