study-docker-repro-longevity/analysis/buildstatus_analysis.py

102 lines
3.1 KiB
Python
Executable File

#!/bin/python3
"""
This script performs a build status analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
"""
import argparse
import csv
import os
import datetime
def analysis(input_table):
    """
    Count the build results found in a build status table, per category.

    Parameters
    ----------
    input_table: iterable of sequence of str
        Rows of the build status table. The third column (index 2) of each
        row holds the build result. Empty rows, such as the ones
        ``csv.reader`` yields for blank lines, are ignored.

    Returns
    -------
    dict
        Number of rows counted for each build status, with the status names
        as keys. Every known status is present (0 when it never occurs) and
        the keys always come in the same order.

    Raises
    ------
    KeyError
        If the result of a row is not one of the known build statuses.
    IndexError
        If a non-empty row has fewer than three columns.
    """
    # All build status, initialized to 0.
    # This is required to make the columns of the result table deterministic,
    # so they can be determined without the header in the CSV file.
    buildstatus = {
        "success": 0,
        "package_unavailable": 0,
        "baseimage_unavailable": 0,
        "artifact_unavailable": 0,
        "dockerfile_not_found": 0,
        "script_crash": 0,
        "job_time_exceeded": 0,
        "unknown_error": 0,
    }
    for row in input_table:
        if not row:
            # Blank line in the input: there is no result to count.
            continue
        # Third column is the result:
        buildstatus[row[2]] += 1
    return buildstatus
def main():
    """
    Command line entry point of the build status analysis.

    Parses the command line, concatenates the rows of every input CSV file
    into one table, runs ``analysis`` on it, appends a ``timestamp`` column
    holding the current time, and writes the result as a single header-less
    row to the output CSV file (the file is truncated first).
    """
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog="buildstatus_analysis",
        description="""
This script performs a build status analysis on the outputs of the
workflow to generate tables that can then be plotted by another program.
The generated table gives the amount of images that have been
built successfully, and the amount of images that failed to build,
for each category of error.
""",
    )
    # NOTE: the verbose flag is accepted but not read anywhere in this function.
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Shows more details on what is being done.",
    )
    # "append" combined with nargs="+" yields a list of lists: one inner list
    # of paths for each occurrence of -i on the command line.
    parser.add_argument(
        "-i", "--input",
        action="append",
        nargs="+",
        help="""
The CSV file used as input for the analysis function. Multiple files
can be specified at once by separating them with a space.
All the input files must be build status logs generated by ECG.
""",
        required=True,
    )
    parser.add_argument(
        "-o", "--output",
        help="""
Path to the output CSV file that will be created by the analysis function.
""",
        required=True,
    )
    args = parser.parse_args()
    output_path = args.output

    # Parsing the input files:
    input_table = []
    for paths in args.input:
        for path in paths:
            # newline="" lets the csv module do its own newline handling;
            # the with-block closes the file even if reading fails.
            with open(path, newline="") as input_file:
                input_table.extend(csv.reader(input_file))

    # Analyzing the inputs:
    output_dict = analysis(input_table)

    # Adding the current time (POSIX timestamp) to the output row:
    output_dict["timestamp"] = str(datetime.datetime.now().timestamp())

    # Writing analysis to output file. newline="" prevents the csv writer's
    # own line terminator from being translated a second time.
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=list(output_dict))
        # No header is written on purpose: the column order is fixed by the
        # analysis result, so the columns can be identified without it.
        dict_writer.writerow(output_dict)
# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()