study-docker-repro-longevity/analysis/artifact_analysis.py

147 lines
3.9 KiB
Python
Executable File

#!/bin/python3
"""
This script performs an artifact analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
The generated table gives the number of artifacts that are available
or not available, and the number of artifacts that have been modified
over time.
"""
import argparse
import csv
import os
def artifact_changed(table):
    """
    Indicates whether the artifact involved in the given hash log table
    has changed over time.

    Parameters
    ----------
    table: list
        Artifact hash log table. Each row is a sequence whose 2nd column
        holds the hash recorded for that log entry.

    Returns
    -------
    bool
        True if at least one row holds a hash different from the hash of
        the first row, False otherwise. An empty table yields False.
    """
    # An empty log holds no hash at all, so nothing can have changed.
    if not table:
        return False
    # Hash is in the 2nd column; the first entry serves as the reference:
    reference_hash = table[0][1]
    # Any differing value counts as a change, including an empty hash field.
    return any(row[1] != reference_hash for row in table)
def artifact_available(table):
    """
    Tells whether the artifact described by the given hash log table can
    still be obtained.

    Parameters
    ----------
    table: list
        Artifact hash log table. Must contain at least one row.

    Returns
    -------
    bool
        False when the hash field of the last row is an empty string,
        True otherwise.
    """
    # Current availability is read from the last row; the hash sits in
    # the 2nd column and is empty when no hash was recorded.
    latest_hash = table[-1][1]
    return latest_hash != ""
def analysis(input_tables):
    """
    Counts, over all the given artifact hash tables, how many artifacts
    are currently available, how many are unavailable, and how many have
    changed over time.

    Parameters
    ----------
    input_tables: list
        Artifact hash log tables to analyse, one table per artifact.

    Returns
    -------
    dict
        Counters stored under the keys "available", "unavailable" and
        "changed", in that order.
    """
    counters = dict.fromkeys(("available", "unavailable", "changed"), 0)
    for hash_log in input_tables:
        # Each artifact falls into exactly one availability bucket.
        bucket = "available" if artifact_available(hash_log) else "unavailable"
        counters[bucket] += 1
        # The change check is independent of availability.
        if artifact_changed(hash_log):
            counters["changed"] += 1
    return counters
def main():
    """
    Command line entry point.

    Parses the command line arguments, reads every input CSV file into a
    table, runs the analysis on these tables, and writes the resulting
    counters as a header row and a single data row to the output CSV file.
    """
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "artifact_analysis",
        description =
        """
        This script performs an artifact analysis on the outputs of the workflow
        to generate tables that can then be plotted by another program.
        The generated table gives the amount of artifacts that are available
        or not available, and the amount of artifacts that have been modified
        over time.
        """
    )
    parser.add_argument(
        "-v", "--verbose",
        action = "store_true",
        help = "Shows more details on what is being done."
    )
    parser.add_argument(
        "-i", "--input",
        action = "append",
        help =
        """
        The CSV file used as input for the analysis function. Multiple files
        can be specified by repeating this argument with different paths.
        All the input files must be artifact hash logs generated by ECG.
        """,
        required = True
    )
    parser.add_argument(
        "-o", "--output",
        help =
        """
        Path to the output CSV file that will be created by the analysis function.
        """,
        required = True
    )
    args = parser.parse_args()
    input_paths = args.input
    output_path = args.output

    # Parsing the input files:
    input_tables = []
    for path in input_paths:
        # The context manager closes the file even if parsing fails, and
        # newline="" leaves line-ending handling to the csv module.
        with open(path, newline="") as input_file:
            input_tables.append(list(csv.reader(input_file)))

    # Analyzing the inputs before touching the output file, so that a
    # failing analysis does not leave a truncated, empty output behind:
    output_dict = analysis(input_tables)

    # Writing analysis to output file:
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=list(output_dict))
        dict_writer.writeheader()
        dict_writer.writerow(output_dict)
# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()