#!/bin/python3

"""
This script performs an artifact analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
"""

import argparse
import csv
import os
import datetime
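
# Each row of an artifact hash log is expected to hold at least three columns:
# only column 1 (the artifact hash, or "-1" when the download failed) and
# column 2 (the artifact name) are read by the functions below; any other
# columns are ignored. A minimal sketch of such a row, with placeholder values:
#   ["<ignored>", "<hash or -1>", "<artifact name>"]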


def artifact_changed(table, name):
    """
    Indicates whether the artifact of the given name has changed over time.

    An artifact becoming unavailable is considered modified.

    Parameters
    ----------
    table: list
        Artifact hash log table.

    name: str
        Name of the artifact to check.

    Returns
    -------
    bool
        True if the artifact changed, False otherwise.
    """
    changed = False
    i = 0
    artifact_hash = ""
    while i < len(table) and not changed:
        row = table[i]
        if row[2] == name:
            # If the first hash has not been saved yet:
            if artifact_hash == "":
                artifact_hash = row[1]  # Hash is in the 2nd column
            elif row[1] != artifact_hash:
                changed = True
        i += 1
    return changed
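
# A minimal sketch of the behaviour above, using hypothetical rows (only
# columns 1 and 2 are read):
#   artifact_changed([["t0", "abc", "img"], ["t1", "def", "img"]], "img")
#   -> True (the hash of "img" went from "abc" to "def")
#   artifact_changed([["t0", "abc", "img"], ["t1", "abc", "img"]], "img")
#   -> False (the hash never changed)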


def artifact_available(table, name):
    """
    Indicates whether the artifact of the given name is still available.

    Parameters
    ----------
    table: list
        Artifact hash log table.

    name: str
        Name of the artifact to check.

    Returns
    -------
    bool
        True if the artifact is still available, False otherwise.
    """
    available = True
    for row in table:
        if row[2] == name:
            if row[1] == "-1":
                # -1 means the artifact could not be downloaded. Otherwise,
                # this column would contain the hash of the artifact.
                available = False
            else:
                available = True
    # The last log of the artifact hash will determine if the artifact is
    # currently available or not.
    return available
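
# A minimal sketch with hypothetical rows: the most recent log decides.
#   artifact_available([["t0", "abc", "img"], ["t1", "-1", "img"]], "img")
#   -> False (the last log shows the download failed)
#   artifact_available([["t0", "-1", "img"], ["t1", "abc", "img"]], "img")
#   -> True (the artifact reappeared in the last log)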


def analysis(input_table):
    """
    Analyzes the given artifact hash table to determine whether the artifacts
    are still available and unchanged, have changed, or are no longer available.

    Parameters
    ----------
    input_table: list
        Table to analyze.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    artifacts = {"available": 0, "unavailable": 0, "changed": 0}
    checked = []  # Artifacts that have been checked already
    for row in input_table:
        artifact_name = row[2]  # Name of the artifact in the 3rd column
        if artifact_name not in checked:
            if artifact_available(input_table, artifact_name):
                artifacts["available"] += 1
            else:
                artifacts["unavailable"] += 1
            if artifact_changed(input_table, artifact_name):
                artifacts["changed"] += 1
            checked.append(artifact_name)
    return artifacts
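
# A minimal sketch with hypothetical rows, covering all three counters:
#   analysis([["t0", "abc", "img"],  ["t1", "-1", "img"],
#             ["t0", "123", "data"], ["t1", "123", "data"]])
#   -> {"available": 1, "unavailable": 1, "changed": 1}
#   ("data" is available and unchanged; "img" became unavailable, which also
#    counts as a change)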


def main():
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "artifact_analysis",
        description =
        """
        This script performs an artifact analysis on the outputs of the workflow
        to generate tables that can then be plotted by another program.
        The generated table gives the amount of artifacts that are available
        or not available, and the amount of artifacts that have been modified
        over time.
        """
    )
    parser.add_argument(
        "-v", "--verbose",
        action = "store_true",
        help = "Shows more details on what is being done."
    )
    parser.add_argument(
        "-i", "--input",
        action = "append",
        nargs = "+",
        help =
        """
        The CSV file used as input for the analysis function. Multiple files
        can be specified at once by separating them with a space.
        All the input files must be artifact hash logs generated by ECG.
        """,
        required = True
    )
    parser.add_argument(
        "-o", "--output",
        help =
        """
        Path to the output CSV file that will be created by the analysis function.
        """,
        required = True
    )
    args = parser.parse_args()
    inputs = args.input
    output_path = args.output

    # Parsing the input files:
    input_table = []
    for input_group in inputs:
        for path in input_group:
            with open(path) as input_file:
                input_table += list(csv.reader(input_file))

    # Analyzing the inputs:
    output_dict = analysis(input_table)

    # Adding the current timestamp to the output row:
    now = datetime.datetime.now()
    timestamp = str(datetime.datetime.timestamp(now))
    output_dict["timestamp"] = timestamp

    # Writing analysis to output file:
    with open(output_path, "w+", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys())
        # dict_writer.writeheader()
        dict_writer.writerow(output_dict)
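
# Example invocation (the script and CSV file names here are hypothetical):
#   python3 artifact_analysis.py -i hash_log_1.csv hash_log_2.csv -o analysis.csv
# The output file then holds a single row with the values of available,
# unavailable, changed, and timestamp, in that order; no header row is
# written since writeheader() is commented out above.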


if __name__ == "__main__":
    main()