#!/bin/python3

"""
This script performs an artifact analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
The generated table gives the number of artifacts that are available or
not available, and the number of artifacts that have been modified over
time.
"""

import argparse
import csv

def artifact_changed(table, name):
    """
    Indicates whether the artifact of the given name has changed over time.
    An artifact becoming unavailable is considered as modified.

    Parameters
    ----------
    table: list
        Artifact hash log table.
    name: str
        Name of the artifact to check.

    Returns
    -------
    bool
        True if the artifact changed, False otherwise.
    """
    changed = False
    i = 0
    artifact_hash = ""
    while i < len(table) and not changed:
        row = table[i]
        if row[2] == name:
            # If the first hash has not been saved yet:
            if artifact_hash == "":
                artifact_hash = row[1]  # Hash is in the 2nd column
            elif row[1] != artifact_hash:
                changed = True
        i += 1
    return changed

def artifact_available(table, name):
    """
    Indicates whether the artifact of the given name is still available.

    Parameters
    ----------
    table: list
        Artifact hash log table.
    name: str
        Name of the artifact to check.

    Returns
    -------
    bool
        True if the artifact is still available, False otherwise.
    """
    available = True
    for row in table:
        if row[2] == name:
            if row[1] == "-1":
                # -1 means the artifact could not be downloaded. Otherwise,
                # this column would contain the hash of the artifact.
                available = False
            else:
                available = True
    # The last log of the artifact hash determines whether the artifact is
    # currently available or not.
    return available

def analysis(input_table):
    """
    Analyzes the given artifact hash table to determine whether each artifact
    is still available and unchanged, has changed, or is no longer available.

    Parameters
    ----------
    input_table: list
        Artifact hash log table to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers
        as keys.
    """
    artifacts = {"available": 0, "unavailable": 0, "changed": 0}
    checked = []  # Artifacts that have been checked already
    for row in input_table:
        artifact_name = row[2]  # Name of the artifact in the 3rd column
        if artifact_name not in checked:
            if artifact_available(input_table, artifact_name):
                artifacts["available"] += 1
            else:
                artifacts["unavailable"] += 1
            if artifact_changed(input_table, artifact_name):
                artifacts["changed"] += 1
            checked.append(artifact_name)
    return artifacts

def main():
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog="artifact_analysis",
        description="""
        This script performs an artifact analysis on the outputs of the
        workflow to generate tables that can then be plotted by another
        program. The generated table gives the number of artifacts that
        are available or not available, and the number of artifacts that
        have been modified over time.
        """
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Shows more details on what is being done."
    )
    parser.add_argument(
        "-i", "--input",
        action="append",
        help="""
        The CSV file used as input for the analysis function. Multiple
        files can be specified by repeating this argument with different
        paths. All the input files must be artifact hash logs generated
        by ECG.
        """,
        required=True
    )
    parser.add_argument(
        "-o", "--output",
        help="""
        Path to the output CSV file that will be created by the analysis
        function.
        """,
        required=True
    )
    args = parser.parse_args()

    input_paths = args.input
    output_path = args.output

    # Parsing the input files:
    input_table = []
    for path in input_paths:
        with open(path, newline="") as input_file:
            input_table += list(csv.reader(input_file))

    # Analyzing the inputs:
    output_dict = analysis(input_table)

    # Writing the analysis to the output file:
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys())
        dict_writer.writeheader()
        dict_writer.writerow(output_dict)

if __name__ == "__main__":
    main()