#!/bin/python3 """ This script performs a software environment analysis on the outputs of the workflow to generate tables that can then be plotted by another program. """ import argparse import csv import os import datetime # All possible package sources, initialized to 0. # This is required to make the column of the result table deterministic, # so they can be determined without the header in the CSV file. pkgsources = {"dpkg":0, "rpm":0, "pacman":0, "pip":0, "conda":0, "git":0, "misc":0} def sources_stats(input_table, pkgsources): """ Analyzes the given package lists table to determine the number of artifacts using a package manager, Git packages or misc packages. Parameters ---------- input_table: str Table to analyse. pkgsources: dict A dictionnary that contains all the possible package sources as keys, with all keys' value initialized at 0. Returns ------- dict Output table of the analysis in the form of a dict with headers as keys. """ i = 0 for row in input_table: # Third column is the package source: if row[2] not in pkgsources: pkgsources[row[2]] = 1 else: pkgsources[row[2]] += 1 return pkgsources def pkg_changed(table, artifact_name, pkgname, pkgsource): """ Analyzes the given package lists table to determine if the given package changed for the given artifact. Parameters ---------- table: str Table to analyse. artifact_name: str Name of the artifact for which we want to analyze package changes. pkgname: str The package we want to track changes. pkgsource: str Source of the package, in case there is multiple packages with the same name but different sources. Returns ------- changed: bool True if the version number of the package changed over time, False otherwise. """ changed = False i = 0 pkgver = "" while i < len(table) and not changed: row = table[i] # Artifact name is in the 4th column, package name in the first, # and package source in the 3rd: if row[3] == artifact_name and row[0] == pkgname and row[2] == pkgsource: # If the first version number has not been saved yet: if pkgver == "": pkgver = row[1] # Package version is in the 2nd column elif row[1] != pkgver: changed = True i += 1 return changed def pkgs_changes(input_table, pkgsources): """ Analyzes the given package lists table to determine the number of packages that changed for every package source. Parameters ---------- input_table: str Table to analyse. pkgsources: dict A dictionnary that contains all the possible package sources as keys, with all keys' value initialized at 0. Returns ------- dict Output table of the analysis in the form of a dict with headers as keys. """ # Key is the artifact name, and value is a list of tuples constituted # of the package that has been checked and its source for this artifact: # FIXME: Memory usage? checked_artifacts = {} i = 0 for row in input_table: artifact_name = row[3] # Artifact name is in the 4th column if artifact_name not in checked_artifacts.keys(): checked_artifacts[artifact_name] = [] pkgname = row[0] # Package name is in the first column pkgsource = row[2] # Package source is in the 3rd column if (pkgname, pkgsource) not in checked_artifacts[artifact_name]: if pkg_changed(input_table, artifact_name, pkgname, pkgsource): pkgsources[pkgsource] += 1 checked_artifacts[artifact_name].append((pkgname, pkgsource)) return pkgsources def pkgs_per_container(input_table): print("ERROR: Not implemented!") def main(): # Command line arguments parsing: parser = argparse.ArgumentParser( prog = "softenv_analysis", description = """ This script performs a software environment analysis on the outputs of the workflow to generate tables that can then be plotted by another program. """ ) parser.add_argument( "-v", "--verbose", action = "store_true", help = "Shows more details on what is being done." ) parser.add_argument( "-t", "--analysis-type", help = """ Specify the type of software analysis to run. Depending on the type of analysis, multiple tables can be generated: the number of packages per source (a package manager, git or misc) by using `sources-stats`, the number of packages that changed over time (0 if only one file is given, since it will only include the package list of a single execution) by using `pkg-changes`, the number of packages per container by specifying `pkgs-per-container`. """, choices = ["sources-stats", "pkgs-changes", "pkgs-per-container"], required = True ) parser.add_argument( "-i", "--input", action = "append", nargs = "+", help = """ The CSV file used as input for the analysis function. Multiple files can be specified at once by separating them with a space. All the input files must be package lists generated by ECG. """, required = True ) parser.add_argument( "-o", "--output", help = """ Path to the output CSV file that will be created by the analysis function. """, required = True ) args = parser.parse_args() inputs = args.input output_path = args.output analysis_type = args.analysis_type # Parsing the input files: input_table = [] for i in inputs: for path in i: input_file = open(path) input_table += list(csv.reader(input_file)) input_file.close() # Analyzing the inputs: if analysis_type == "sources-stats": output_dict = sources_stats(input_table, pkgsources) elif analysis_type == "pkgs-changes": output_dict = pkgs_changes(input_table, pkgsources) elif analysis_type == "pkgs-per-container": output_dict = pkgs_per_container(input_table) # Adding the current time to every row: now = datetime.datetime.now() timestamp = str(datetime.datetime.timestamp(now)) output_dict["timestamp"] = timestamp # Writing analysis to output file: output_file = open(output_path, "w+") dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys()) # dict_writer.writeheader() dict_writer.writerow(output_dict) output_file.close() if __name__ == "__main__": main()