diff --git a/analysis/output_analysis.py b/analysis/artifact_analysis.py similarity index 50% rename from analysis/output_analysis.py rename to analysis/artifact_analysis.py index b8a4538..3f6038c 100755 --- a/analysis/output_analysis.py +++ b/analysis/artifact_analysis.py @@ -1,40 +1,18 @@ #!/bin/python3 """ - This script will analyze the outputs from ECG to generate tables that will - be later plotted. + This script performs an artifact analysis on the outputs of the workflow + to generate tables that can then be plotted by another program. + + The generated table gives the amount of artifacts that are available + or not available, and the amount of artifacts that have been modified + over time. """ import argparse import csv import os -def softenv_analysis(input_tables): - """ - Analyzes the given package lists tables to determine the number of artifacts - using a package manager, Git packages or misc packages. - - Parameters - ---------- - input_tables: str - Tables to analyse. - - Returns - ------- - dict - Output table of the analysis in the form of a dict with headers as keys. - """ - pkgmgr = {} - i = 0 - for table in input_tables: - for row in table: - # Third column is the package source: - if row[2] not in pkgmgr: - pkgmgr[row[2]] = 1 - else: - pkgmgr[row[2]] += 1 - return pkgmgr - def artifact_changed(table): """ Indicates whether the artifact involved in the given hash log table @@ -81,7 +59,7 @@ def artifact_available(table): available = False return available -def artifact_analysis(input_tables): +def analysis(input_tables): """ Analyzes the given artifact hash tables to determine if the artifacts are still available and didn't change, changed, or aren't available anymore. @@ -106,75 +84,44 @@ def artifact_analysis(input_tables): artifacts["changed"] += 1 return artifacts -def buildstatus_analysis(input_tables): - """ - Analyzes the given build status tables to count the results of the building - of the Dockerfile for each category. 
- - Parameters - ---------- - input_tables: str - Tables to analyse. - - Returns - ------- - dict - Output table of the analysis in the form of a dict with headers as keys. - """ - buildstatus = {} - for table in input_tables: - # # There has never been any error: - # if table == [[]]: - # if "never_failed" not in buildstatus: - # buildstatus["never_failed"] = 1 - # else: - # buildstatus["never_failed"] += 1 - # # There has been an error at least once: - # else: - for row in table: - # Third column is the result: - if row[2] not in buildstatus: - buildstatus[row[2]] = 1 - else: - buildstatus[row[2]] += 1 - return buildstatus - def main(): # Command line arguments parsing: parser = argparse.ArgumentParser( - prog = "output_analysis", - description = "This script analyzes the outputs from ECG to create " \ - "tables." + prog = "artifact_analysis", + description = + """ + This script performs an artifact analysis on the outputs of the workflow + to generate tables that can then be plotted by another program. + The generated table gives the amount of artifacts that are available + or not available, and the amount of artifacts that have been modified + over time. + """ ) parser.add_argument( "-v", "--verbose", action = "store_true", help = "Shows more details on what is being done." ) - parser.add_argument( - "-t", "--analysis-type", - help = "Specify the type of analysis to run.", - choices = ["soft-env", "artifact", "build-status"], - required = True - ) parser.add_argument( "-i", "--input", action = "append", - help = "The CSV file used as input for the analysis function." \ - "Multiple files can be specified by repeating this argument" \ - "with different paths. All the input files must be outputs" \ - "from ECG.", + help = + """ + The CSV file used as input for the analysis function. Multiple files + can be specified by repeating this argument with different paths. + All the input files must be artifact hash logs generated by ECG. 
+ """, required = True ) parser.add_argument( "-o", "--output", - help = "Path to the output CSV file that will be created by the " \ - "analysis function.", + help = + """ + Path to the output CSV file that will be created by the analysis function. + """, required = True ) args = parser.parse_args() - - analysis_type = args.analysis_type input_paths = args.input output_path = args.output @@ -188,12 +135,8 @@ def main(): # Analyzing the inputs: output_file = open(output_path, "w+") output_dict = {} - if analysis_type == "soft-env": - output_dict = softenv_analysis(input_tables) - elif analysis_type == "artifact": - output_dict = artifact_analysis(input_tables) - elif analysis_type == "build-status": - output_dict = buildstatus_analysis(input_tables) + output_dict = analysis(input_tables) + # Writing analysis to output file: dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys()) dict_writer.writeheader() diff --git a/analysis/buildstatus_analysis.py b/analysis/buildstatus_analysis.py new file mode 100755 index 0000000..e6362ef --- /dev/null +++ b/analysis/buildstatus_analysis.py @@ -0,0 +1,109 @@ +#!/bin/python3 + +""" + This script performs a build status analysis on the outputs of the workflow + to generate tables that can then be plotted by another program. + + The generated table gives the amount of images that have been built + successfully, and the amount of images that failed to build, for each + category of error. +""" + +import argparse +import csv +import os + +def analysis(input_tables): + """ + Analyzes the given build status tables to count the results of the building + of the Dockerfile for each category. + + Parameters + ---------- + input_tables: str + Tables to analyse. + + Returns + ------- + dict + Output table of the analysis in the form of a dict with headers as keys. 
+ """ + buildstatus = {} + for table in input_tables: + # # There has never been any error: + # if table == [[]]: + # if "never_failed" not in buildstatus: + # buildstatus["never_failed"] = 1 + # else: + # buildstatus["never_failed"] += 1 + # # There has been an error at least once: + # else: + for row in table: + # Third column is the result: + if row[2] not in buildstatus: + buildstatus[row[2]] = 1 + else: + buildstatus[row[2]] += 1 + return buildstatus + +def main(): + # Command line arguments parsing: + parser = argparse.ArgumentParser( + prog = "buildstatus_analysis", + description = + """ + This script performs a build status analysis on the outputs of the + workflow to generate tables that can then be plotted by another program. + The generated table gives the amount of images that have been + built successfully, and the amount of images that failed to build, + for each category of error. + """ + ) + parser.add_argument( + "-v", "--verbose", + action = "store_true", + help = "Shows more details on what is being done." + ) + parser.add_argument( + "-i", "--input", + action = "append", + help = + """ + The CSV file used as input for the analysis function. Multiple files + can be specified by repeating this argument with different paths. + All the input files must be build status logs generated by ECG. + """, + required = True + ) + parser.add_argument( + "-o", "--output", + help = + """ + Path to the output CSV file that will be created by the analysis function. 
+ """, + required = True + ) + args = parser.parse_args() + input_paths = args.input + output_path = args.output + + # Parsing the input files: + input_tables = [] + for path in input_paths: + input_file = open(path) + input_tables.append(list(csv.reader(input_file))) + input_file.close() + + # Analyzing the inputs: + output_file = open(output_path, "w+") + output_dict = {} + output_dict = analysis(input_tables) + + # Writing analysis to output file: + dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys()) + dict_writer.writeheader() + dict_writer.writerow(output_dict) + output_file.close() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/analysis/softenv_analysis.py b/analysis/softenv_analysis.py new file mode 100755 index 0000000..72e0d68 --- /dev/null +++ b/analysis/softenv_analysis.py @@ -0,0 +1,121 @@ +#!/bin/python3 + +""" + This script performs a software environment analysis on the outputs + of the workflow to generate tables that can then be plotted by another + program. + + Depending on the type of analysis, multiple tables can be generated: + - sources-stats: Number of packages per source (a package manager, git or + misc) + - pkg-changes: Number of packages that changed over time (0 if only one file + is given, since it will only include the package list of a single execution) + - pkgs-per-container: Number of packages per container +""" + +import argparse +import csv +import os + +def sources_stats(input_tables): + """ + Analyzes the given package lists tables to determine the number of artifacts + using a package manager, Git packages or misc packages. + + Parameters + ---------- + input_tables: str + Tables to analyse. + + Returns + ------- + dict + Output table of the analysis in the form of a dict with headers as keys. 
+ """ + pkgmgr = {} + i = 0 + for table in input_tables: + for row in table: + # Third column is the package source: + if row[2] not in pkgmgr: + pkgmgr[row[2]] = 1 + else: + pkgmgr[row[2]] += 1 + return pkgmgr + +def main(): + # Command line arguments parsing: + parser = argparse.ArgumentParser( + prog = "softenv_analysis", + description = + """ + This script performs a software environment analysis on the outputs + of the workflow to generate tables that can then be plotted + by another program. + """ + ) + parser.add_argument( + "-v", "--verbose", + action = "store_true", + help = "Shows more details on what is being done." + ) + parser.add_argument( + "-t", "--analysis-type", + help = + """ + Specify the type of software analysis to run. Depending on the + type of analysis, multiple tables can be generated: + the number of packages per source (a package manager, git or misc) + by using `sources-stats`, + the number of packages that changed over time (0 if only + one file is given, since it will only include the package list + of a single execution) by using `pkg-changes`, + the number of packages per container by specifying `pkgs-per-container`. + """, + choices = ["sources-stats", "pkg-changes", "pkgs-per-container"], + required = True + ) + parser.add_argument( + "-i", "--input", + action = "append", + help = + """ + The CSV file used as input for the analysis function. Multiple files + can be specified by repeating this argument with different paths. + All the input files must be package lists generated by ECG. + """, + required = True + ) + parser.add_argument( + "-o", "--output", + help = + """ + Path to the output CSV file that will be created by the analysis function. 
+ """, + required = True + ) + args = parser.parse_args() + input_paths = args.input + output_path = args.output + analysis_type = args.analysis_type + + # Parsing the input files: + input_tables = [] + for path in input_paths: + input_file = open(path) + input_tables.append(list(csv.reader(input_file))) + input_file.close() + + # Analyzing the inputs: + output_file = open(output_path, "w+") + if analysis_type == "sources-stats": + output_dict = sources_stats(input_tables) + + # Writing analysis to output file: + dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys()) + dict_writer.writeheader() + dict_writer.writerow(output_dict) + output_file.close() + +if __name__ == "__main__": + main() \ No newline at end of file