Separated the analysis into 3 different scripts.

This commit is contained in:
antux18 2024-08-06 16:50:07 +02:00
parent 74228cd1c5
commit 58dd52e403
3 changed files with 258 additions and 85 deletions

View File

@ -1,40 +1,18 @@
#!/bin/python3
"""
This script will analyze the outputs from ECG to generate tables that will
be later plotted.
This script performs an artifact analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
The generated table gives the amount of artifacts that are available
or not available, and the amount of artifacts that have been modified
over time.
"""
import argparse
import csv
import os
def softenv_analysis(input_tables):
    """
    Analyzes the given package lists tables to determine the number of artifacts
    using a package manager, Git packages or misc packages.

    Parameters
    ----------
    input_tables: list
        List of package list tables (each one a list of CSV rows) to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    pkgmgr = {}
    for table in input_tables:
        for row in table:
            # Third column is the package source (a package manager, git
            # or misc); count one occurrence per package:
            pkgmgr[row[2]] = pkgmgr.get(row[2], 0) + 1
    return pkgmgr
def artifact_changed(table):
"""
Indicates whether the artifact involved in the given hash log table
@ -81,7 +59,7 @@ def artifact_available(table):
available = False
return available
def artifact_analysis(input_tables):
def analysis(input_tables):
"""
Analyzes the given artifact hash tables to determine if the artifacts are
still available and didn't change, changed, or aren't available anymore.
@ -106,75 +84,44 @@ def artifact_analysis(input_tables):
artifacts["changed"] += 1
return artifacts
def buildstatus_analysis(input_tables):
    """
    Analyzes the given build status tables to count the results of the building
    of the Dockerfile for each category.

    Parameters
    ----------
    input_tables: list
        List of build status tables (each one a list of CSV rows) to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    buildstatus = {}
    for table in input_tables:
        for row in table:
            # Third column is the build result category; count one
            # occurrence per build attempt:
            buildstatus[row[2]] = buildstatus.get(row[2], 0) + 1
    return buildstatus
def main():
# Command line arguments parsing:
parser = argparse.ArgumentParser(
prog = "output_analysis",
description = "This script analyzes the outputs from ECG to create " \
"tables."
prog = "artifact_analysis",
description =
"""
This script performs an artifact analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
The generated table gives the amount of artifacts that are available
or not available, and the amount of artifacts that have been modified
over time.
"""
)
parser.add_argument(
"-v", "--verbose",
action = "store_true",
help = "Shows more details on what is being done."
)
parser.add_argument(
"-t", "--analysis-type",
help = "Specify the type of analysis to run.",
choices = ["soft-env", "artifact", "build-status"],
required = True
)
parser.add_argument(
"-i", "--input",
action = "append",
help = "The CSV file used as input for the analysis function." \
"Multiple files can be specified by repeating this argument" \
"with different paths. All the input files must be outputs" \
"from ECG.",
help =
"""
The CSV file used as input for the analysis function. Multiple files
can be specified by repeating this argument with different paths.
All the input files must be artifact hash logs generated by ECG.
""",
required = True
)
parser.add_argument(
"-o", "--output",
help = "Path to the output CSV file that will be created by the " \
"analysis function.",
help =
"""
Path to the output CSV file that will be created by the analysis function.
""",
required = True
)
args = parser.parse_args()
analysis_type = args.analysis_type
input_paths = args.input
output_path = args.output
@ -188,12 +135,8 @@ def main():
# Analyzing the inputs:
output_file = open(output_path, "w+")
output_dict = {}
if analysis_type == "soft-env":
output_dict = softenv_analysis(input_tables)
elif analysis_type == "artifact":
output_dict = artifact_analysis(input_tables)
elif analysis_type == "build-status":
output_dict = buildstatus_analysis(input_tables)
output_dict = analysis(input_tables)
# Writing analysis to output file:
dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys())
dict_writer.writeheader()

109
analysis/buildstatus_analysis.py Executable file
View File

@ -0,0 +1,109 @@
#!/bin/python3
"""
This script performs a build status analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
The generated table gives the amount of images that have been built
successfully, and the amount of images that failed to build, for each
category of error.
"""
import argparse
import csv
import os
def analysis(input_tables):
    """
    Analyzes the given build status tables to count the results of the building
    of the Dockerfile for each category.

    Parameters
    ----------
    input_tables: list
        List of build status tables (each one a list of CSV rows) to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    buildstatus = {}
    for table in input_tables:
        for row in table:
            # Third column is the build result category; count one
            # occurrence per build attempt:
            buildstatus[row[2]] = buildstatus.get(row[2], 0) + 1
    return buildstatus
def main():
    """
    Entry point: parses the command line arguments, reads the input build
    status CSV files, runs the build status analysis and writes the
    resulting table to the output CSV file.
    """
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "buildstatus_analysis",
        description =
        """
        This script performs a build status analysis on the outputs of the
        workflow to generate tables that can then be plotted by another program.
        The generated table gives the amount of images that have been
        built successfully, and the amount of images that failed to build,
        for each category of error.
        """
    )
    parser.add_argument(
        "-v", "--verbose",
        action = "store_true",
        help = "Shows more details on what is being done."
    )
    parser.add_argument(
        "-i", "--input",
        action = "append",
        help =
        """
        The CSV file used as input for the analysis function. Multiple files
        can be specified by repeating this argument with different paths.
        All the input files must be build status logs generated by ECG.
        """,
        required = True
    )
    parser.add_argument(
        "-o", "--output",
        help =
        """
        Path to the output CSV file that will be created by the analysis function.
        """,
        required = True
    )
    args = parser.parse_args()
    input_paths = args.input
    output_path = args.output

    # Parsing the input files. Context managers guarantee the files are
    # closed even if CSV parsing raises:
    input_tables = []
    for path in input_paths:
        with open(path) as input_file:
            input_tables.append(list(csv.reader(input_file)))

    # Analyzing the inputs:
    output_dict = analysis(input_tables)

    # Writing analysis to output file. newline="" is the documented way
    # to open a file handed to the csv module for writing:
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys())
        dict_writer.writeheader()
        dict_writer.writerow(output_dict)

if __name__ == "__main__":
    main()

121
analysis/softenv_analysis.py Executable file
View File

@ -0,0 +1,121 @@
#!/bin/python3
"""
This script performs a software environment analysis on the outputs
of the workflow to generate tables that can then be plotted by another
program.
Depending on the type of analysis, multiple tables can be generated:
- sources-stats: Number of packages per source (a package manager, git or
misc)
- pkg-changes: Number of packages that changed over time (0 if only one file
is given, since it will only include the package list of a single execution)
- pkg-per-container: Number of packages per container
"""
import argparse
import csv
import os
def sources_stats(input_tables):
    """
    Analyzes the given package lists tables to determine the number of artifacts
    using a package manager, Git packages or misc packages.

    Parameters
    ----------
    input_tables: list
        List of package list tables (each one a list of CSV rows) to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    pkgmgr = {}
    for table in input_tables:
        for row in table:
            # Third column is the package source (a package manager, git
            # or misc); count one occurrence per package:
            pkgmgr[row[2]] = pkgmgr.get(row[2], 0) + 1
    return pkgmgr
def main():
    """
    Entry point: parses the command line arguments, reads the input
    package list CSV files, runs the requested software environment
    analysis and writes the resulting table to the output CSV file.
    """
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "softenv_analysis",
        description =
        """
        This script performs a software environment analysis on the outputs
        of the workflow to generate tables that can then be plotted
        by another program.
        """
    )
    parser.add_argument(
        "-v", "--verbose",
        action = "store_true",
        help = "Shows more details on what is being done."
    )
    parser.add_argument(
        "-t", "--analysis-type",
        help =
        """
        Specify the type of software analysis to run. Depending on the
        type of analysis, multiple tables can be generated:
        the number of packages per source (a package manager, git or misc)
        by using `sources-stats`,
        the number of packages that changed over time (0 if only
        one file is given, since it will only include the package list
        of a single execution) by using `pkg-changes`,
        the number of packages per container by specifying `pkgs-per-container`.
        """,
        choices = ["sources-stats", "pkg-changes", "pkgs-per-container"],
        required = True
    )
    parser.add_argument(
        "-i", "--input",
        action = "append",
        help =
        """
        The CSV file used as input for the analysis function. Multiple files
        can be specified by repeating this argument with different paths.
        All the input files must be package lists generated by ECG.
        """,
        required = True
    )
    parser.add_argument(
        "-o", "--output",
        help =
        """
        Path to the output CSV file that will be created by the analysis function.
        """,
        required = True
    )
    args = parser.parse_args()
    input_paths = args.input
    output_path = args.output
    analysis_type = args.analysis_type

    # Parsing the input files. Context managers guarantee the files are
    # closed even if CSV parsing raises:
    input_tables = []
    for path in input_paths:
        with open(path) as input_file:
            input_tables.append(list(csv.reader(input_file)))

    # Analyzing the inputs. Only `sources-stats` is implemented so far;
    # fail early (before creating the output file) for the other choices
    # instead of crashing later with an UnboundLocalError:
    if analysis_type == "sources-stats":
        output_dict = sources_stats(input_tables)
    else:
        parser.error(f"analysis type '{analysis_type}' is not implemented yet")

    # Writing analysis to output file. newline="" is the documented way
    # to open a file handed to the csv module for writing:
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys())
        dict_writer.writeheader()
        dict_writer.writerow(output_dict)

if __name__ == "__main__":
    main()