Separated the analysis into 3 different scripts.

This commit is contained in:
antux18 2024-08-06 16:50:07 +02:00
parent 74228cd1c5
commit 58dd52e403
3 changed files with 258 additions and 85 deletions

View File

@ -1,40 +1,18 @@
#!/bin/python3
"""
This script will analyze the outputs from ECG to generate tables that will
be later plotted.
This script performs an artifact analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
The generated table gives the amount of artifacts that are available
or not available, and the amount of artifacts that have been modified
over time.
"""
import argparse
import csv
import os
def softenv_analysis(input_tables):
    """
    Analyzes the given package lists tables to determine the number of artifacts
    using a package manager, Git packages or misc packages.

    Parameters
    ----------
    input_tables: list
        List of package list tables (each one a list of CSV rows) to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    pkgmgr = {}
    for table in input_tables:
        for row in table:
            # Third column is the package source (a package manager, git
            # or misc); count one occurrence per package:
            pkgmgr[row[2]] = pkgmgr.get(row[2], 0) + 1
    return pkgmgr
def artifact_changed(table):
"""
Indicates whether the artifact involved in the given hash log table
@ -81,7 +59,7 @@ def artifact_available(table):
available = False
return available
def artifact_analysis(input_tables):
def analysis(input_tables):
"""
Analyzes the given artifact hash tables to determine if the artifacts are
still available and didn't change, changed, or aren't available anymore.
@ -106,75 +84,44 @@ def artifact_analysis(input_tables):
artifacts["changed"] += 1
return artifacts
def buildstatus_analysis(input_tables):
    """
    Analyzes the given build status tables to count the results of the building
    of the Dockerfile for each category.

    Parameters
    ----------
    input_tables: list
        List of build status tables (each one a list of CSV rows) to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    buildstatus = {}
    for table in input_tables:
        for row in table:
            # Third column is the build result category; count one
            # occurrence per build attempt:
            buildstatus[row[2]] = buildstatus.get(row[2], 0) + 1
    return buildstatus
def main():
# Command line arguments parsing:
parser = argparse.ArgumentParser(
prog = "output_analysis",
description = "This script analyzes the outputs from ECG to create " \
"tables."
prog = "artifact_analysis",
description =
"""
This script performs an artifact analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
The generated table gives the amount of artifacts that are available
or not available, and the amount of artifacts that have been modified
over time.
"""
)
parser.add_argument(
"-v", "--verbose",
action = "store_true",
help = "Shows more details on what is being done."
)
parser.add_argument(
"-t", "--analysis-type",
help = "Specify the type of analysis to run.",
choices = ["soft-env", "artifact", "build-status"],
required = True
)
parser.add_argument(
"-i", "--input",
action = "append",
help = "The CSV file used as input for the analysis function." \
"Multiple files can be specified by repeating this argument" \
"with different paths. All the input files must be outputs" \
"from ECG.",
help =
"""
The CSV file used as input for the analysis function. Multiple files
can be specified by repeating this argument with different paths.
All the input files must be artifact hash logs generated by ECG.
""",
required = True
)
parser.add_argument(
"-o", "--output",
help = "Path to the output CSV file that will be created by the " \
"analysis function.",
help =
"""
Path to the output CSV file that will be created by the analysis function.
""",
required = True
)
args = parser.parse_args()
analysis_type = args.analysis_type
input_paths = args.input
output_path = args.output
@ -188,12 +135,8 @@ def main():
# Analyzing the inputs:
output_file = open(output_path, "w+")
output_dict = {}
if analysis_type == "soft-env":
output_dict = softenv_analysis(input_tables)
elif analysis_type == "artifact":
output_dict = artifact_analysis(input_tables)
elif analysis_type == "build-status":
output_dict = buildstatus_analysis(input_tables)
output_dict = analysis(input_tables)
# Writing analysis to output file:
dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys())
dict_writer.writeheader()

109
analysis/buildstatus_analysis.py Executable file
View File

@ -0,0 +1,109 @@
#!/bin/python3
"""
This script performs a build status analysis on the outputs of the workflow
to generate tables that can then be plotted by another program.
The generated table gives the amount of images that have been built
successfully, and the amount of images that failed to build, for each
category of error.
"""
import argparse
import csv
import os
def analysis(input_tables):
    """
    Analyzes the given build status tables to count the results of the building
    of the Dockerfile for each category.

    Parameters
    ----------
    input_tables: list
        List of build status tables (each one a list of CSV rows) to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    buildstatus = {}
    for table in input_tables:
        for row in table:
            # Third column is the build result category; count one
            # occurrence per build attempt:
            buildstatus[row[2]] = buildstatus.get(row[2], 0) + 1
    return buildstatus
def main():
    """
    Entry point: parses the command line arguments, reads the input build
    status CSV files, runs the build status analysis and writes the
    resulting table to the output CSV file.
    """
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "buildstatus_analysis",
        description =
        """
        This script performs a build status analysis on the outputs of the
        workflow to generate tables that can then be plotted by another program.
        The generated table gives the amount of images that have been
        built successfully, and the amount of images that failed to build,
        for each category of error.
        """
    )
    parser.add_argument(
        "-v", "--verbose",
        action = "store_true",
        help = "Shows more details on what is being done."
    )
    parser.add_argument(
        "-i", "--input",
        action = "append",
        help =
        """
        The CSV file used as input for the analysis function. Multiple files
        can be specified by repeating this argument with different paths.
        All the input files must be build status logs generated by ECG.
        """,
        required = True
    )
    parser.add_argument(
        "-o", "--output",
        help =
        """
        Path to the output CSV file that will be created by the analysis function.
        """,
        required = True
    )
    args = parser.parse_args()
    input_paths = args.input
    output_path = args.output

    # Parsing the input files. Context managers guarantee the files are
    # closed even if CSV parsing raises:
    input_tables = []
    for path in input_paths:
        with open(path) as input_file:
            input_tables.append(list(csv.reader(input_file)))

    # Analyzing the inputs:
    output_dict = analysis(input_tables)

    # Writing analysis to output file. newline="" is the documented way
    # to open a file handed to the csv module for writing:
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys())
        dict_writer.writeheader()
        dict_writer.writerow(output_dict)

if __name__ == "__main__":
    main()

121
analysis/softenv_analysis.py Executable file
View File

@ -0,0 +1,121 @@
#!/bin/python3
"""
This script performs a software environment analysis on the outputs
of the workflow to generate tables that can then be plotted by another
program.
Depending on the type of analysis, multiple tables can be generated:
- sources-stats: Number of packages per source (a package manager, git or
misc)
- pkg-changes: Number of packages that changed over time (0 if only one file
is given, since it will only include the package list of a single execution)
- pkg-per-container: Number of packages per container
"""
import argparse
import csv
import os
def sources_stats(input_tables):
    """
    Analyzes the given package lists tables to determine the number of artifacts
    using a package manager, Git packages or misc packages.

    Parameters
    ----------
    input_tables: list
        List of package list tables (each one a list of CSV rows) to analyse.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
    """
    pkgmgr = {}
    for table in input_tables:
        for row in table:
            # Third column is the package source (a package manager, git
            # or misc); count one occurrence per package:
            pkgmgr[row[2]] = pkgmgr.get(row[2], 0) + 1
    return pkgmgr
def main():
    """
    Entry point: parses the command line arguments, reads the input
    package list CSV files, runs the requested software environment
    analysis and writes the resulting table to the output CSV file.
    """
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "softenv_analysis",
        description =
        """
        This script performs a software environment analysis on the outputs
        of the workflow to generate tables that can then be plotted
        by another program.
        """
    )
    parser.add_argument(
        "-v", "--verbose",
        action = "store_true",
        help = "Shows more details on what is being done."
    )
    parser.add_argument(
        "-t", "--analysis-type",
        help =
        """
        Specify the type of software analysis to run. Depending on the
        type of analysis, multiple tables can be generated:
        the number of packages per source (a package manager, git or misc)
        by using `sources-stats`,
        the number of packages that changed over time (0 if only
        one file is given, since it will only include the package list
        of a single execution) by using `pkg-changes`,
        the number of packages per container by specifying `pkgs-per-container`.
        """,
        choices = ["sources-stats", "pkg-changes", "pkgs-per-container"],
        required = True
    )
    parser.add_argument(
        "-i", "--input",
        action = "append",
        help =
        """
        The CSV file used as input for the analysis function. Multiple files
        can be specified by repeating this argument with different paths.
        All the input files must be package lists generated by ECG.
        """,
        required = True
    )
    parser.add_argument(
        "-o", "--output",
        help =
        """
        Path to the output CSV file that will be created by the analysis function.
        """,
        required = True
    )
    args = parser.parse_args()
    input_paths = args.input
    output_path = args.output
    analysis_type = args.analysis_type

    # Parsing the input files. Context managers guarantee the files are
    # closed even if CSV parsing raises:
    input_tables = []
    for path in input_paths:
        with open(path) as input_file:
            input_tables.append(list(csv.reader(input_file)))

    # Analyzing the inputs. Only `sources-stats` is implemented so far;
    # fail early (before creating the output file) for the other choices
    # instead of crashing later with an UnboundLocalError:
    if analysis_type == "sources-stats":
        output_dict = sources_stats(input_tables)
    else:
        parser.error(f"analysis type '{analysis_type}' is not implemented yet")

    # Writing analysis to output file. newline="" is the documented way
    # to open a file handed to the csv module for writing:
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=output_dict.keys())
        dict_writer.writeheader()
        dict_writer.writerow(output_dict)

if __name__ == "__main__":
    main()