2024-08-06 16:50:07 +02:00
|
|
|
#!/bin/python3
|
|
|
|
|
|
|
|
"""
|
|
|
|
This script performs a software environment analysis on the outputs
|
|
|
|
of the workflow to generate tables that can then be plotted by another
|
|
|
|
program.
|
|
|
|
|
|
|
|
Depending on the type of analysis, multiple tables can be generated:
|
2024-08-07 11:22:54 +02:00
|
|
|
- `sources-stats`: Number of packages per source (a package manager, git or
|
2024-08-06 16:50:07 +02:00
|
|
|
misc)
|
2024-08-07 11:22:54 +02:00
|
|
|
- `pkgs-changes`: Number of packages that changed over time (0 if only one file
|
2024-08-06 16:50:07 +02:00
|
|
|
is given, since it will only include the package list of a single execution)
|
2024-08-07 11:22:54 +02:00
|
|
|
- `pkgs-per-container`: Number of packages per container
|
2024-08-06 16:50:07 +02:00
|
|
|
"""
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import csv
|
|
|
|
import os
|
|
|
|
|
2024-08-07 11:22:54 +02:00
|
|
|
def sources_stats(input_table):
    """
    Analyzes the given package lists table to determine the number of artifacts
    using a package manager, Git packages or misc packages.

    Every non-empty row of the table is counted once, under the package
    source found in its third column.

    Parameters
    ----------
    input_table: list
        Table to analyse, given as an iterable of rows where each row is
        a sequence of column values (as produced by `csv.reader`).

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
        Keys are the package sources, in order of first appearance, and
        values are the number of rows found for each source.
    """
    pkgmgr = {}
    for row in input_table:
        # `csv.reader` yields an empty list for blank lines (for instance a
        # trailing newline at the end of a file): there is nothing to count.
        if not row:
            continue
        # Third column is the package source:
        source = row[2]
        pkgmgr[source] = pkgmgr.get(source, 0) + 1
    return pkgmgr
|
|
|
|
|
2024-08-07 11:22:54 +02:00
|
|
|
# def pkg_changed(pkgname, )
|
|
|
|
|
|
|
|
def pkgs_changes(input_table):
    """
    Analyzes the given package lists table to determine the number of packages
    that changed for every package source.

    NOTE: the current implementation does not detect changes between
    executions yet. It only counts the non-empty rows of the table for every
    package source (third column), which is the same result as
    `sources_stats`.
    TODO: compare the packages across executions to count actual changes.

    Parameters
    ----------
    input_table: list
        Table to analyse, given as an iterable of rows where each row is
        a sequence of column values (as produced by `csv.reader`).

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as keys.
        Keys are the package sources, in order of first appearance.
    """
    pkgmgr = {}
    for row in input_table:
        # `csv.reader` yields an empty list for blank lines (for instance a
        # trailing newline at the end of a file): there is nothing to count.
        if not row:
            continue
        # Third column is the package source:
        source = row[2]
        pkgmgr[source] = pkgmgr.get(source, 0) + 1
    return pkgmgr
|
|
|
|
|
|
|
|
def pkgs_per_container(input_table):
    """
    Analyzes the given package lists table to determine the number of packages
    per container.

    This analysis is not implemented yet. The function raises an explicit
    error instead of silently returning `None`, which the caller would then
    fail on with an unrelated `AttributeError` when writing the output table.

    Parameters
    ----------
    input_table: list
        Table to analyse (currently unused).

    Raises
    ------
    NotImplementedError
        Always, until the analysis is implemented.
    """
    raise NotImplementedError(
        "The `pkgs-per-container` analysis is not implemented yet."
    )
|
|
|
|
|
2024-08-06 16:50:07 +02:00
|
|
|
def main():
    """
    Command-line entry point of the script.

    Parses the command line arguments, loads every input CSV file into a
    single table, runs the requested analysis on it and writes the resulting
    dict to the output CSV file as one header line followed by one row.
    """
    # Maps every analysis type accepted on the command line to the function
    # implementing it, so the accepted choices and the dispatch cannot diverge:
    analyses = {
        "sources-stats": sources_stats,
        "pkgs-changes": pkgs_changes,
        "pkgs-per-container": pkgs_per_container,
    }

    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "softenv_analysis",
        description =
        """
        This script performs a software environment analysis on the outputs
        of the workflow to generate tables that can then be plotted
        by another program.
        """
    )
    # NOTE: this flag is parsed but nothing in this function reads it.
    parser.add_argument(
        "-v", "--verbose",
        action = "store_true",
        help = "Shows more details on what is being done."
    )
    parser.add_argument(
        "-t", "--analysis-type",
        help =
        """
        Specify the type of software analysis to run. Depending on the
        type of analysis, multiple tables can be generated:
        the number of packages per source (a package manager, git or misc)
        by using `sources-stats`,
        the number of packages that changed over time (0 if only
        one file is given, since it will only include the package list
        of a single execution) by using `pkgs-changes`,
        the number of packages per container by specifying `pkgs-per-container`.
        """,
        choices = list(analyses),
        required = True
    )
    parser.add_argument(
        "-i", "--input",
        action = "append",
        help =
        """
        The CSV file used as input for the analysis function. Multiple files
        can be specified by repeating this argument with different paths.
        All the input files must be package lists generated by ECG.
        """,
        required = True
    )
    parser.add_argument(
        "-o", "--output",
        help =
        """
        Path to the output CSV file that will be created by the analysis function.
        """,
        required = True
    )
    args = parser.parse_args()
    input_paths = args.input
    output_path = args.output
    analysis_type = args.analysis_type

    # Parsing the input files:
    input_table = []
    for path in input_paths:
        # The csv module requires files to be opened with newline="" so that
        # it handles line endings itself; `with` closes the file on error too.
        with open(path, newline = "") as input_file:
            input_table.extend(csv.reader(input_file))

    # Analyzing the inputs:
    output_dict = analyses[analysis_type](input_table)

    # Writing analysis to output file:
    with open(output_path, "w", newline = "") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames = output_dict.keys())
        dict_writer.writeheader()
        dict_writer.writerow(output_dict)
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|