2024-08-06 16:50:07 +02:00
|
|
|
#!/bin/python3
|
|
|
|
|
|
|
|
"""
|
|
|
|
This script performs a software environment analysis on the outputs
|
|
|
|
of the workflow to generate tables that can then be plotted by another
|
|
|
|
program.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import csv
|
|
|
|
import os
|
2024-08-07 17:31:35 +02:00
|
|
|
import datetime
|
2024-08-06 16:50:07 +02:00
|
|
|
|
2024-08-07 19:51:21 +02:00
|
|
|
# Every known package source, each starting with a count of 0.
# Declaring them up front gives the result table a fixed column order, so
# the columns can be identified without relying on a header in the CSV file.
pkgsources = dict.fromkeys(
    ("dpkg", "rpm", "pacman", "pip", "conda", "git", "misc"),
    0,
)
|
|
|
|
|
|
|
|
def sources_stats(input_table, pkgsources):
    """
    Analyzes the given package lists table to determine the number of
    packages coming from each package source (a package manager, Git or
    misc).

    Parameters
    ----------
    input_table: list
        Table to analyse, as a list of rows (for instance the rows produced
        by `csv.reader`). The package source is read from the third column.
        Rows with fewer than three columns (such as the empty rows produced
        by blank lines in a CSV file) are ignored.

    pkgsources: dict
        A dictionary that contains all the possible package sources as keys,
        with all keys' value initialized at 0. It is only used as a template
        and is not modified.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as
        keys: the keys of `pkgsources` in their original order, followed by
        any source found in the table that was not listed, in order of
        first appearance.
    """
    # Work on a copy so the caller's template keeps its initial counts.
    counts = dict(pkgsources)
    for row in input_table:
        if len(row) < 3:
            # Blank or truncated row: there is no source column to count.
            continue
        source = row[2]
        counts[source] = counts.get(source, 0) + 1
    return counts
|
2024-08-06 16:50:07 +02:00
|
|
|
|
2024-08-07 12:26:36 +02:00
|
|
|
def pkg_changed(table, artifact_name, pkgname, pkgsource):
    """
    Analyzes the given package lists table to determine if the given package
    changed for the given artifact.

    Parameters
    ----------
    table: list
        Table to analyse, as a list of rows. Each row holds the package name
        in the first column, the package version in the 2nd, the package
        source in the 3rd and the artifact name in the 4th. Rows with fewer
        than four columns (such as the empty rows produced by blank lines in
        a CSV file) are ignored.

    artifact_name: str
        Name of the artifact for which we want to analyze package changes.

    pkgname: str
        The package whose changes we want to track.

    pkgsource: str
        Source of the package, in case there are multiple packages with the
        same name but different sources.

    Returns
    -------
    changed: bool
        True if the version number of the package changed over time, False
        otherwise.
    """
    # None (rather than "") marks "no version seen yet", so that a package
    # whose first recorded version is an empty string is still tracked.
    first_version = None
    for row in table:
        if len(row) < 4:
            continue
        if row[3] != artifact_name or row[0] != pkgname or row[2] != pkgsource:
            continue
        if first_version is None:
            first_version = row[1]
        elif row[1] != first_version:
            # One differing version is enough, no need to scan further.
            return True
    return False
|
2024-08-07 11:22:54 +02:00
|
|
|
|
2024-08-07 19:51:21 +02:00
|
|
|
def pkgs_changes(input_table, pkgsources):
    """
    Analyzes the given package lists table to determine the number of packages
    that changed for every package source.

    A package is identified by its artifact, its name and its source. It is
    counted as changed (once) when at least one of its rows carries a version
    different from the first version recorded for it.

    Parameters
    ----------
    input_table: list
        Table to analyse, as a list of rows. Each row holds the package name
        in the first column, the package version in the 2nd, the package
        source in the 3rd and the artifact name in the 4th. Rows with fewer
        than four columns (such as the empty rows produced by blank lines in
        a CSV file) are ignored.

    pkgsources: dict
        A dictionary that contains all the possible package sources as keys,
        with all keys' value initialized at 0. It is only used as a template
        and is not modified.

    Returns
    -------
    dict
        Output table of the analysis in the form of a dict with headers as
        keys. A source that is not listed in `pkgsources` is appended the
        first time one of its packages is found to have changed.
    """
    # Work on a copy so the caller's template keeps its initial counts.
    counts = dict(pkgsources)

    # First version seen for every (artifact, package, source), filled in a
    # single pass over the table instead of rescanning it for each package.
    # NOTE: holds one entry per distinct package of every artifact.
    first_versions = {}
    changed = set()
    for row in input_table:
        if len(row) < 4:
            continue
        pkgname, pkgver, pkgsource, artifact_name = row[:4]
        key = (artifact_name, pkgname, pkgsource)
        if key not in first_versions:
            first_versions[key] = pkgver
        elif pkgver != first_versions[key]:
            changed.add(key)

    # Walk the packages in order of first appearance so that sources missing
    # from the template are appended in a deterministic order.
    for key in first_versions:
        if key in changed:
            source = key[2]
            counts[source] = counts.get(source, 0) + 1
    return counts
|
2024-08-07 11:22:54 +02:00
|
|
|
|
|
|
|
def pkgs_per_container(input_table):
    """
    Placeholder for the packages-per-container analysis.

    The analysis is not implemented: the function only reports that on
    standard output, ignores `input_table` and returns None.
    """
    print("ERROR: Not implemented!")
    return None
|
2024-08-07 11:22:54 +02:00
|
|
|
|
2024-08-06 16:50:07 +02:00
|
|
|
def main():
    """
    Command line entry point.

    Parses the arguments, loads every input CSV file into a single table,
    runs the requested analysis on it, appends the current timestamp to the
    result and writes it as a single row to the output CSV file.
    """
    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog="softenv_analysis",
        description="""
            This script performs a software environment analysis on the outputs
            of the workflow to generate tables that can then be plotted
            by another program.
            """,
    )
    # NOTE: --verbose is accepted but not used by the code below.
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Shows more details on what is being done.",
    )
    parser.add_argument(
        "-t", "--analysis-type",
        help="""
            Specify the type of software analysis to run. Depending on the
            type of analysis, multiple tables can be generated:
            the number of packages per source (a package manager, git or misc)
            by using `sources-stats`,
            the number of packages that changed over time (0 if only
            one file is given, since it will only include the package list
            of a single execution) by using `pkgs-changes`,
            the number of packages per container by specifying `pkgs-per-container`.
            """,
        choices=["sources-stats", "pkgs-changes", "pkgs-per-container"],
        required=True,
    )
    parser.add_argument(
        "-i", "--input",
        action="append",
        help="""
            The CSV file used as input for the analysis function. Multiple files
            can be specified by repeating this argument with different paths.
            All the input files must be package lists generated by ECG.
            """,
        required=True,
    )
    parser.add_argument(
        "-o", "--output",
        help="""
            Path to the output CSV file that will be created by the analysis function.
            """,
        required=True,
    )
    args = parser.parse_args()
    input_paths = args.input
    output_path = args.output
    analysis_type = args.analysis_type

    # Parsing the input files (all of them are merged into one table):
    input_table = []
    for path in input_paths:
        # newline="" lets the csv module handle line endings itself, and
        # the context manager closes the file even if parsing fails.
        with open(path, newline="") as input_file:
            input_table.extend(csv.reader(input_file))

    # Analyzing the inputs. The analysis functions receive a copy of the
    # package sources so the module-level template keeps its initial counts.
    output_dict = None
    if analysis_type == "sources-stats":
        output_dict = sources_stats(input_table, dict(pkgsources))
    elif analysis_type == "pkgs-changes":
        output_dict = pkgs_changes(input_table, dict(pkgsources))
    elif analysis_type == "pkgs-per-container":
        output_dict = pkgs_per_container(input_table)

    # An analysis that produced no table leaves nothing to write; stop with
    # a non-zero exit status instead of failing on the timestamp assignment.
    if output_dict is None:
        raise SystemExit(
            f"ERROR: analysis '{analysis_type}' did not produce any output."
        )

    # Adding the current time to every row:
    output_dict["timestamp"] = str(datetime.datetime.now().timestamp())

    # Writing analysis to output file. No header row is written: only the
    # single row of values goes to the file.
    with open(output_path, "w", newline="") as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=list(output_dict))
        dict_writer.writerow(output_dict)
|
|
|
|
|
|
|
|
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|