Written a doc. Changing the name of the variables related to the hash log.

This commit is contained in:
antux18 2024-07-16 15:34:15 +02:00
parent f068fb91d0
commit 82af4bd521
11 changed files with 49 additions and 9 deletions

View File

@ -1,3 +1,43 @@
# Study of the Reproducibility and Longevity of Dockerfiles # Study of the Reproducibility and Longevity of Dockerfiles
TODO: doc ECG is a program that automates software environment checking for scientific artifacts.
It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
## How it works
ECG takes as input a JSON configuration file that specifies where to download the artifact from, where to find the Dockerfile to build inside the artifact, and which package managers are used by the Docker container.
It then downloads the artifact, builds the Dockerfile, and creates a list of the packages installed in the Docker container. It also stores any errors encountered when building the Dockerfile, and logs the hash of the artifact for future comparison.
## Setup
A Linux operating system and the following packages are required:
- `snakemake`
- `gawk`
- `nickel`
The following Python package is also required:
- `requests`
Alternatively, you can use the Nix package manager and run `nix develop` in this directory to set up the full software environment.
## Usage
Run `ecg.py` as follows:
```
python3 ecg.py <config_file> -p <pkglist_path> -l <log_file> -b <build_status_file> -a <artifact_hash_log> -c <cache_directory>
```
Where:
- `<config_file>` is the configuration file of the artifact in JSON format. An example is given in `artifacts_json/test.json`.
- `<pkglist_path>` is the path to the file where the package list generated by the program should be written.
- `<log_file>` is the path to the file where the output of the program is logged.
- `<build_status_file>` is the path to the file where the build summary of the Docker image given in the configuration file is written.
- `<artifact_hash_log>` is the path to the file where the hash of the downloaded artifact is logged.
- `<cache_directory>` is the path to the cache directory, where downloaded artifacts will be stored for future usage.
## License
TBD

16
ecg.py
View File

@ -27,7 +27,7 @@ import sys
config_path = "" config_path = ""
pkglist_path = "" # Package list being generated pkglist_path = "" # Package list being generated
buildstatus_path = "" # Summary of the build process of the image buildstatus_path = "" # Summary of the build process of the image
arthashhist_path = "" # History of the hash of the downloaded artifact arthashlog_path = "" # Log of the hash of the downloaded artifact
cachedir_path = "" # Artifact cache directory cachedir_path = "" # Artifact cache directory
# Commands to list installed packages along with their versions and the name # Commands to list installed packages along with their versions and the name
@ -133,12 +133,12 @@ def download_sources(config):
artifact = tarfile.open(artifact_path) artifact = tarfile.open(artifact_path)
logging.info(f"Extracting artifact at {artifact_dir}") logging.info(f"Extracting artifact at {artifact_dir}")
artifact.extractall(artifact_dir) artifact.extractall(artifact_dir)
# Saving the current hash of the artifact for the history: # Logging the current hash of the artifact:
arthashhist_file = open(arthashhist_path, "a") arthashlog_file = open(arthashlog_path, "a")
now = datetime.datetime.now() now = datetime.datetime.now()
timestamp = str(datetime.datetime.timestamp(now)) timestamp = str(datetime.datetime.timestamp(now))
arthashhist_file.write(f"{timestamp},{artifact_hash}\n") arthashlog_file.write(f"{timestamp},{artifact_hash}\n")
arthashhist_file.close() arthashlog_file.close()
else: else:
logging.info(f"Cache found for {url}, skipping download") logging.info(f"Cache found for {url}, skipping download")
return artifact_dir return artifact_dir
@ -265,7 +265,7 @@ def remove_image(config):
subprocess.run(["docker", "rmi", name], capture_output = True) subprocess.run(["docker", "rmi", name], capture_output = True)
def main(): def main():
global config_path, pkglist_path, buildstatus_path, arthashhist_path, cachedir_path global config_path, pkglist_path, buildstatus_path, arthashlog_path, cachedir_path
# Command line arguments parsing: # Command line arguments parsing:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
@ -298,7 +298,7 @@ def main():
) )
parser.add_argument( parser.add_argument(
"-a", "--artifact-hash", "-a", "--artifact-hash",
help = "Path to the file where to write the history of the hash of the downloaded artifact.", help = "Path to the file where to write the log of the hash of the downloaded artifact.",
required = True required = True
) )
parser.add_argument( parser.add_argument(
@ -313,7 +313,7 @@ def main():
pkglist_path = args.pkg_list pkglist_path = args.pkg_list
log_path = args.log_path log_path = args.log_path
buildstatus_path = args.build_summary buildstatus_path = args.build_summary
arthashhist_path = args.artifact_hash arthashlog_path = args.artifact_hash
cachedir_path = args.cache_dir cachedir_path = args.cache_dir
# Setting up the log: will be displayed both on stdout and to the specified # Setting up the log: will be displayed both on stdout and to the specified