Written a doc. Changing the name of the variables related to the hash log.
This commit is contained in:
parent
f068fb91d0
commit
82af4bd521
42
README.md
42
README.md
@ -1,3 +1,43 @@
|
|||||||
# Study of the Reproducibility and Longevity of Dockerfiles
|
# Study of the Reproducibility and Longevity of Dockerfiles
|
||||||
|
|
||||||
TODO: doc
|
ECG is a program that automates software environment checking for scientific artifacts.
|
||||||
|
|
||||||
|
It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
|
||||||
|
ECG takes as input a JSON configuration telling where to download the artifact, where to find the Dockerfile to build in the artifact, and which package managers are used by the Docker container.
|
||||||
|
|
||||||
|
It will then download the artifact, build the Dockerfile, and then create a list of the installed packages in the Docker container. It also stores the potential errors encountered when building the Dockerfile, and logs the hash of the artifact for future comparison.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
A Linux operating system and the following packages are required:
|
||||||
|
- `snakemake`
|
||||||
|
- `gawk`
|
||||||
|
- `nickel`
|
||||||
|
|
||||||
|
The following Python package is also required:
|
||||||
|
- `requests`
|
||||||
|
|
||||||
|
Otherwise, you can use the Nix package manager and run `nix develop` in this directory to set up the full software environment.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Run `ecg.py` as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
python3 ecg.py <config_file> -p <pkglist_path> -l <log_file> -b <build_status_file> -a <artifact_hash_log> -c <cache_directory>
|
||||||
|
```
|
||||||
|
|
||||||
|
Where:
|
||||||
|
- `<config_file>` is the configuration file of the artifact in JSON format. An example is given in `artifacts_json/test.json`.
|
||||||
|
- `<pkglist_path>` is the path to the file where the package list generated by the program should be written.
|
||||||
|
- `<log_file>` is the path to the file where the output of the program should be logged.
|
||||||
|
- `<build_status_file>` is the path to the file where the build summary of the Docker image given in the configuration file should be written.
|
||||||
|
- `<artifact_hash_log>` is the path to the file where the hash of the downloaded artifact should be logged.
|
||||||
|
- `<cache_directory>` is the path to the cache directory, where downloaded artifacts will be stored for future usage.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
TBD
|
16
ecg.py
16
ecg.py
@ -27,7 +27,7 @@ import sys
|
|||||||
config_path = ""
|
config_path = ""
|
||||||
pkglist_path = "" # Package list being generated
|
pkglist_path = "" # Package list being generated
|
||||||
buildstatus_path = "" # Summary of the build process of the image
|
buildstatus_path = "" # Summary of the build process of the image
|
||||||
arthashhist_path = "" # History of the hash of the downloaded artifact
|
arthashlog_path = "" # Log of the hash of the downloaded artifact
|
||||||
cachedir_path = "" # Artifact cache directory
|
cachedir_path = "" # Artifact cache directory
|
||||||
|
|
||||||
# Commands to list installed packages along with their versions and the name
|
# Commands to list installed packages along with their versions and the name
|
||||||
@ -133,12 +133,12 @@ def download_sources(config):
|
|||||||
artifact = tarfile.open(artifact_path)
|
artifact = tarfile.open(artifact_path)
|
||||||
logging.info(f"Extracting artifact at {artifact_dir}")
|
logging.info(f"Extracting artifact at {artifact_dir}")
|
||||||
artifact.extractall(artifact_dir)
|
artifact.extractall(artifact_dir)
|
||||||
# Saving the current hash of the artifact for the history:
|
# Logging the current hash of the artifact:
|
||||||
arthashhist_file = open(arthashhist_path, "a")
|
arthashlog_file = open(arthashlog_path, "a")
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
timestamp = str(datetime.datetime.timestamp(now))
|
timestamp = str(datetime.datetime.timestamp(now))
|
||||||
arthashhist_file.write(f"{timestamp},{artifact_hash}\n")
|
arthashlog_file.write(f"{timestamp},{artifact_hash}\n")
|
||||||
arthashhist_file.close()
|
arthashlog_file.close()
|
||||||
else:
|
else:
|
||||||
logging.info(f"Cache found for {url}, skipping download")
|
logging.info(f"Cache found for {url}, skipping download")
|
||||||
return artifact_dir
|
return artifact_dir
|
||||||
@ -265,7 +265,7 @@ def remove_image(config):
|
|||||||
subprocess.run(["docker", "rmi", name], capture_output = True)
|
subprocess.run(["docker", "rmi", name], capture_output = True)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
global config_path, pkglist_path, buildstatus_path, arthashhist_path, cachedir_path
|
global config_path, pkglist_path, buildstatus_path, arthashlog_path, cachedir_path
|
||||||
|
|
||||||
# Command line arguments parsing:
|
# Command line arguments parsing:
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
@ -298,7 +298,7 @@ def main():
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-a", "--artifact-hash",
|
"-a", "--artifact-hash",
|
||||||
help = "Path to the file where to write the history of the hash of the downloaded artifact.",
|
help = "Path to the file where to write the log of the hash of the downloaded artifact.",
|
||||||
required = True
|
required = True
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -313,7 +313,7 @@ def main():
|
|||||||
pkglist_path = args.pkg_list
|
pkglist_path = args.pkg_list
|
||||||
log_path = args.log_path
|
log_path = args.log_path
|
||||||
buildstatus_path = args.build_summary
|
buildstatus_path = args.build_summary
|
||||||
arthashhist_path = args.artifact_hash
|
arthashlog_path = args.artifact_hash
|
||||||
cachedir_path = args.cache_dir
|
cachedir_path = args.cache_dir
|
||||||
|
|
||||||
# Setting up the log: will be displayed both on stdout and to the specified
|
# Setting up the log: will be displayed both on stdout and to the specified
|
||||||
|
Loading…
Reference in New Issue
Block a user