Compare commits

...

9 Commits

Author SHA1 Message Date
Quentin Guilloteau
9289b10b84 start splitting the workflow into several pieces 2024-08-29 13:10:24 +02:00
Quentin Guilloteau
a62d338bd0 moving some config around 2024-08-29 12:58:50 +02:00
Quentin Guilloteau
31e2ff0ca5 seperate the nix environments into several files 2024-08-29 11:41:29 +02:00
Quentin Guilloteau
d8d2a2e8e2 remove wrapper ecg 2024-08-29 11:23:54 +02:00
Quentin Guilloteau
20c7238581 fix typo in ecg 2024-08-29 10:34:05 +02:00
Quentin Guilloteau
742d6008f5 package ecg with nix 2024-08-29 10:32:54 +02:00
Quentin Guilloteau
817ec821c5 big cleaning of ecg. probably does not work right now: to be tested 2024-08-29 10:22:51 +02:00
Quentin Guilloteau
258cd64e40 removing useless lines from ecg 2024-08-28 14:37:00 +02:00
Quentin Guilloteau
025a16b62c starting cleaning snakefile 2024-08-28 14:35:13 +02:00
20 changed files with 604 additions and 714 deletions

View File

@@ -0,0 +1,13 @@
{
version = "1.0",
artifact_url = "https://zenodo.org/records/11636529/files/artifact-pap130.zip",
type = "zip",
doi = "10.5281/zenodo.11636529",
conf_date = 2024,
virtualization = "docker",
buildfile_dir = ".docker",
package_managers = [ "dpkg" ],
misc_packages = [
{ name = "gurobi10.0.1_linux64", url = "https://packages.gurobi.com/10.0/gurobi10.0.1_linux64.tar.gz" }
]
}

View File

@@ -0,0 +1,10 @@
{
version = "1.0",
artifact_url = "https://zenodo.org/records/11547063/files/artifact.zip",
type = "zip",
doi = "10.5281/zenodo.11547063",
conf_date = 2024,
virtualization = "docker",
buildfile_dir = "artifact",
package_managers = [ "dpkg", "pip" ]
}

View File

@@ -0,0 +1,17 @@
{
version = "1.0",
artifact_url = "https://zenodo.org/records/11558678/files/peanuts-playground.zip",
type = "zip",
doi = "10.5281/zenodo.11558678",
conf_date = 2024,
comment = "Files in /var/lib/apt/lists/ are removed.",
virtualization = "docker",
buildfile_dir = "./",
package_managers = [ "dpkg" ],
git_packages = [
{ name = "spack", location = "/home/vscode/.cache/spack" }
],
misc_packages = [
{ name = "cmake-3.22.2-linux", url = "https://github.com/Kitware/CMake/releases/download/v3.22.2/cmake-3.22.2-linux-x86_64.sh" }
]
}

View File

@@ -0,0 +1,10 @@
{
version = "1.0",
artifact_url = "https://zenodo.org/records/11579181/files/bsa_spmm.zip",
type = "zip",
doi = "10.5281/zenodo.11579181",
conf_date = 2024,
comment = "Are there really Dockerfiles for this artifact?",
virtualization = "docker",
package_managers = [ "dpkg" ],
}

View File

@@ -0,0 +1,11 @@
{
version = "1.0",
artifact_url = "https://zenodo.org/records/11775182/files/Euro-PAR_2024_paper_432.zip",
type = "zip",
doi = "10.5281/zenodo.11775182",
conf_date = 2024,
comment = "Files in /var/lib/apt/lists/ are removed.",
virtualization = "docker",
buildfile_dir = "./",
package_managers = [ "dpkg", "pip" ]
}

View File

@@ -12,3 +12,5 @@ max_duration: 60
 checkpoint: 1
 besteffort: True
 sleep_time: 30
+conference: "europar24"

500
ecg.py
View File

@@ -1,500 +0,0 @@
#!/bin/python3
"""
ECG is a program that automates software environment checking
for scientific artifacts.
It is meant to be executed periodically to analyze variations in the
software environment of the artifact through time.
"""
import subprocess
import json
import argparse
import tempfile
import os
import requests
import zipfile
import tarfile
import pathlib
import logging
import datetime
import sys
import string
import traceback
def trim(url):
"""
Trims given URL to make it contain only lowercase letters and numbers,
as well as with a maximum length of 128.
Parameters
----------
url: str
URL to trim.
Returns
-------
str
Trimmed URL.
"""
trimmed = ""
url_lc = url.lower()
i = 0
while i < len(url_lc) and i < 128:
c = url_lc[i]
if c in string.ascii_lowercase or c in [str(x) for x in range(0, 10)]:
trimmed += c
i += 1
return trimmed
def download_file(url, dest):
"""
Downloads the file stored at the given URL and returns its hash
and location.
Parameters
----------
url: str
URL to the file to download.
dest: str
Path to where the file should be stored.
Returns
-------
str
Hash of the downloaded file, or "-1" if the download failed.
"""
file_hash = "-1"
try:
req = requests.get(url)
if req.status_code != 404:
file = open(dest, "wb")
file.write(req.content)
file.close()
hash_process = subprocess.run(f"sha256sum {file.name} | cut -d ' ' -f 1 | tr -d '\n'", capture_output=True, shell=True)
file_hash = hash_process.stdout.decode("utf-8")
except requests.exceptions.ConnectionError:
# We can just ignore this exception, as we will just return an empty
# hash to indicate the error:
pass
return file_hash
def download_sources(config, arthashlog_path, dl_dir, use_cache, artifact_name):
"""
Downloads the source of the artifact in 'config'.
Parameters
----------
config: dict
Parsed config file.
arthashlog_path: str
Path to the artifact hash log file.
dl_dir: str
Path to the directory where to download the artifact.
use_cache: bool
Indicates whether the cache should be used or not.
artifact_name: str
Name of the artifact, for the artifact hash log.
Returns
-------
temp_dir: str
Path to the directory where the artifact is downloaded to, or empty
string if download failed.
"""
url = config["artifact_url"]
artcache_dir = trim(url)
artifact_dir = os.path.join(dl_dir, artcache_dir)
# Checking if artifact in cache. Not downloading if it is:
if not os.path.exists(artifact_dir) or not use_cache:
logging.info(f"Downloading artifact from {url}")
# In case cache was used before:
if not use_cache:
os.system(f"rm -rf {artifact_dir}")
os.mkdir(artifact_dir)
artifact_file = tempfile.NamedTemporaryFile()
artifact_path = artifact_file.name
artifact_hash = download_file(url, artifact_path)
# If download was successful:
if artifact_hash != "-1":
if config["type"] == "zip":
artifact = zipfile.ZipFile(artifact_path)
elif config["type"] == "tar":
artifact = tarfile.open(artifact_path)
logging.info(f"Extracting artifact at {artifact_dir}")
artifact.extractall(artifact_dir)
# If download failed:
else:
os.rmdir(artifact_dir)
artifact_dir = ""
# Logging the current hash of the artifact:
arthashlog_file = open(arthashlog_path, "a")
now = datetime.datetime.now()
timestamp = str(datetime.datetime.timestamp(now))
# Artifact hash will be "-1" if the download failed:
arthashlog_file.write(f"{timestamp},{artifact_hash},{artifact_name}\n")
arthashlog_file.close()
else:
logging.info(f"Cache found for {url}, skipping download")
return artifact_dir
def builderror_identifier(output):
"""
Parses the given 'output' to identify the error.
Parameters
----------
output: str
Output of Docker.
Returns
-------
found_error: str
The error that has been found in the output, according to the
categories. If there is more than one, only the latest is taken into
account.
"""
# Possible error messages given by 'docker build' and their category.
# The key is the category, the value is a tuple of error messages belonging
# to this category:
build_errors = {
"package_install_failed":("Unable to locate package", "error: failed to compile"),
"baseimage_unavailable":("manifest unknown: manifest unknown",),
"dockerfile_not_found":("Dockerfile: no such file or directory",)
}
# Last error found is the right one in theory:
found_error = ""
unknown_error = True
for error_cat, error_msgs in build_errors.items():
for error in error_msgs:
if error in output:
unknown_error = False
found_error = error_cat
if unknown_error:
found_error = "unknown_error"
return found_error
def buildresult_saver(result, buildstatus_path, config_path):
"""
Saves the given result in the 'build_status' file.
Parameters
----------
result: str
The result of the build. Either a Docker 'build' error
(see 'builderror_identifier'), another type of error
(for instance 'artifact_unavailable'), or 'success'
if build is successful.
buildstatus_path: str
Path to the build status file.
config_path: str
Path to the config file.
Returns
-------
None
"""
buildstatus_file = open(buildstatus_path, "a")
artifact_name = os.path.basename(config_path).split(".")[0]
# # Writing header in case file didn't exist:
# if not file_exists:
# buildstatus_file.write("yaml_path,timestamp,error")
now = datetime.datetime.now()
timestamp = str(datetime.datetime.timestamp(now))
buildstatus_file.write(f"{artifact_name},{timestamp},{result}\n")
buildstatus_file.close()
def build_image(config, src_dir, image_name, docker_cache = False):
"""
Builds the given Docker image in 'config'.
Parameters
----------
config: dict
Parsed config file.
src_dir: str
Path to the directory where the artifact is stored.
image_name: str
Name of the Docker image.
docker_cache: bool
Enables or disables Docker 'build' cache.
Returns
-------
return_code: bool, build_output: str
Return code and output of Docker 'build'.
"""
cache_arg = " --no-cache"
if docker_cache:
cache_arg = ""
logging.info(f"Starting building image {image_name}")
path = os.path.join(src_dir, config["buildfile_dir"])
# Using trimmed artifact URL as name:
build_command = f"docker build{cache_arg} -t {image_name} ."
build_process = subprocess.run(build_command.split(" "), cwd=path, capture_output=True)
build_output = f"stdout:\n{build_process.stdout.decode('utf-8')}\nstderr:\n{build_process.stderr.decode('utf-8')}"
logging.info(f"Output of '{build_command}':")
logging.info(build_output)
return_code = build_process.returncode
logging.info(f"Command '{build_command}' exited with code {return_code}")
return return_code, build_output
def check_env(config, src_dir, artifact_name, pkglist_path):
"""
Builds a list of all software packages installed in the
Docker image given in 'config', depending on the package managers
specified in the configuration, then stores it in a CSV file.
Parameters
----------
config: dict
Parsed config file.
src_dir: str
Path to the directory where the artifact is stored.
artifact_name: str
Name of the artifact. Used both as the Docker image name, and for the
packages list for tracking purpose during the output analysis.
pkglist_path: str
Path to the package list file.
Returns
-------
None
"""
# Saving the current time to add it to every row:
now = datetime.datetime.now()
timestamp = str(datetime.datetime.timestamp(now))
# Commands to list installed packages along with their versions and the name
# of the package manager, depending on the package managers.
# Each package manager is associated with a tuple, the first item being
# the package manager's command, the second being the arguments for the
# query (they must be separated for the "--entrypoint" argument of Docker
# 'run', see below), and the third one being the command that will format
# the output of the query command (this one can be an empty string in case
# the formatting part is already done using the options of the first command).
# The first command needs to be run on the container, and the second on the
# host, to take into account container images that do not have the formatting
# packages installed.
pkgmgr_cmd = {
"dpkg": ("dpkg", "-l", "awk 'NR>5 {print $2 \",\" $3 \",dpkg," + artifact_name + "," + timestamp + "\"}'"), \
"rpm":("rpm", "-qa --queryformat '%{NAME},%{VERSION},rpm," + artifact_name + "," + timestamp + "\\n'", ""), \
"pacman":("pacman", "-Q", "awk '{print $0 \",\" $1 \",pacman," + artifact_name + "," + timestamp + "\"}'"), \
"pip":("pip", "list", "awk 'NR>2 {print $1 \",\" $2 \",\" \"pip," + artifact_name + "," + timestamp + "\"}'"), \
"conda":("/root/.conda/bin/conda", "list -e", "sed 's/=/ /g' | awk 'NR>3 {print $1 \",\" $2 \",conda," + artifact_name + "," + timestamp + "\"}'")
}
# Command to obtain the latest commit hash in a git repository (separated
# into 2 parts for "--entrypoint"):
gitcmd = ("git", "log -n 1 --pretty=format:%H")
logging.info("Checking software environment")
pkglist_file = open(pkglist_path, "w")
# pkglist_file.write("package,version,package_manager\n")
path = os.path.join(src_dir, config["buildfile_dir"])
# Package managers:
for pkgmgr in config["package_managers"]:
# "--entrypoint" requires command and arguments to be separated.
# This Docker 'run' option is used to prevent the shell from printing
# a login message, if any.
pkglist_cmd = pkgmgr_cmd[pkgmgr][0]
pkglist_cmdargs = pkgmgr_cmd[pkgmgr][1].split(" ")
listformat_cmd = pkgmgr_cmd[pkgmgr][2]
logging.info(f"Checking '{pkgmgr}'")
# pkglist_process = subprocess.run(["docker", "run", "--rm", config["image_name"]] + pkglist_cmd.split(" "), cwd=path, capture_output=True)
pkglist_process = subprocess.run(["docker", "run", "--rm", "--entrypoint", pkglist_cmd, artifact_name] + pkglist_cmdargs, cwd=path, capture_output=True)
format_process = subprocess.run(f"cat << EOF | {listformat_cmd}\n{pkglist_process.stdout.decode('utf-8')}EOF", cwd=path, capture_output=True, shell=True)
pkglist = format_process.stdout.decode("utf-8")
pkglist_file.write(pkglist)
# Python venvs:
logging.info("Checking Python venvs")
for venv in config["python_venvs"]:
pipcmd = pkgmgr_cmd["pip"][0]
pipcmd_args = pkgmgr_cmd["pip"][1]
pkglist_process = subprocess.run(["docker", "run", "--rm", "-w", venv["path"], "--entrypoint", venv["path"] + "/bin/" + pipcmd, artifact_name] + pipcmd_args.split(" "), cwd=path, capture_output=True)
format_process = subprocess.run(f"cat << EOF | {listformat_cmd}\n{pkglist_process.stdout.decode('utf-8')}EOF", cwd=path, capture_output=True, shell=True)
pkglist = format_process.stdout.decode("utf-8")
pkglist_file.write(pkglist)
# Git packages:
logging.info("Checking Git packages")
for repo in config["git_packages"]:
pkglist_process = subprocess.run(["docker", "run", "--rm", "-w", repo["location"], "--entrypoint", gitcmd[0], artifact_name] + gitcmd[1].split(" "), cwd=path, capture_output=True)
repo_row = f"{repo['name']},{pkglist_process.stdout.decode('utf-8')},git,{artifact_name},{timestamp}"
pkglist_file.write(f"{repo_row}\n")
# Misc packages:
logging.info("Checking miscellaneous packages")
for pkg in config["misc_packages"]:
logging.info(f"Downloading package {pkg['name']} from {pkg['url']}")
pkg_file = tempfile.NamedTemporaryFile()
pkg_path = pkg_file.name
pkg_hash = download_file(pkg["url"], pkg_path)
# Package hash will be an empty string if download failed:
pkg_row = f"{pkg['name']},{pkg_hash},misc,{artifact_name},{timestamp}"
pkglist_file.write(f"{pkg_row}\n")
pkglist_file.close()
def remove_image(config, image_name):
"""
Removes the Docker image given in 'config'.
Parameters
----------
config: dict
Parsed config file.
image_name: str
Name of the Docker image.
Returns
-------
None
"""
logging.info(f"Removing image '{image_name}'")
subprocess.run(["docker", "rmi", image_name], capture_output = True)
def main():
# Paths:
config_path = ""
pkglist_path = "" # Package list being generated
buildstatus_path = "" # Status of the build process of the image, when it fails
arthashlog_path = "" # Log of the hash of the downloaded artifact
cache_dir = "" # Artifact cache directory, when using one. 'None' value indicates no cache.
use_cache = False
# Command line arguments parsing:
parser = argparse.ArgumentParser(
prog = "ecg",
description =
"""
ECG is a program that automates software environment checking for scientific artifacts.
It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
"""
)
# parser.add_argument(
# '-v', '--verbose',
# action = 'store_true',
# help = "Shows more details on what is being done."
# )
parser.add_argument(
"config",
help = "The path to the configuration file of the artifact's Docker image."
)
parser.add_argument(
"-p", "--pkg-list",
help = "Path to the file where the package list generated by the program should be written.",
required = True
)
# parser.add_argument(
# "-l", "--log-path",
# help = "Path to the file where to log the output of the program.",
# required = True
# )
parser.add_argument(
"-b", "--build-status",
help = "Path to the file where to write the build status of the Docker image given in the configuration file.",
required = True
)
parser.add_argument(
"-a", "--artifact-hash",
help = "Path to the file where to write the log of the hash of the downloaded artifact.",
required = True
)
parser.add_argument(
"-c", "--cache-dir",
help =
"""
Path to the cache directory, where artifacts that are downloaded will be stored for future usage.
If not specified, cache is disabled.
""",
required = False
),
parser.add_argument(
'--docker-cache',
action = 'store_true',
help = "Use cache for Docker 'build'."
)
args = parser.parse_args()
# Setting up the paths of the outputs:
pkglist_path = args.pkg_list
buildstatus_path = args.build_status
arthashlog_path = args.artifact_hash
cache_dir = args.cache_dir
# log_path = "log.txt" # Output of the program
# log_path = args.log_path
# Creating the output files to avoid complaints from Snakemake about missing
# outputs...
pathlib.Path(pkglist_path).touch()
pathlib.Path(buildstatus_path).touch()
pathlib.Path(arthashlog_path).touch()
# Setting up the log:
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
# # Old version where the script writes its own log to the given file:
# print(f"Output will be stored in {log_path}")
# logging.basicConfig(filename = log_path, filemode = "w", format = '%(levelname)s: %(message)s', level = logging.INFO)
# if args.verbose:
# logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
# Parsing the input file including the configuration of the artifact's
# image:
config_path = args.config
status = ""
config_file = open(config_path, "r")
config = json.loads(config_file.read())
config_file.close()
dl_dir = None
# If not using cache, creates a temporary directory:
if cache_dir == None:
tmp_dir = tempfile.TemporaryDirectory()
dl_dir = tmp_dir.name
else:
use_cache = True
dl_dir = cache_dir
artifact_name = os.path.splitext(os.path.basename(config_path))[0]
artifact_dir = download_sources(config, arthashlog_path, dl_dir, use_cache, artifact_name)
# If download was successful:
if artifact_dir != "":
return_code, build_output = build_image(config, artifact_dir, artifact_name, args.docker_cache)
if return_code == 0:
status = "success"
check_env(config, artifact_dir, artifact_name, pkglist_path)
remove_image(config, artifact_name)
else:
status = builderror_identifier(build_output)
# Creates file if not already:
pathlib.Path(pkglist_path).touch()
# If download failed, we need to save the error to the build status log:
else:
logging.fatal("Artifact could not be downloaded!")
status = "artifact_unavailable"
# except Exception as err:
# # Handles any possible script's own crashes:
# formatted_err = str(''.join(traceback.format_exception(None, err, err.__traceback__)))
# log_file = open(log_path, "a")
# log_file.write(formatted_err)
# log_file.close()
# logging.error(formatted_err)
# status = "script_crash"
buildresult_saver(status, buildstatus_path, config_path)
if __name__ == "__main__":
main()

265
ecg/app/ecg.py Executable file
View File

@@ -0,0 +1,265 @@
import subprocess
import json
import argparse
import tempfile
import os
import requests
import zipfile
import tarfile
import pathlib
import logging
import datetime
import sys
import string
import traceback
import hashlib
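# Download the file at 'url' to 'dest_path' and return its SHA-256 hash ("-1" if the download failed).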
def download_file_and_get_hash(url, dest_path):
file_hash = "-1"
try:
req = requests.get(url)
if req.status_code != 404:
with open(dest_path, "wb") as file:
file.write(req.content)
file_hash = hashlib.sha256(req.content).hexdigest()
except requests.exceptions.ConnectionError:
# We can just ignore this exception, as we will just return a hash of
# "-1" to indicate the error:
pass
return file_hash
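# Download the artifact archive, extract it into a new subdirectory of 'dl_dir', log its hash, and return the extraction directory ("" on failure).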
def download_sources(url, archive_type, arthashlog_path, dl_dir, artifact_name):
logging.info(f"Downloading artifact from {url}")
artifact_dir = ""
tmp_artifact_file = tempfile.NamedTemporaryFile()
tmp_artifact_path = tmp_artifact_file.name
artifact_hash = download_file_and_get_hash(url, tmp_artifact_path)
if artifact_hash != "-1":
artcache_dir = f"ecg_{artifact_hash[:9]}"
artifact_dir = os.path.join(dl_dir, artcache_dir)
logging.info(f"Extracting artifact at {artifact_dir}")
extractors = {
"zip": zipfile.ZipFile,
"tar": tarfile.open
}
os.mkdir(artifact_dir)
extractors[archive_type](tmp_artifact_path).extractall(artifact_dir)
with open(arthashlog_path, "w") as arthashlog_file:
now = datetime.datetime.now()
timestamp = str(datetime.datetime.timestamp(now))
arthashlog_file.write(f"{timestamp},{artifact_hash},{artifact_name}\n")
return artifact_dir
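# Map the output of 'docker build' to one of the error categories used in the build status log.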
def builderror_identifier(output):
build_errors = {
"package_install_failed": ("Unable to locate package", "error: failed to compile"),
"baseimage_unavailable": ("manifest unknown: manifest unknown",),
"dockerfile_not_found": ("Dockerfile: no such file or directory",)
}
for error_cat, error_msgs in build_errors.items():
for error in error_msgs:
if error in output:
return error_cat
return "unknown_error"
def buildresult_saver(result, buildstatus_path, artifact_name):
with open(buildstatus_path, "w") as buildstatus_file:
now = datetime.datetime.now()
timestamp = str(datetime.datetime.timestamp(now))
buildstatus_file.write(f"{artifact_name},{timestamp},{result}\n")
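# Run 'docker build' in 'path' and return the exit code together with the combined stdout/stderr.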
def build_image(path, image_name):
# 'path' is the build context directory (artifact directory + 'buildfile_dir'), computed by the caller.
logging.info(f"Starting building image {image_name}")
build_command = f"docker build --no-cache -t {image_name} ."
build_process = subprocess.run(build_command.split(" "), cwd=path, capture_output=True)
build_output = f"stdout:\n{build_process.stdout.decode('utf-8')}\nstderr:\n{build_process.stderr.decode('utf-8')}"
logging.info(f"Output of '{build_command}':\n\n{build_output}")
return_code = build_process.returncode
logging.info(f"Command '{build_command}' exited with code {return_code}")
return return_code, build_output
def check_env(config, src_dir, artifact_name, pkglist_path):
"""
Builds a list of all software packages installed in the
Docker image given in 'config', depending on the package managers
specified in the configuration, then stores it in a CSV file.
Parameters
----------
config: dict
Parsed config file.
src_dir: str
Path to the directory where the artifact is stored.
artifact_name: str
Name of the artifact. Used both as the Docker image name, and for the
packages list for tracking purpose during the output analysis.
pkglist_path: str
Path to the package list file.
Returns
-------
None
"""
# Saving the current time to add it to every row:
now = datetime.datetime.now()
timestamp = str(datetime.datetime.timestamp(now))
# Commands to list installed packages along with their versions and the name
# of the package manager, depending on the package managers.
# Each package manager is associated with a tuple, the first item being
# the package manager's command, the second being the arguments for the
# query (they must be separated for the "--entrypoint" argument of Docker
# 'run', see below), and the third one being the command that will format
# the output of the query command (this one can be an empty string in case
# the formatting part is already done using the options of the first command).
# The first command needs to be run on the container, and the second on the
# host, to take into account container images that do not have the formatting
# packages installed.
pkgmgr_cmd = {
"dpkg": ("dpkg",\
"-l",\
f"awk 'NR>5 {{print $2 \",\" $3 \",dpkg,{artifact_name},{timestamp}\"}}'"), \
"rpm":("rpm",\
f"-qa --queryformat '%{{NAME}},%{{VERSION}},rpm,{artifact_name},{timestamp}\\n'",\
""), \
"pacman":("pacman",\
"-Q",\
f"awk '{{print $0 \",\" $1 \",pacman,{artifact_name},{timestamp}\"}}'"), \
"pip":("pip",\
"list",\
f"awk 'NR>2 {{print $1 \",\" $2 \",\" \"pip,{artifact_name},{timestamp}\"}}'"), \
"conda":("/root/.conda/bin/conda",\
"list -e",\
f"sed 's/=/ /g' | awk 'NR>3 {{print $1 \",\" $2 \",conda,{artifact_name},{timestamp}\"}}'")
}
# Command to obtain the latest commit hash in a git repository (separated
# into 2 parts for "--entrypoint"):
gitcmd = ("git", "log -n 1 --pretty=format:%H")
logging.info("Checking software environment")
pkglist_file = open(pkglist_path, "w")
path = os.path.join(src_dir, config["buildfile_dir"])
# Package managers:
for pkgmgr in config["package_managers"]:
# "--entrypoint" requires command and arguments to be separated.
# This Docker 'run' option is used to prevent the shell from printing
# a login message, if any.
pkglist_cmd = pkgmgr_cmd[pkgmgr][0]
pkglist_cmdargs = pkgmgr_cmd[pkgmgr][1].split(" ")
listformat_cmd = pkgmgr_cmd[pkgmgr][2]
logging.info(f"Checking '{pkgmgr}'")
pkglist_process = subprocess.run(["docker", "run", "--rm", "--entrypoint", pkglist_cmd, artifact_name] + pkglist_cmdargs, cwd=path, capture_output=True)
format_process = subprocess.run(f"cat << EOF | {listformat_cmd}\n{pkglist_process.stdout.decode('utf-8')}EOF", cwd=path, capture_output=True, shell=True)
pkglist = format_process.stdout.decode("utf-8")
pkglist_file.write(pkglist)
# Python venvs:
logging.info("Checking Python venvs")
for venv in config["python_venvs"]:
pipcmd = pkgmgr_cmd["pip"][0]
pipcmd_args = pkgmgr_cmd["pip"][1]
listformat_cmd = pkgmgr_cmd["pip"][2]
pkglist_process = subprocess.run(["docker", "run", "--rm", "-w", venv["path"], "--entrypoint", venv["path"] + "/bin/" + pipcmd, artifact_name] + pipcmd_args.split(" "), cwd=path, capture_output=True)
format_process = subprocess.run(f"cat << EOF | {listformat_cmd}\n{pkglist_process.stdout.decode('utf-8')}EOF", cwd=path, capture_output=True, shell=True)
pkglist = format_process.stdout.decode("utf-8")
pkglist_file.write(pkglist)
# Git packages:
logging.info("Checking Git packages")
for repo in config["git_packages"]:
pkglist_process = subprocess.run(["docker", "run", "--rm", "-w", repo["location"], "--entrypoint", gitcmd[0], artifact_name] + gitcmd[1].split(" "), cwd=path, capture_output=True)
repo_row = f"{repo['name']},{pkglist_process.stdout.decode('utf-8')},git,{artifact_name},{timestamp}"
pkglist_file.write(f"{repo_row}\n")
# Misc packages:
logging.info("Checking miscellaneous packages")
for pkg in config["misc_packages"]:
logging.info(f"Downloading package {pkg['name']} from {pkg['url']}")
with tempfile.NamedTemporaryFile() as pkg_file:
pkg_hash = download_file_and_get_hash(pkg["url"], pkg_file.name)
pkglist_file.write(f"{pkg['name']},{pkg_hash},misc,{artifact_name},{timestamp}\n")
pkglist_file.close()
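# Remove the Docker image built for this artifact once its software environment has been recorded.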
def remove_image(image_name):
logging.info(f"Removing image '{image_name}'")
subprocess.run(["docker", "rmi", image_name], capture_output = True)
def main():
parser = argparse.ArgumentParser(
prog = "ecg",
description =
"""
ECG is a program that automates software environment checking for scientific artifacts.
It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
"""
)
parser.add_argument(
"config",
help = "The path to the configuration file of the artifact's Docker image."
)
parser.add_argument(
"-p", "--pkg-list",
help = "Path to the file where the package list generated by the program should be written.",
required = True
)
parser.add_argument(
"-b", "--build-status",
help = "Path to the file where to write the build status of the Docker image given in the configuration file.",
required = True
)
parser.add_argument(
"-a", "--artifact-hash",
help = "Path to the file where to write the log of the hash of the downloaded artifact.",
required = True
)
args = parser.parse_args()
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
config_path = args.config
with open(config_path, "r") as config_file:
config = json.loads(config_file.read())
artifact_name = os.path.splitext(os.path.basename(config_path))[0]
ecg(artifact_name, config, args.pkg_list, args.build_status, args.artifact_hash)
return 0
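# Core pipeline for one artifact: download the sources, build the Docker image, list the installed packages, and record the build status.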
def ecg(artifact_name, config, pkglist_path, buildstatus_path, arthashlog_path):
# just in case Snakemake does not create them
pathlib.Path(pkglist_path).touch()
pathlib.Path(buildstatus_path).touch()
pathlib.Path(arthashlog_path).touch()
status = ""
with tempfile.TemporaryDirectory() as tmp_dir:
dl_dir = tmp_dir
artifact_dir = download_sources(config["artifact_url"], config["type"], arthashlog_path, dl_dir, artifact_name)
if artifact_dir != "":
path = os.path.join(artifact_dir, config["buildfile_dir"])
return_code, build_output = build_image(path, artifact_name)
if return_code == 0:
status = "success"
check_env(config, artifact_dir, artifact_name, pkglist_path)
remove_image(artifact_name)
else:
status = builderror_identifier(build_output)
else:
logging.fatal("Artifact could not be downloaded!")
status = "artifact_unavailable"
buildresult_saver(status, buildstatus_path, artifact_name)
if __name__ == "__main__":
main()

3
ecg/run.py Normal file
View File

@@ -0,0 +1,3 @@
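# Thin wrapper to run the ECG command-line interface directly as a script.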
from app import *
ecg.main()

37
ecg/setup.py Normal file
View File

@@ -0,0 +1,37 @@
from setuptools import setup
setup(
# Application name:
name="ecg",
# Version number (initial):
version="0.0.1",
# Application author details:
author="Quentin Guilloteau, Antoine Waehren",
author_email="Quentin.Guilloteau@unibas.ch, Antoine.Waehren@stud.unibas.ch",
# Packages
packages=["app"],
# Include additional files into the package
entry_points={
'console_scripts': ['ecg=app.ecg:main'],
},
# Details
url="https://forge.chapril.org/GuilloteauQ/study-docker-repro-longevity",
description="Test the software environment of Dockerfiles from research artifacts",
long_description="""
ECG is a program that automates software environment checking for scientific artifacts.
It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
""",
install_requires=[
"requests",
],
include_package_data=True,
)

View File

@@ -15,28 +15,22 @@
         kapkgs = kapack.packages.${system};
       in
       {
+        packages = {
+          ecg = pkgs.python3Packages.buildPythonPackage {
+            name = "ecg";
+            version = "0.0.1";
+            src = ./ecg;
+            propagatedBuildInputs = with (pkgs.python3Packages); [
+              requests
+            ];
+            doCheck = false;
+          };
+        };
         devShells = {
-          default = pkgs.mkShell {
-            packages = with pkgs; [
-              snakemake
-              gawk
-              gnused
-              nickel
-              graphviz
-              # TODO separate into several shells
-              (python3.withPackages (ps: with ps; [
-                requests
-                kapkgs.execo
-              ]))
-              (rWrapper.override { packages = with rPackages; [ tidyverse reshape2 ]; })
-            ];
-          };
-          latex = pkgs.mkShell {
-            packages = with pkgs; [
-              texliveFull
-              rubber
-            ];
-          };
+          default = import ./workflow/envs/snakemake.nix { inherit pkgs kapkgs; };
+          nickel = import ./workflow/envs/nickel.nix { inherit pkgs kapkgs; };
+          latex = import ./workflow/envs/latex.nix { inherit pkgs kapkgs; };
+          analysis = import ./workflow/envs/analysis.nix { inherit pkgs kapkgs; };
         };
       });
 }

View File

@@ -8,24 +8,18 @@ DATE = datetime.datetime.now().strftime("%Y%m%d")
 ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
 ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
-BLACKLIST_FOLDER = config["folder_blacklists"]
-EXTENSION = "json"
 SYSTEM = config["system"]
+CONFERENCE = config["conference"]
-ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
+ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL + "/" + CONFERENCE)
 PREFIX = config["prefix"]
 ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]
-SHELLS_ECG = {
-    "local": f"./{{input.ecg_wrapper}} {{input.ecg}} {ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION} {{output.pkg}} {{output.build_status}} {{output.artifact_hash}} {{output.log}}",
-    "g5k": f"python3 {{input.execo_wrapper}} --path {os.getcwd()} --script {{input.oar_wrapper}} --site {config['site']} --cluster {config['cluster']} --max-duration {config['max_duration']} --checkpoint {config['checkpoint']} {'--besteffort' if config['besteffort'] else ''} --sleep_time {config['sleep_time']} --build_status_file {{output.build_status}} --artifact {{wildcards.artifact}} -- '"
-}
 ANALYSIS_DIR = config["analysis_dir"]
 ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
 ANALYSIS_SCRIPTS_DIR = "analysis"
 ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh"
-AGGREGATE_WRAPPER = "workflow/scripts/aggregate_wrapper.sh"
 ARTIFACT_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/artifact_hash/{{artifact}}",
     artifact = ARTIFACTS
 ))
@@ -34,42 +28,38 @@ SOFTENV_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/pkgs/{{artifact}}",
 ))
 PLOT_DIR = config["plot_dir"]
-PLOT_SCRIPT = "plot/plot.r"
 PLOT_HEADERS = {
-    "softenv": "dpkg rpm pacman pip conda git misc",
+    #"softenv": "dpkg rpm pacman pip conda git misc",
+    "sources_stats": "dpkg rpm pacman pip conda git misc",
+    "pkgs_changes": "dpkg rpm pacman pip conda git misc",
     "build_status": "success package_install_failed baseimage_unavailable artifact_unavailable dockerfile_not_found script_crash job_time_exceeded unknown_error",
     "artifact": "available unavailable changed"
 }

 rule all:
     input:
-        expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/line/{{date}}.pdf",
-            analysis_cat = ANALYSIS_CATS,
-            date = DATE
-        ),
-        expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/bar/{{date}}.pdf",
-            analysis_cat = ["sources_stats", "build_status", "artifact"],
-            date = DATE
-        ),
-        f"{BLACKLIST_FOLDER}/{DATE}.csv"
+        expand(f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",\
+            conference=config['conference'],\
+            artifact=ARTIFACTS,\
+            date = DATE)

 # Artifacts configuration files:

 rule check_all:
     input:
-        expand(f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json", artifact=ARTIFACTS)
+        expand(f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json", artifact=ARTIFACTS, conference=config['conference'])

 rule check_artifact:
     input:
         "flake.nix",
         "flake.lock",
         contract="workflow/nickel/artifact_contract.ncl",
-        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{artifact}}.ncl"
+        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{conference}}/{{artifact}}.ncl"
     output:
-        f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json"
+        f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
     shell:
         """
-        nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
+        nix develop .#nickel --command nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
         """

 # ECG:
@@ -78,157 +68,104 @@ rule run_ecg:
     input:
         "flake.nix",
         "flake.lock",
-        ecg="ecg.py",
-        ecg_wrapper="workflow/scripts/ecg_wrapper.sh",
+        ecg="ecg/app/ecg.py",
         execo_wrapper="workflow/scripts/submission_g5k.py",
         oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
-        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}"
+        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
     output:
-        log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
-        pkg = f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
-        build_status = f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
-        artifact_hash = f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",
+        log = f"{PREFIX}/{{conference}}/logs/{{artifact}}/{{date}}.txt",
+        pkg = f"{PREFIX}/{{conference}}/pkgs/{{artifact}}/{{date}}.csv",
+        build_status = f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",
+        artifact_hash = f"{PREFIX}/{{conference}}/artifact_hash/{{artifact}}/{{date}}.csv",
     shell:
-        (SHELLS_ECG["g5k"] if SYSTEM == "g5k" else "") + SHELLS_ECG["local"] + ("'" if SYSTEM == "g5k" else "")
-
-rule update_blacklist:
-    input:
-        build_status=expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
-            artifact=ARTIFACTS
-        )
-    output:
-        f"{BLACKLIST_FOLDER}/{{date}}.csv"
-    shell:
-        # We need to ignore lines where build is successful:
-        f"cat {{input}} | grep -v ',success' > {{output}} || true"
+        (f"python3 {{input.execo_wrapper}} --path {os.getcwd()} \
+            --script {{input.oar_wrapper}} \
+            --site {config['site']} \
+            --cluster {config['cluster']} \
+            --max-duration {config['max_duration']} \
+            --checkpoint {config['checkpoint']} \
+            {'--besteffort' if config['besteffort'] else ''} \
+            --sleep_time {config['sleep_time']} \
+            --build_status_file {{output.build_status}} \
+            --artifact {{wildcards.artifact}} -- '" if SYSTEM == "g5k" else "") + \
+        """
+        nix shell .#ecg --command ecg -p {output.pkg} -b {output.build_status} -a {output.artifact_hash} {input.artifact} &> {output.log} || echo "{input.artifact}, `date +%s.%N`, script_crash" > {output.build_status}
+        """ + \
+        ("'" if SYSTEM == "g5k" else "")

 # Analysis:

-rule softenv_analysis:
-    wildcard_constraints:
-        date="\d+"
-    input:
-        expand(f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv",
-            artifact = ARTIFACTS
-        )
-    output:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
-        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
-    shell:
-        f"""
-        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input}}
-        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {SOFTENV_ANALYSIS_DIRS}
-        """
+#rule softenv_analysis:
+#    wildcard_constraints:
+#        date="\d+"
+#    input:
+#        expand(f"{PREFIX}{{conference}}/pkgs/{{artifact}}/{{{{date}}}}.csv",
+#            artifact = ARTIFACTS
+#        )
+#    output:
+#        sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
+#        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
+#    shell:
+#        f"""
+#        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input}}
+#        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {SOFTENV_ANALYSIS_DIRS}
+#        """
+#
-rule buildstatus_analysis:
-    wildcard_constraints:
-        date="\d+"
-    input:
-        expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
-            artifact = ARTIFACTS
-        ),
-    output:
-        f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
-    shell:
-        f"""
-        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
-        """
+#rule buildstatus_analysis:
+#    wildcard_constraints:
+#        date="\d+"
+#    input:
+#        expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
+#            artifact = ARTIFACTS
+#        ),
+#    output:
+#        f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
+#    shell:
+#        f"""
+#        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
+#        """
+#
-rule artifact_analysis:
-    wildcard_constraints:
-        date="\d+"
-    input:
-        expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
-            artifact = ARTIFACTS
-        )
-    output:
-        f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
-    shell:
-        f"""
-        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {ARTIFACT_ANALYSIS_DIRS}
-        """
+#rule artifact_analysis:
+#    wildcard_constraints:
+#        date="\d+"
+#    input:
+#        expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
+#            artifact = ARTIFACTS
+#        )
+#    output:
+#        f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
+#    shell:
+#        f"""
+#        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {ARTIFACT_ANALYSIS_DIRS}
+#        """
+#
-# Analysis aggregate:
+## Analysis aggregate:
+#
-rule analysis_aggregate:
-    input:
-        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
-            cat = ANALYSIS_CATS,
-            date = DATE
-        )
+#rule analysis_aggregate:
+#    input:
+#        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
+#            cat = ANALYSIS_CATS,
+#            date = DATE
+#        )
+#
-rule pkgschgs_aggregate:
-    input:
-        f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
-    output:
-        f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv"
-    shell:
-        f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/pkgs_changes {{output}}"
-
-rule srcsstats_aggregate:
-    input:
-        f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv"
-    output:
-        f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv"
-    shell:
-        f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/sources_stats {{output}}"
-
-rule artifact_aggregate:
-    input:
-        f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
-    output:
-        f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
-    shell:
-        f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/artifact {{output}}"
-
-rule buildstatus_aggregate:
-    input:
-        f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
-    output:
-        f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv"
-    shell:
-        f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/build_status {{output}}"
-
-# Plot:
-
-rule all_plot:
-    input:
-        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
-            cat = ANALYSIS_CATS,
-            date = DATE
-        )
-
-rule line_plot:
-    input:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv",
-        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv",
-        build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv",
-        artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
-    output:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/plot/line/{{date}}.pdf",
-        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/plot/line/{{date}}.pdf",
-        build_status = f"{ANALYSIS_DIR}/build_status/plot/line/{{date}}.pdf",
-        artifact = f"{ANALYSIS_DIR}/artifact/plot/line/{{date}}.pdf"
-    shell:
-        f"""
-        Rscript {PLOT_SCRIPT} line {{input.sources_stats}} {{output.sources_stats}} {PLOT_HEADERS["softenv"]} timestamp
-        Rscript {PLOT_SCRIPT} line {{input.pkgs_changes}} {{output.pkgs_changes}} {PLOT_HEADERS["softenv"]} timestamp
-        Rscript {PLOT_SCRIPT} line {{input.build_status}} {{output.build_status}} {PLOT_HEADERS["build_status"]} timestamp
-        Rscript {PLOT_SCRIPT} line {{input.artifact}} {{output.artifact}} {PLOT_HEADERS["artifact"]} timestamp
-        """
-
-rule bar_plot:
-    input:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv",
-        build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv",
-        artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
-    output:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/plot/bar/{{date}}.pdf",
-        build_status = f"{ANALYSIS_DIR}/build_status/plot/bar/{{date}}.pdf",
-        artifact = f"{ANALYSIS_DIR}/artifact/plot/bar/{{date}}.pdf"
-    shell:
-        f"""
-        Rscript {PLOT_SCRIPT} bar {{input.sources_stats}} {{output.sources_stats}} {PLOT_HEADERS["softenv"]} timestamp
-        Rscript {PLOT_SCRIPT} bar {{input.build_status}} {{output.build_status}} {PLOT_HEADERS["build_status"]} timestamp
-        Rscript {PLOT_SCRIPT} bar {{input.artifact}} {{output.artifact}} {PLOT_HEADERS["artifact"]} timestamp
-        """
+#rule aggregate_by_type:
+#    input:
+#        data=f"{ANALYSIS_DIR}/{{type}}/{{date}}.csv",
+#        script="workflow/scripts/aggregate_wrapper.sh"
+#    output:
+#        f"{ANALYSIS_DIR}/{{type}}/aggregated/{{date}}.csv"
+#    shell:
+#        f"{{input.script}} {ANALYSIS_DIR}/{{type}} {{output}}"
+#
+## Plot:
+#
+#rule plot:
+#    input:
+#        script = "plot/plot.r",
+#        data = f"{ANALYSIS_DIR}/{{type}}/aggregated/{{date}}.csv",
+#    output:
+#        f"{ANALYSIS_DIR}/{{type}}/{{plot}}/{{date}}.pdf"
+#    params:
+#        header = lambda w: PLOT_HEADERS[w.type]
+#    shell:
+#        "Rscript {input.script} {wildcards.plot} {input.data} {output} {params.header} timestamp"

View File

@@ -0,0 +1,12 @@
{ pkgs, kapkgs }:
pkgs.mkShell {
packages = with pkgs; [
(rWrapper.override {
packages = with rPackages; [
tidyverse
reshape2
];
})
];
}

8
workflow/envs/latex.nix Normal file
View File

@@ -0,0 +1,8 @@
{ pkgs, kapkgs }:
pkgs.mkShell {
packages = with pkgs; [
texliveFull
rubber
];
}

7
workflow/envs/nickel.nix Normal file
View File

@@ -0,0 +1,7 @@
{ pkgs, kapkgs }:
pkgs.mkShell {
packages = with pkgs; [
nickel
];
}

View File

@@ -0,0 +1,12 @@
{ pkgs, kapkgs }:
pkgs.mkShell {
packages = with pkgs; [
snakemake
gawk
gnused
(python3.withPackages (ps: with ps; [
kapkgs.execo
]))
];
}

63
workflow/measure.smk Normal file
View File

@@ -0,0 +1,63 @@
configfile: "config/config.yaml"
include: "utils.smk"
import os
ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
SYSTEM = config["system"]
PREFIX = config["prefix"]
rule main:
input:
lambda w: expand(f"{PREFIX}/{{{{conference}}}}/build_status/{{artifact}}/{{{{date}}}}.csv",\
artifact=get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL + "/" + w['conference']))
output:
"{conference}_{date}.ok"
shell:
"echo {input} > {output}"
rule check_artifact:
input:
"flake.nix",
"flake.lock",
contract="workflow/nickel/artifact_contract.ncl",
artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{conference}}/{{artifact}}.ncl"
output:
f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
shell:
"""
nix develop .#nickel --command nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
"""
# ECG:
rule run_ecg:
input:
"flake.nix",
"flake.lock",
ecg="ecg/app/ecg.py",
execo_wrapper="workflow/scripts/submission_g5k.py",
oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
artifact=f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
output:
log = f"{PREFIX}/{{conference}}/logs/{{artifact}}/{{date}}.txt",
pkg = f"{PREFIX}/{{conference}}/pkgs/{{artifact}}/{{date}}.csv",
build_status = f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",
artifact_hash = f"{PREFIX}/{{conference}}/artifact_hash/{{artifact}}/{{date}}.csv",
shell:
(f"python3 {{input.execo_wrapper}} --path {os.getcwd()} \
--script {{input.oar_wrapper}} \
--site {config['site']} \
--cluster {config['cluster']} \
--max-duration {config['max_duration']} \
--checkpoint {config['checkpoint']} \
{'--besteffort' if config['besteffort'] else ''} \
--sleep_time {config['sleep_time']} \
--build_status_file {{output.build_status}} \
--artifact {{wildcards.artifact}} -- '" if SYSTEM == "g5k" else "") + \
"""
nix shell .#ecg --command ecg -p {output.pkg} -b {output.build_status} -a {output.artifact_hash} {input.artifact} &> {output.log} || echo "{input.artifact}, `date +%s.%N`, script_crash" > {output.build_status}
""" + \
("'" if SYSTEM == "g5k" else "")

View File

@@ -17,7 +17,7 @@ export PATH=~/.local/bin:$PATH
 g5k-setup-docker -t

 handler() {
-    echo "${ARTIFACT_FILE}, `date +%s.%N`, job_time_exceeded" >> ${BUILD_STATUS_FILE}; exit 0;
+    echo "${ARTIFACT_FILE}, `date +%s.%N`, job_time_exceeded" > ${BUILD_STATUS_FILE}; exit 0;
 }

 trap handler SIGUSR2

View File

@@ -1,14 +0,0 @@
#!/bin/bash
ECG=$1
CONFIG=$2
PKGLIST=$3
BUILD_STATUS=$4
ARTHASH_LOG=$5
OUTPUT_LOG=$6
python3 $ECG -p $PKGLIST -b $BUILD_STATUS -a $ARTHASH_LOG $CONFIG > $OUTPUT_LOG 2> $OUTPUT_LOG
if [ $? -ne 0 ]
then
echo "${CONFIG}, `date +%s.%N`, script_crash" >> ${BUILD_STATUS}; exit 0;
fi

View File

@@ -13,11 +13,14 @@ def get_blacklisted(blacklist_dir_path):
             blacklisted.add(row[0])
     return blacklisted

-def get_artifacts_to_build(artifacts_folder, blacklist_dir_path):
-    blacklisted = get_blacklisted(blacklist_dir_path)
-    all_artifacts = set([os.path.splitext(a)[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))])
-    artifacts_to_build = list(all_artifacts.difference(blacklisted))
-    if artifacts_to_build != []:
-        return list(all_artifacts.difference(blacklisted))
-    else:
-        raise(Exception(f"There is no artifact to build! Either no artifact configuration files have been found, or they have all been blacklisted."))
+#def get_artifacts_to_build(artifacts_folder, blacklist_dir_path):
+#    blacklisted = get_blacklisted(blacklist_dir_path)
+#    all_artifacts = set([os.path.splitext(a)[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))])
+#    artifacts_to_build = list(all_artifacts.difference(blacklisted))
+#    if artifacts_to_build != []:
+#        return list(all_artifacts.difference(blacklisted))
+#    else:
+#        raise(Exception(f"There is no artifact to build! Either no artifact configuration files have been found, or they have all been blacklisted."))
+
+def get_artifacts_to_build(artifacts_folder):
+    return [os.path.splitext(a)[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))]