Compare commits: d57f8b019e ... 9289b10b84

9 Commits (SHA1):
9289b10b84
a62d338bd0
31e2ff0ca5
d8d2a2e8e2
20c7238581
742d6008f5
817ec821c5
258cd64e40
025a16b62c
artifacts/nickel/europar24/canon_solving.ncl (new file, 13 lines)
@@ -0,0 +1,13 @@
{
  version = "1.0",
  artifact_url = "https://zenodo.org/records/11636529/files/artifact-pap130.zip",
  type = "zip",
  doi = "10.5281/zenodo.11636529",
  conf_date = 2024,
  virtualization = "docker",
  buildfile_dir = ".docker",
  package_managers = [ "dpkg" ],
  misc_packages = [
    { name = "gurobi10.0.1_linux64", url = "https://packages.gurobi.com/10.0/gurobi10.0.1_linux64.tar.gz" }
  ]
}
artifacts/nickel/europar24/geijer_how.ncl (new file, 10 lines)
@@ -0,0 +1,10 @@
{
  version = "1.0",
  artifact_url = "https://zenodo.org/records/11547063/files/artifact.zip",
  type = "zip",
  doi = "10.5281/zenodo.11547063",
  conf_date = 2024,
  virtualization = "docker",
  buildfile_dir = "artifact",
  package_managers = [ "dpkg", "pip" ]
}
artifacts/nickel/europar24/hiraga_peanuts.ncl (new file, 17 lines)
@@ -0,0 +1,17 @@
{
  version = "1.0",
  artifact_url = "https://zenodo.org/records/11558678/files/peanuts-playground.zip",
  type = "zip",
  doi = "10.5281/zenodo.11558678",
  conf_date = 2024,
  comment = "Files in /var/lib/apt/lists/ are removed.",
  virtualization = "docker",
  buildfile_dir = "./",
  package_managers = [ "dpkg" ],
  git_packages = [
    { name = "spack", location = "/home/vscode/.cache/spack" }
  ],
  misc_packages = [
    { name = "cmake-3.22.2-linux", url = "https://github.com/Kitware/CMake/releases/download/v3.22.2/cmake-3.22.2-linux-x86_64.sh" }
  ]
}
artifacts/nickel/europar24/lee_accelerated.ncl (new file, 10 lines)
@@ -0,0 +1,10 @@
{
  version = "1.0",
  artifact_url = "https://zenodo.org/records/11579181/files/bsa_spmm.zip",
  type = "zip",
  doi = "10.5281/zenodo.11579181",
  conf_date = 2024,
  comment = "Are there really Dockerfiles for this artifact?",
  virtualization = "docker",
  package_managers = [ "dpkg" ],
}
artifacts/nickel/europar24/wolff_fast.ncl (new file, 11 lines)
@@ -0,0 +1,11 @@
{
  version = "1.0",
  artifact_url = "https://zenodo.org/records/11775182/files/Euro-PAR_2024_paper_432.zip",
  type = "zip",
  doi = "10.5281/zenodo.11775182",
  conf_date = 2024,
  comment = "Files in /var/lib/apt/lists/ are removed.",
  virtualization = "docker",
  buildfile_dir = "./",
  package_managers = [ "dpkg", "pip" ]
}
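For orientation, a minimal sketch (not part of the diff) of what ecg sees once one of these Nickel records has been exported to JSON by the artifact contract and parsed with json.loads; the keys are copied from wolff_fast.ncl above, while the exact JSON shape produced by the contract is an assumption.

    import json

    # Hypothetical output of the Nickel-to-JSON export for wolff_fast.ncl (assumed shape):
    exported = '''
    {
      "version": "1.0",
      "artifact_url": "https://zenodo.org/records/11775182/files/Euro-PAR_2024_paper_432.zip",
      "type": "zip",
      "doi": "10.5281/zenodo.11775182",
      "conf_date": 2024,
      "comment": "Files in /var/lib/apt/lists/ are removed.",
      "virtualization": "docker",
      "buildfile_dir": "./",
      "package_managers": ["dpkg", "pip"]
    }
    '''

    config = json.loads(exported)
    # ecg then indexes fields such as config["type"] and config["buildfile_dir"],
    # and iterates over config["package_managers"].
    print(config["package_managers"])  # ['dpkg', 'pip']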
@@ -12,3 +12,5 @@ max_duration: 60
 checkpoint: 1
 besteffort: True
 sleep_time: 30
+
+conference: "europar24"
ecg.py (deleted, 500 lines)
@@ -1,500 +0,0 @@
#!/bin/python3

"""
ECG is a program that automates software environment checking
for scientific artifacts.

It is meant to be executed periodically to analyze variations in the
software environment of the artifact through time.
"""

import subprocess
import json
import argparse
import tempfile
import os
import requests
import zipfile
import tarfile
import pathlib
import logging
import datetime
import sys
import string
import traceback

def trim(url):
    """
    Trims given URL to make it contain only lowercase letters and numbers,
    as well as with a maximum length of 128.

    Parameters
    ----------
    url: str
        URL to trim.

    Returns
    -------
    str
        Trimmed URL.
    """
    trimmed = ""
    url_lc = url.lower()
    i = 0
    while i < len(url_lc) and i < 128:
        c = url_lc[i]
        if c in string.ascii_lowercase or c in [str(x) for x in range(0, 10)]:
            trimmed += c
        i += 1
    return trimmed

def download_file(url, dest):
    """
    Downloads the file stored at the given URL and returns its hash
    and location.

    Parameters
    ----------
    url: str
        URL to the file to download.
    dest: str
        Path to where the file should be stored.

    Returns
    -------
    str
        Hash of the downloaded file, or empty string if download failed.
    """
    file_hash = "-1"
    try:
        req = requests.get(url)
        if req.status_code != 404:
            file = open(dest, "wb")
            file.write(req.content)
            file.close()
            hash_process = subprocess.run(f"sha256sum {file.name} | cut -d ' ' -f 1 | tr -d '\n'", capture_output=True, shell=True)
            file_hash = hash_process.stdout.decode("utf-8")
    except requests.exceptions.ConnectionError:
        # We can just ignore this exception, as we will just return an empty
        # hash to indicate the error:
        pass
    return file_hash

def download_sources(config, arthashlog_path, dl_dir, use_cache, artifact_name):
    """
    Downloads the source of the artifact in 'config'.

    Parameters
    ----------
    config: dict
        Parsed config file.

    arthashlog_path: str
        Path to the artifact hash log file.

    dl_dir: str
        Path to the directory where to download the artifact.

    use_cache: bool
        Indicates whether the cache should be used or not.

    artifact_name: str
        Name of the artifact, for the artifact hash log.

    Returns
    -------
    temp_dir: str
        Path to the directory where the artifact is downloaded to, or empty
        string if download failed.
    """
    url = config["artifact_url"]
    artcache_dir = trim(url)
    artifact_dir = os.path.join(dl_dir, artcache_dir)
    # Checking if artifact in cache. Not downloading if it is:
    if not os.path.exists(artifact_dir) or not use_cache:
        logging.info(f"Downloading artifact from {url}")
        # In case cache was used before:
        if not use_cache:
            os.system(f"rm -rf {artifact_dir}")
        os.mkdir(artifact_dir)
        artifact_file = tempfile.NamedTemporaryFile()
        artifact_path = artifact_file.name
        artifact_hash = download_file(url, artifact_path)
        # If download was successful:
        if artifact_hash != "-1":
            if config["type"] == "zip":
                artifact = zipfile.ZipFile(artifact_path)
            elif config["type"] == "tar":
                artifact = tarfile.open(artifact_path)
            logging.info(f"Extracting artifact at {artifact_dir}")
            artifact.extractall(artifact_dir)
        # If download failed:
        else:
            os.rmdir(artifact_dir)
            artifact_dir = ""
        # Logging the current hash of the artifact:
        arthashlog_file = open(arthashlog_path, "a")
        now = datetime.datetime.now()
        timestamp = str(datetime.datetime.timestamp(now))
        # Artifact hash will be an empty string if download failed:
        arthashlog_file.write(f"{timestamp},{artifact_hash},{artifact_name}\n")
        arthashlog_file.close()
    else:
        logging.info(f"Cache found for {url}, skipping download")
    return artifact_dir

def builderror_identifier(output):
    """
    Parses the given 'output' to indentify the error.

    Parameters
    ----------
    output: str
        Output of Docker.

    Returns
    -------
    found_error: str
        The error that has been found in the output, according to the
        categories. If there is more than one, only the latest is taken into
        account.
    """
    # Possible error messages given by 'docker build' and their category.
    # The key is the category, the value is a tuple of error messages belonging to
    # to this category:
    build_errors = {
        "package_install_failed":("Unable to locate package", "error: failed to compile"),
        "baseimage_unavailable":("manifest unknown: manifest unknown",),
        "dockerfile_not_found":("Dockerfile: no such file or directory",)
    }

    # Last error found is the right one in theory:
    found_error = ""
    unknown_error = True
    for error_cat, error_msgs in build_errors.items():
        for error in error_msgs:
            if error in output:
                unknown_error = False
                found_error = error_cat
    if unknown_error:
        found_error = "unknown_error"
    return found_error

def buildresult_saver(result, buildstatus_path, config_path):
    """
    Saves the given result in the 'build_status' file.

    Parameters
    ----------
    result: str
        The result of the build. Either a Docker 'build' error
        (see 'builderror_identifier'), another type of error
        (for instance 'artifact_unavailable'), or 'success'
        if build is successful.

    buildstatus_path: str
        Path to the build status file.

    config_path: str
        Path to the config file.

    Returns
    -------
    None
    """
    buildstatus_file = open(buildstatus_path, "a")
    artifact_name = os.path.basename(config_path).split(".")[0]
    # # Writing header in case file didn't exist:
    # if not file_exists:
    #     buildstatus_file.write("yaml_path,timestamp,error")
    now = datetime.datetime.now()
    timestamp = str(datetime.datetime.timestamp(now))
    buildstatus_file.write(f"{artifact_name},{timestamp},{result}\n")
    buildstatus_file.close()

def build_image(config, src_dir, image_name, docker_cache = False):
    """
    Builds the given Docker image in 'config'.

    Parameters
    ----------
    config: dict
        Parsed config file.

    src_dir: str
        Path to the directory where the artifact is stored.

    image_name: str
        Name of the Docker image.

    docker_cache: bool
        Enables or disables Docker 'build' cache.

    Returns
    -------
    return_code: bool, build_output: str
        Return code and output of Docker 'build'.
    """
    cache_arg = " --no-cache"
    if docker_cache:
        cache_arg = ""
    logging.info(f"Starting building image {image_name}")
    path = os.path.join(src_dir, config["buildfile_dir"])
    # Using trimmed artifact URL as name:
    build_command = f"docker build{cache_arg} -t {image_name} ."
    build_process = subprocess.run(build_command.split(" "), cwd=path, capture_output=True)
    build_output = f"stdout:\n{build_process.stdout.decode('utf-8')}\nstderr:\n{build_process.stderr.decode('utf-8')}"
    logging.info(f"Output of '{build_command}':")
    logging.info(build_output)
    return_code = build_process.returncode
    logging.info(f"Command '{build_command}' exited with code {return_code}")
    return return_code, build_output

def check_env(config, src_dir, artifact_name, pkglist_path):
    """
    Builds a list of all software packages installed in the
    Docker image given in 'config', depending on the package managers
    specified in the configuration, then stores it in a CSV file.

    Parameters
    ----------
    config: dict
        Parsed config file.

    src_dir: str
        Path to the directory where the artifact is stored.

    artifact_name: str
        Name of the artifact. Used both as the Docker image name, and for the
        packages list for tracking purpose during the output analysis.

    pkglist_path: str
        Path to the package list file.

    Returns
    -------
    None
    """
    # Saving the current time to add it to every row:
    now = datetime.datetime.now()
    timestamp = str(datetime.datetime.timestamp(now))

    # Commands to list installed packages along with their versions and the name
    # of the package manager, depending on the package managers.
    # Each package manager is associated with a tuple, the first item being
    # the package manager's command, the second being the arguments for the
    # query (they must be separated for the "--entrypoint" argument of Docker
    # 'run', see below), and the third one being the command that will format
    # the output of the query command (this one can be an empty string in case
    # the formatting part is already done using the options of the first command).
    # The first command needs to be run on the container, and the second on the
    # host, to take into account container images that do not have the formatting
    # packages installed.
    pkgmgr_cmd = {
        "dpkg": ("dpkg", "-l", "awk 'NR>5 {print $2 \",\" $3 \",dpkg," + artifact_name + "," + timestamp + "\"}'"), \
        "rpm":("rpm", "-qa --queryformat '%{NAME},%{VERSION},rpm," + artifact_name + "," + timestamp + "\\n'", ""), \
        "pacman":("pacman", "-Q", "awk '{print $0 \",\" $1 \",pacman," + artifact_name + "," + timestamp + "\"}'"), \
        "pip":("pip", "list", "awk 'NR>2 {print $1 \",\" $2 \",\" \"pip," + artifact_name + "," + timestamp + "\"}'"), \
        "conda":("/root/.conda/bin/conda", "list -e", "sed 's/=/ /g' | awk 'NR>3 {print $1 \",\" $2 \",conda," + artifact_name + "," + timestamp + "\"}'")
    }
    # Command to obtain the latest commit hash in a git repository (separated
    # into 2 parts for "--entrypoint"):
    gitcmd = ("git", "log -n 1 --pretty=format:%H")

    logging.info("Checking software environment")
    pkglist_file = open(pkglist_path, "w")
    # pkglist_file.write("package,version,package_manager\n")
    path = os.path.join(src_dir, config["buildfile_dir"])
    # Package managers:
    for pkgmgr in config["package_managers"]:
        # "--entrypoint" requires command and arguments to be separated.
        # This Docker 'run' option is used to prevent the shell from printing
        # a login message, if any.
        pkglist_cmd = pkgmgr_cmd[pkgmgr][0]
        pkglist_cmdargs = pkgmgr_cmd[pkgmgr][1].split(" ")
        listformat_cmd = pkgmgr_cmd[pkgmgr][2]
        logging.info(f"Checking '{pkgmgr}'")
        # pkglist_process = subprocess.run(["docker", "run", "--rm", config["image_name"]] + pkglist_cmd.split(" "), cwd=path, capture_output=True)
        pkglist_process = subprocess.run(["docker", "run", "--rm", "--entrypoint", pkglist_cmd, artifact_name] + pkglist_cmdargs, cwd=path, capture_output=True)
        format_process = subprocess.run(f"cat << EOF | {listformat_cmd}\n{pkglist_process.stdout.decode('utf-8')}EOF", cwd=path, capture_output=True, shell=True)
        pkglist = format_process.stdout.decode("utf-8")
        pkglist_file.write(pkglist)
    # Python venvs:
    logging.info("Checking Python venvs")
    for venv in config["python_venvs"]:
        pipcmd = pkgmgr_cmd["pip"][0]
        pipcmd_args = pkgmgr_cmd["pip"][1]
        pkglist_process = subprocess.run(["docker", "run", "--rm", "-w", venv["path"], "--entrypoint", venv["path"] + "/bin/" + pipcmd, artifact_name] + pipcmd_args.split(" "), cwd=path, capture_output=True)
        format_process = subprocess.run(f"cat << EOF | {listformat_cmd}\n{pkglist_process.stdout.decode('utf-8')}EOF", cwd=path, capture_output=True, shell=True)
        pkglist = format_process.stdout.decode("utf-8")
        pkglist_file.write(pkglist)

    # Git packages:
    logging.info("Checking Git packages")
    for repo in config["git_packages"]:
        pkglist_process = subprocess.run(["docker", "run", "--rm", "-w", repo["location"], "--entrypoint", gitcmd[0], artifact_name] + gitcmd[1].split(" "), cwd=path, capture_output=True)
        repo_row = f"{repo['name']},{pkglist_process.stdout.decode('utf-8')},git,{artifact_name},{timestamp}"
        pkglist_file.write(f"{repo_row}\n")

    # Misc packages:
    logging.info("Checking miscellaneous packages")
    for pkg in config["misc_packages"]:
        logging.info(f"Downloading package {pkg['name']} from {pkg['url']}")
        pkg_file = tempfile.NamedTemporaryFile()
        pkg_path = pkg_file.name
        pkg_hash = download_file(pkg["url"], pkg_path)
        # Package hash will be an empty string if download failed:
        pkg_row = f"{pkg['name']},{pkg_hash},misc,{artifact_name},{timestamp}"
        pkglist_file.write(f"{pkg_row}\n")
    pkglist_file.close()

def remove_image(config, image_name):
    """
    Removes the Docker image given in 'config'.

    Parameters
    ----------
    config: dict
        Parsed config file.

    image_name: str
        Name of the Docker image.

    Returns
    -------
    None
    """
    logging.info(f"Removing image '{image_name}'")
    subprocess.run(["docker", "rmi", image_name], capture_output = True)

def main():
    # Paths:
    config_path = ""
    pkglist_path = "" # Package list being generated
    buildstatus_path = "" # Status of the build process of the image, when it fails
    arthashlog_path = "" # Log of the hash of the downloaded artifact
    cache_dir = "" # Artifact cache directory, when using one. 'None' value indicates no cache.
    use_cache = False

    # Command line arguments parsing:
    parser = argparse.ArgumentParser(
        prog = "ecg",
        description =
        """
        ECG is a program that automates software environment checking for scientific artifacts.
        It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
        """
    )
    # parser.add_argument(
    #     '-v', '--verbose',
    #     action = 'store_true',
    #     help = "Shows more details on what is being done."
    # )
    parser.add_argument(
        "config",
        help = "The path to the configuration file of the artifact's Docker image."
    )
    parser.add_argument(
        "-p", "--pkg-list",
        help = "Path to the file where the package list generated by the program should be written.",
        required = True
    )
    # parser.add_argument(
    #     "-l", "--log-path",
    #     help = "Path to the file where to log the output of the program.",
    #     required = True
    # )
    parser.add_argument(
        "-b", "--build-status",
        help = "Path to the file where to write the build status of the Docker image given in the configuration file.",
        required = True
    )
    parser.add_argument(
        "-a", "--artifact-hash",
        help = "Path to the file where to write the log of the hash of the downloaded artifact.",
        required = True
    )
    parser.add_argument(
        "-c", "--cache-dir",
        help =
        """
        Path to the cache directory, where artifacts that are downloaded will be stored for future usage.
        If not specified, cache is disabled.
        """,
        required = False
    ),
    parser.add_argument(
        '--docker-cache',
        action = 'store_true',
        help = "Use cache for Docker 'build'."
    )
    args = parser.parse_args()

    # Setting up the paths of the outputs:
    pkglist_path = args.pkg_list
    buildstatus_path = args.build_status
    arthashlog_path = args.artifact_hash
    cache_dir = args.cache_dir
    # log_path = "log.txt" # Output of the program
    # log_path = args.log_path

    # Creating the output files to avoid complaints from Snakemake about missing
    # outputs...
    pathlib.Path(pkglist_path).touch()
    pathlib.Path(buildstatus_path).touch()
    pathlib.Path(arthashlog_path).touch()

    # Setting up the log:
    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    # # Old version where the script writes its own log to the given file:
    # print(f"Output will be stored in {log_path}")
    # logging.basicConfig(filename = log_path, filemode = "w", format = '%(levelname)s: %(message)s', level = logging.INFO)
    # if args.verbose:
    #     logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    # Parsing the input file including the configuration of the artifact's
    # image:
    config_path = args.config
    status = ""
    config_file = open(config_path, "r")
    config = json.loads(config_file.read())
    config_file.close()

    dl_dir = None
    # If not using cache, creates a temporary directory:
    if cache_dir == None:
        tmp_dir = tempfile.TemporaryDirectory()
        dl_dir = tmp_dir.name
    else:
        use_cache = True
        dl_dir = cache_dir
    artifact_name = os.path.splitext(os.path.basename(config_path))[0]
    artifact_dir = download_sources(config, arthashlog_path, dl_dir, use_cache, artifact_name)
    # If download was successful:
    if artifact_dir != "":
        return_code, build_output = build_image(config, artifact_dir, artifact_name, args.docker_cache)
        if return_code == 0:
            status = "success"
            check_env(config, artifact_dir, artifact_name, pkglist_path)
            remove_image(config, artifact_name)
        else:
            status = builderror_identifier(build_output)
            # Creates file if not already:
            pathlib.Path(pkglist_path).touch()
    # If download failed, we need to save the error to the build status log:
    else:
        logging.fatal("Artifact could not be downloaded!")
        status = "artifact_unavailable"
    # except Exception as err:
    #     # Handles any possible script's own crashes:
    #     formatted_err = str(''.join(traceback.format_exception(None, err, err.__traceback__)))
    #     log_file = open(log_path, "a")
    #     log_file.write(formatted_err)
    #     log_file.close()
    #     logging.error(formatted_err)
    #     status = "script_crash"
    buildresult_saver(status, buildstatus_path, config_path)

if __name__ == "__main__":
    main()
ecg/app/ecg.py (new executable file, 265 lines)
@@ -0,0 +1,265 @@
import subprocess
import json
import argparse
import tempfile
import os
import requests
import zipfile
import tarfile
import pathlib
import logging
import datetime
import sys
import string
import traceback
import hashlib


def download_file_and_get_hash(url, dest_path):
    file_hash = "-1"
    try:
        req = requests.get(url)
        if req.status_code != 404:
            with open(dest_path, "wb") as file:
                file.write(req.content)
            file_hash = hashlib.sha256(req.content).hexdigest()
    except requests.exceptions.ConnectionError:
        # We can just ignore this exception, as we will just return an empty
        # hash to indicate the error:
        pass
    return file_hash
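A small usage sketch (not part of the commit) of the helper above; the URL is a placeholder, and "-1" is the sentinel the function returns when the download fails.

    import tempfile

    # Hypothetical URL, for illustration only:
    url = "https://example.org/artifact.zip"

    with tempfile.NamedTemporaryFile() as tmp:
        digest = download_file_and_get_hash(url, tmp.name)
        if digest == "-1":
            print("download failed (connection error or 404)")
        else:
            print(f"sha256 of downloaded file: {digest}")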

def download_sources(url, archive_type, arthashlog_path, dl_dir, artifact_name):
    logging.info(f"Downloading artifact from {url}")

    artifact_dir = ""

    tmp_artifact_file = tempfile.NamedTemporaryFile()
    tmp_artifact_path = tmp_artifact_file.name
    artifact_hash = download_file_and_get_hash(url, tmp_artifact_path)

    if artifact_hash != "-1":
        logging.info(f"Extracting artifact at {artifact_dir}")
        artcache_dir = f"ecg_{artifact_hash[:9]}"
        artifact_dir = os.path.join(dl_dir, artcache_dir)
        extractors = {
            "zip": zipfile.ZipFile,
            "tar": tarfile.open
        }
        os.mkdir(artifact_dir)
        extractors[archive_type](tmp_artifact_path).extractall(artifact_dir)

    with open(arthashlog_path, "w") as arthashlog_file:
        now = datetime.datetime.now()
        timestamp = str(datetime.datetime.timestamp(now))
        arthashlog_file.write(f"{timestamp},{artifact_hash},{artifact_name}\n")

    return artifact_dir

def builderror_identifier(output):
    build_errors = {
        "package_install_failed": ("Unable to locate package", "error: failed to compile"),
        "baseimage_unavailable": ("manifest unknown: manifest unknown",),
        "dockerfile_not_found": ("Dockerfile: no such file or directory",)
    }
    for error_cat, error_msgs in build_errors.items():
        for error in error_msgs:
            if error in output:
                return error_cat
    return "unknown_error"


def buildresult_saver(result, buildstatus_path, config_path):
    with open(buildstatus_path, "w") as buildstatus_file:
        artifact_name = os.path.basename(config_path).split(".")[0]
        now = datetime.datetime.now()
        timestamp = str(datetime.datetime.timestamp(now))
        buildstatus_file.write(f"{artifact_name},{timestamp},{result}\n")

def build_image(path, image_name):
    logging.info(f"Starting building image {image_name}")
    build_command = f"docker build --no-cache -t {image_name} ."
    build_process = subprocess.run(build_command.split(" "), cwd=path, capture_output=True)
    build_output = f"stdout:\n{build_process.stdout.decode('utf-8')}\nstderr:\n{build_process.stderr.decode('utf-8')}"
    logging.info(f"Output of '{build_command}':\n\n{build_output}")
    return_code = build_process.returncode
    logging.info(f"Command '{build_command}' exited with code {return_code}")
    return return_code, build_output

def check_env(config, src_dir, artifact_name, pkglist_path):
    """
    Builds a list of all software packages installed in the
    Docker image given in 'config', depending on the package managers
    specified in the configuration, then stores it in a CSV file.

    Parameters
    ----------
    config: dict
        Parsed config file.

    src_dir: str
        Path to the directory where the artifact is stored.

    artifact_name: str
        Name of the artifact. Used both as the Docker image name, and for the
        packages list for tracking purpose during the output analysis.

    pkglist_path: str
        Path to the package list file.

    Returns
    -------
    None
    """
    # Saving the current time to add it to every row:
    now = datetime.datetime.now()
    timestamp = str(datetime.datetime.timestamp(now))

    # Commands to list installed packages along with their versions and the name
    # of the package manager, depending on the package managers.
    # Each package manager is associated with a tuple, the first item being
    # the package manager's command, the second being the arguments for the
    # query (they must be separated for the "--entrypoint" argument of Docker
    # 'run', see below), and the third one being the command that will format
    # the output of the query command (this one can be an empty string in case
    # the formatting part is already done using the options of the first command).
    # The first command needs to be run on the container, and the second on the
    # host, to take into account container images that do not have the formatting
    # packages installed.
    pkgmgr_cmd = {
        "dpkg": ("dpkg",\
            "-l",\
            f"awk 'NR>5 {{print $2 \",\" $3 \",dpkg,{artifact_name},{timestamp}\"}}'"), \
        "rpm":("rpm",\
            f"-qa --queryformat '%{{NAME}},%{{VERSION}},rpm,{artifact_name},{timestamp}\\n'",\
            ""), \
        "pacman":("pacman",\
            "-Q",\
            f"awk '{{print $0 \",\" $1 \",pacman,{artifact_name},{timestamp}\"}}'"), \
        "pip":("pip",\
            "list",\
            f"awk 'NR>2 {{print $1 \",\" $2 \",\" \"pip,{artifact_name},{timestamp}\"}}'"), \
        "conda":("/root/.conda/bin/conda",\
            "list -e",\
            f"sed 's/=/ /g' | awk 'NR>3 {{print $1 \",\" $2 \",conda,{artifact_name},{timestamp}\"}}'")
    }
    # Command to obtain the latest commit hash in a git repository (separated
    # into 2 parts for "--entrypoint"):
    gitcmd = ("git", "log -n 1 --pretty=format:%H")

    logging.info("Checking software environment")
    pkglist_file = open(pkglist_path, "w")
    path = os.path.join(src_dir, config["buildfile_dir"])
    # Package managers:
    for pkgmgr in config["package_managers"]:
        # "--entrypoint" requires command and arguments to be separated.
        # This Docker 'run' option is used to prevent the shell from printing
        # a login message, if any.
        pkglist_cmd = pkgmgr_cmd[pkgmgr][0]
        pkglist_cmdargs = pkgmgr_cmd[pkgmgr][1].split(" ")
        listformat_cmd = pkgmgr_cmd[pkgmgr][2]
        logging.info(f"Checking '{pkgmgr}'")
        pkglist_process = subprocess.run(["docker", "run", "--rm", "--entrypoint", pkglist_cmd, artifact_name] + pkglist_cmdargs, cwd=path, capture_output=True)
        format_process = subprocess.run(f"cat << EOF | {listformat_cmd}\n{pkglist_process.stdout.decode('utf-8')}EOF", cwd=path, capture_output=True, shell=True)
        pkglist = format_process.stdout.decode("utf-8")
        pkglist_file.write(pkglist)

    # Python venvs:
    logging.info("Checking Python venvs")
    for venv in config["python_venvs"]:
        pipcmd = pkgmgr_cmd["pip"][0]
        pipcmd_args = pkgmgr_cmd["pip"][1]
        pkglist_process = subprocess.run(["docker", "run", "--rm", "-w", venv["path"], "--entrypoint", venv["path"] + "/bin/" + pipcmd, artifact_name] + pipcmd_args.split(" "), cwd=path, capture_output=True)
        format_process = subprocess.run(f"cat << EOF | {listformat_cmd}\n{pkglist_process.stdout.decode('utf-8')}EOF", cwd=path, capture_output=True, shell=True)
        pkglist = format_process.stdout.decode("utf-8")
        pkglist_file.write(pkglist)

    # Git packages:
    logging.info("Checking Git packages")
    for repo in config["git_packages"]:
        pkglist_process = subprocess.run(["docker", "run", "--rm", "-w", repo["location"], "--entrypoint", gitcmd[0], artifact_name] + gitcmd[1].split(" "), cwd=path, capture_output=True)
        repo_row = f"{repo['name']},{pkglist_process.stdout.decode('utf-8')},git,{artifact_name},{timestamp}"
        pkglist_file.write(f"{repo_row}\n")

    # Misc packages:
    logging.info("Checking miscellaneous packages")
    for pkg in config["misc_packages"]:
        logging.info(f"Downloading package {pkg['name']} from {pkg['url']}")
        with tempfile.NamedTemporaryFile() as pkg_file:
            pkg_hash = download_file_and_get_hash(pkg["url"], pkg_file.name)
            pkglist_file.write(f"{pkg['name']},{pkg_hash},misc,{artifact_name},{timestamp}\n")
    pkglist_file.close()

def remove_image(image_name):
    logging.info(f"Removing image '{image_name}'")
    subprocess.run(["docker", "rmi", image_name], capture_output = True)


def main():
    parser = argparse.ArgumentParser(
        prog = "ecg",
        description =
        """
        ECG is a program that automates software environment checking for scientific artifacts.
        It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
        """
    )
    parser.add_argument(
        "config",
        help = "The path to the configuration file of the artifact's Docker image."
    )
    parser.add_argument(
        "-p", "--pkg-list",
        help = "Path to the file where the package list generated by the program should be written.",
        required = True
    )
    parser.add_argument(
        "-b", "--build-status",
        help = "Path to the file where to write the build status of the Docker image given in the configuration file.",
        required = True
    )
    parser.add_argument(
        "-a", "--artifact-hash",
        help = "Path to the file where to write the log of the hash of the downloaded artifact.",
        required = True
    )
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

    config_path = args.config
    with open(config_path, "r") as config_file:
        config = json.loads(config_file.read())

    artifact_name = os.path.splitext(os.path.basename(config_path))[0]

    ecg(artifact_name, config, args.pkg_list, args.build_status, args.artifact_hash)

    return 0


def ecg(artifact_name, config, pkglist_path, buildstatus_path, arthashlog_path):
    # just in case Snakemake does not create them
    pathlib.Path(pkglist_path).touch()
    pathlib.Path(buildstatus_path).touch()
    pathlib.Path(arthashlog_path).touch()

    status = ""

    with tempfile.TemporaryDirectory() as tmp_dir:
        dl_dir = tmp_dir
        artifact_dir = download_sources(config["url"], config["type"], arthashlog_path, dl_dir, artifact_name)

        if artifact_dir != "":
            path = os.path.join(artifact_dir, config["buildfile_dir"])
            return_code, build_output = build_image(path, artifact_name)
            if return_code == 0:
                status = "success"
                check_env(config, artifact_dir, artifact_name, pkglist_path)
                remove_image(artifact_name)
            else:
                status = builderror_identifier(build_output)
        else:
            logging.fatal("Artifact could not be downloaded!")
            status = "artifact_unavailable"
    buildresult_saver(status, buildstatus_path, artifact_name)


if __name__ == "__main__":
    main()
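To make the outputs concrete, here is a short sketch (not from the commit) of the row layouts the script writes, inferred from the write() calls above; the file path is a placeholder.

    import csv

    # Row layouts inferred from ecg/app/ecg.py (no header rows are written):
    #   package list : package, version, package_manager, artifact_name, timestamp
    #   build status : artifact_name, timestamp, result
    #   artifact hash: timestamp, sha256, artifact_name
    with open("pkgs.csv") as f:  # placeholder path
        for package, version, manager, artifact, ts in csv.reader(f):
            print(f"{artifact}: {package} {version} via {manager}")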
ecg/run.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from app import *

ecg.main()
ecg/setup.py (new file, 37 lines)
@@ -0,0 +1,37 @@
from setuptools import setup

setup(
    # Application name:
    name="ecg",

    # Version number (initial):
    version="0.0.1",

    # Application author details:
    author="Quentin Guilloteau, Antoine Waehren",
    author_email="Quentin.Guilloteau@unibas.ch, Antoine.Waehren@stud.unibas.ch",

    # Packages
    packages=["app"],

    # Include additional files into the package
    entry_points={
        'console_scripts': ['ecg=app.ecg:main'],
    },

    # Details
    url="https://forge.chapril.org/GuilloteauQ/study-docker-repro-longevity",

    description="Test the software environment of Dockerfiles from research artifacts",

    long_description="""
    ECG is a program that automates software environment checking for scientific artifacts.
    It is meant to be executed periodically to analyze variations in the software environment of the artifact through time.
    """,

    install_requires=[
        "requests",
    ],

    include_package_data=True,
)
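The entry point above exposes app.ecg:main as an `ecg` command once the package is installed; a hedged sketch of driving it from Python through subprocess, with placeholder output paths (the flags come from the argparse definition in ecg/app/ecg.py).

    import subprocess

    # Placeholder paths; the JSON config is the one produced by the Nickel export step.
    cmd = [
        "ecg",
        "-p", "outputs/pkgs.csv",
        "-b", "outputs/build_status.csv",
        "-a", "outputs/artifact_hash.csv",
        "artifacts/json/europar24/wolff_fast.json",
    ]
    subprocess.run(cmd, check=True)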
flake.nix (36 lines changed)
@@ -15,28 +15,22 @@
         kapkgs = kapack.packages.${system};
       in
       {
+        packages = {
+          ecg = pkgs.python3Packages.buildPythonPackage {
+            name = "ecg";
+            version = "0.0.1";
+            src = ./ecg;
+            propagatedBuildInputs = with (pkgs.python3Packages); [
+              requests
+            ];
+            doCheck = false;
+          };
+        };
         devShells = {
-          default = pkgs.mkShell {
-            packages = with pkgs; [
-              snakemake
-              gawk
-              gnused
-              nickel
-              graphviz
-              # TODO separate into several shells
-              (python3.withPackages (ps: with ps; [
-                requests
-                kapkgs.execo
-              ]))
-              (rWrapper.override { packages = with rPackages; [ tidyverse reshape2 ]; })
-            ];
-          };
-          latex = pkgs.mkShell {
-            packages = with pkgs; [
-              texliveFull
-              rubber
-            ];
-          };
+          default = import ./workflow/envs/snakemake.nix { inherit pkgs kapkgs; };
+          nickel = import ./workflow/envs/nickel.nix { inherit pkgs kapkgs; };
+          latex = import ./workflow/envs/latex.nix { inherit pkgs kapkgs; };
+          analysis = import ./workflow/envs/analysis.nix { inherit pkgs kapkgs; };
         };
       });
 }
@@ -8,24 +8,18 @@ DATE = datetime.datetime.now().strftime("%Y%m%d")
 
 ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
 ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
-BLACKLIST_FOLDER = config["folder_blacklists"]
-EXTENSION = "json"
 SYSTEM = config["system"]
+CONFERENCE = config["conference"]
 
-ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL, BLACKLIST_FOLDER)
+ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL + "/" + CONFERENCE)
 
 PREFIX = config["prefix"]
 ECG_OUTPUTS = ["pkgs", "build_status", "artifact_hash"]
-SHELLS_ECG = {
-    "local": f"./{{input.ecg_wrapper}} {{input.ecg}} {ARTIFACTS_FOLDER_JSON}/{{wildcards.artifact}}.{EXTENSION} {{output.pkg}} {{output.build_status}} {{output.artifact_hash}} {{output.log}}",
-    "g5k": f"python3 {{input.execo_wrapper}} --path {os.getcwd()} --script {{input.oar_wrapper}} --site {config['site']} --cluster {config['cluster']} --max-duration {config['max_duration']} --checkpoint {config['checkpoint']} {'--besteffort' if config['besteffort'] else ''} --sleep_time {config['sleep_time']} --build_status_file {{output.build_status}} --artifact {{wildcards.artifact}} -- '"
-}
-
 ANALYSIS_DIR = config["analysis_dir"]
 ANALYSIS_CATS = ["sources_stats", "pkgs_changes", "build_status", "artifact"]
 ANALYSIS_SCRIPTS_DIR = "analysis"
 ANALYSIS_WRAPPER = "workflow/scripts/analysis_wrapper.sh"
-AGGREGATE_WRAPPER = "workflow/scripts/aggregate_wrapper.sh"
 ARTIFACT_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/artifact_hash/{{artifact}}",
     artifact = ARTIFACTS
 ))
@@ -34,42 +28,38 @@ SOFTENV_ANALYSIS_DIRS = " ".join(expand(f"{PREFIX}/pkgs/{{artifact}}",
 ))
 
 PLOT_DIR = config["plot_dir"]
-PLOT_SCRIPT = "plot/plot.r"
 PLOT_HEADERS = {
-    "softenv": "dpkg rpm pacman pip conda git misc",
+    #"softenv": "dpkg rpm pacman pip conda git misc",
+    "sources_stats": "dpkg rpm pacman pip conda git misc",
+    "pkgs_changes": "dpkg rpm pacman pip conda git misc",
     "build_status": "success package_install_failed baseimage_unavailable artifact_unavailable dockerfile_not_found script_crash job_time_exceeded unknown_error",
     "artifact": "available unavailable changed"
 }
 
 rule all:
     input:
-        expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/line/{{date}}.pdf",
-            analysis_cat = ANALYSIS_CATS,
-            date = DATE
-        ),
-        expand(f"{ANALYSIS_DIR}/{{analysis_cat}}/plot/bar/{{date}}.pdf",
-            analysis_cat = ["sources_stats", "build_status", "artifact"],
-            date = DATE
-        ),
-        f"{BLACKLIST_FOLDER}/{DATE}.csv"
+        expand(f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",\
+            conference=config['conference'],\
+            artifact=ARTIFACTS,\
+            date = DATE)
 
 # Artifacts configuration files:
 
 rule check_all:
     input:
-        expand(f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json", artifact=ARTIFACTS)
+        expand(f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json", artifact=ARTIFACTS, conference=config['conference'])
 
 rule check_artifact:
     input:
         "flake.nix",
         "flake.lock",
         contract="workflow/nickel/artifact_contract.ncl",
-        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{artifact}}.ncl"
+        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{conference}}/{{artifact}}.ncl"
     output:
-        f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.json"
+        f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
     shell:
         """
-        nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
+        nix develop .#nickel --command nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
         """
 
 # ECG:
@@ -78,157 +68,104 @@ rule run_ecg:
     input:
         "flake.nix",
         "flake.lock",
-        ecg="ecg.py",
-        ecg_wrapper="workflow/scripts/ecg_wrapper.sh",
+        ecg="ecg/app/ecg.py",
         execo_wrapper="workflow/scripts/submission_g5k.py",
         oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
-        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{artifact}}.{EXTENSION}"
+        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
     output:
-        log = f"{PREFIX}/logs/{{artifact}}/{{date}}.txt",
-        pkg = f"{PREFIX}/pkgs/{{artifact}}/{{date}}.csv",
-        build_status = f"{PREFIX}/build_status/{{artifact}}/{{date}}.csv",
-        artifact_hash = f"{PREFIX}/artifact_hash/{{artifact}}/{{date}}.csv",
+        log = f"{PREFIX}/{{conference}}/logs/{{artifact}}/{{date}}.txt",
+        pkg = f"{PREFIX}/{{conference}}/pkgs/{{artifact}}/{{date}}.csv",
+        build_status = f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",
+        artifact_hash = f"{PREFIX}/{{conference}}/artifact_hash/{{artifact}}/{{date}}.csv",
     shell:
-        (SHELLS_ECG["g5k"] if SYSTEM == "g5k" else "") + SHELLS_ECG["local"] + ("'" if SYSTEM == "g5k" else "")
-
-rule update_blacklist:
-    input:
-        build_status=expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
-            artifact=ARTIFACTS
-        )
-    output:
-        f"{BLACKLIST_FOLDER}/{{date}}.csv"
-    shell:
-        # We need to ignore lines where build is successful:
-        f"cat {{input}} | grep -v ',success' > {{output}} || true"
+        (f"python3 {{input.execo_wrapper}} --path {os.getcwd()} \
+        --script {{input.oar_wrapper}} \
+        --site {config['site']} \
+        --cluster {config['cluster']} \
+        --max-duration {config['max_duration']} \
+        --checkpoint {config['checkpoint']} \
+        {'--besteffort' if config['besteffort'] else ''} \
+        --sleep_time {config['sleep_time']} \
+        --build_status_file {{output.build_status}} \
+        --artifact {{wildcards.artifact}} -- '" if SYSTEM == "g5k" else "") + \
+        """
+        nix shell .#ecg --command ecg -p {output.pkg} -b {output.build_status} -a {output.artifact_hash} {input.artifact} &> {output.log} || echo "{input.artifact}, `date +%s.%N`, script_crash" > {output.build_status}
+        """ + \
+        ("'" if SYSTEM == "g5k" else "")
 
 # Analysis:
 
-rule softenv_analysis:
-    wildcard_constraints:
-        date="\d+"
-    input:
-        expand(f"{PREFIX}/pkgs/{{artifact}}/{{{{date}}}}.csv",
-            artifact = ARTIFACTS
-        )
-    output:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
-        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
-    shell:
-        f"""
-        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input}}
-        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {SOFTENV_ANALYSIS_DIRS}
-        """
-
-rule buildstatus_analysis:
-    wildcard_constraints:
-        date="\d+"
-    input:
-        expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
-            artifact = ARTIFACTS
-        ),
-    output:
-        f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
-    shell:
-        f"""
-        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
-        """
-
-rule artifact_analysis:
-    wildcard_constraints:
-        date="\d+"
-    input:
-        expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
-            artifact = ARTIFACTS
-        )
-    output:
-        f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
-    shell:
-        f"""
-        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {ARTIFACT_ANALYSIS_DIRS}
-        """
-
-# Analysis aggregate:
-
-rule analysis_aggregate:
-    input:
-        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
-            cat = ANALYSIS_CATS,
-            date = DATE
-        )
-
-rule pkgschgs_aggregate:
-    input:
-        f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
-    output:
-        f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv"
-    shell:
-        f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/pkgs_changes {{output}}"
-
-rule srcsstats_aggregate:
-    input:
-        f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv"
-    output:
-        f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv"
-    shell:
-        f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/sources_stats {{output}}"
-
-rule artifact_aggregate:
-    input:
-        f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
-    output:
-        f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
-    shell:
-        f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/artifact {{output}}"
-
-rule buildstatus_aggregate:
-    input:
-        f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
-    output:
-        f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv"
-    shell:
-        f"{AGGREGATE_WRAPPER} {ANALYSIS_DIR}/build_status {{output}}"
-
-# Plot:
-
-rule all_plot:
-    input:
-        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
-            cat = ANALYSIS_CATS,
-            date = DATE
-        )
-
-rule line_plot:
-    input:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv",
-        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/aggregated/{{date}}.csv",
-        build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv",
-        artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
-    output:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/plot/line/{{date}}.pdf",
-        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/plot/line/{{date}}.pdf",
-        build_status = f"{ANALYSIS_DIR}/build_status/plot/line/{{date}}.pdf",
-        artifact = f"{ANALYSIS_DIR}/artifact/plot/line/{{date}}.pdf"
-    shell:
-        f"""
-        Rscript {PLOT_SCRIPT} line {{input.sources_stats}} {{output.sources_stats}} {PLOT_HEADERS["softenv"]} timestamp
-        Rscript {PLOT_SCRIPT} line {{input.pkgs_changes}} {{output.pkgs_changes}} {PLOT_HEADERS["softenv"]} timestamp
-        Rscript {PLOT_SCRIPT} line {{input.build_status}} {{output.build_status}} {PLOT_HEADERS["build_status"]} timestamp
-        Rscript {PLOT_SCRIPT} line {{input.artifact}} {{output.artifact}} {PLOT_HEADERS["artifact"]} timestamp
-        """
-
-rule bar_plot:
-    input:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/aggregated/{{date}}.csv",
-        build_status = f"{ANALYSIS_DIR}/build_status/aggregated/{{date}}.csv",
-        artifact = f"{ANALYSIS_DIR}/artifact/aggregated/{{date}}.csv"
-    output:
-        sources_stats = f"{ANALYSIS_DIR}/sources_stats/plot/bar/{{date}}.pdf",
-        build_status = f"{ANALYSIS_DIR}/build_status/plot/bar/{{date}}.pdf",
-        artifact = f"{ANALYSIS_DIR}/artifact/plot/bar/{{date}}.pdf"
-    shell:
-        f"""
-        Rscript {PLOT_SCRIPT} bar {{input.sources_stats}} {{output.sources_stats}} {PLOT_HEADERS["softenv"]} timestamp
-        Rscript {PLOT_SCRIPT} bar {{input.build_status}} {{output.build_status}} {PLOT_HEADERS["build_status"]} timestamp
-        Rscript {PLOT_SCRIPT} bar {{input.artifact}} {{output.artifact}} {PLOT_HEADERS["artifact"]} timestamp
-        """
+#rule softenv_analysis:
+#    wildcard_constraints:
+#        date="\d+"
+#    input:
+#        expand(f"{PREFIX}{{conference}}/pkgs/{{artifact}}/{{{{date}}}}.csv",
+#            artifact = ARTIFACTS
+#        )
+#    output:
+#        sources_stats = f"{ANALYSIS_DIR}/sources_stats/{{date}}.csv",
+#        pkgs_changes = f"{ANALYSIS_DIR}/pkgs_changes/{{date}}.csv"
+#    shell:
+#        f"""
+#        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t sources-stats {{output.sources_stats}} {{input}}
+#        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/softenv_analysis.py -t pkgs-changes {{output.pkgs_changes}} {SOFTENV_ANALYSIS_DIRS}
+#        """
+#
+#rule buildstatus_analysis:
+#    wildcard_constraints:
+#        date="\d+"
+#    input:
+#        expand(f"{PREFIX}/build_status/{{artifact}}/{{{{date}}}}.csv",
+#            artifact = ARTIFACTS
+#        ),
+#    output:
+#        f"{ANALYSIS_DIR}/build_status/{{date}}.csv"
+#    shell:
+#        f"""
+#        {ANALYSIS_WRAPPER} files {ANALYSIS_SCRIPTS_DIR}/buildstatus_analysis.py {{output}} {{input}}
+#        """
+#
+#rule artifact_analysis:
+#    wildcard_constraints:
+#        date="\d+"
+#    input:
+#        expand(f"{PREFIX}/artifact_hash/{{artifact}}/{{{{date}}}}.csv",
+#            artifact = ARTIFACTS
+#        )
+#    output:
+#        f"{ANALYSIS_DIR}/artifact/{{date}}.csv"
+#    shell:
+#        f"""
+#        {ANALYSIS_WRAPPER} dirs {ANALYSIS_SCRIPTS_DIR}/artifact_analysis.py {{output}} {ARTIFACT_ANALYSIS_DIRS}
+#        """
+#
+## Analysis aggregate:
+#
+#rule analysis_aggregate:
+#    input:
+#        expand(f"{ANALYSIS_DIR}/{{cat}}/aggregated/{{date}}.csv",
+#            cat = ANALYSIS_CATS,
+#            date = DATE
+#        )
+#
+#rule aggregate_by_type:
+#    input:
+#        data=f"{ANALYSIS_DIR}/{{type}}/{{date}}.csv",
+#        script="workflow/scripts/aggregate_wrapper.sh"
+#    output:
+#        f"{ANALYSIS_DIR}/{{type}}/aggregated/{{date}}.csv"
+#    shell:
+#        f"{{input.script}} {ANALYSIS_DIR}/{{type}} {{output}}"
+#
+## Plot:
+#
+#rule plot:
+#    input:
+#        script = "plot/plot.r",
+#        data = f"{ANALYSIS_DIR}/{{type}}/aggregated/{{date}}.csv",
+#    output:
+#        f"{ANALYSIS_DIR}/{{type}}/{{plot}}/{{date}}.pdf"
+#    params:
+#        header = lambda w: PLOT_HEADERS[w.type]
+#    shell:
+#        # "Rscript {input.script} {wildcards.plot} {input.data} {output} {params.header} timestamp"
workflow/envs/analysis.nix (new file, 12 lines)
@@ -0,0 +1,12 @@
{ pkgs, kapkgs }:

pkgs.mkShell {
  packages = with pkgs; [
    (rWrapper.override {
      packages = with rPackages; [
        tidyverse
        reshape2
      ];
    })
  ];
}
workflow/envs/latex.nix (new file, 8 lines)
@@ -0,0 +1,8 @@
{ pkgs, kapkgs }:

pkgs.mkShell {
  packages = with pkgs; [
    texliveFull
    rubber
  ];
}
workflow/envs/nickel.nix (new file, 7 lines)
@@ -0,0 +1,7 @@
{ pkgs, kapkgs }:

pkgs.mkShell {
  packages = with pkgs; [
    nickel
  ];
}
workflow/envs/snakemake.nix (new file, 12 lines)
@@ -0,0 +1,12 @@
{ pkgs, kapkgs }:

pkgs.mkShell {
  packages = with pkgs; [
    snakemake
    gawk
    gnused
    (python3.withPackages (ps: with ps; [
      kapkgs.execo
    ]))
  ];
}
workflow/measure.smk (new file, 63 lines)
@@ -0,0 +1,63 @@
configfile: "config/config.yaml"

include: "utils.smk"

import os

ARTIFACTS_FOLDER_NICKEL = config["folder_artifacts_nickel"]
ARTIFACTS_FOLDER_JSON = config["folder_artifacts_json"]
SYSTEM = config["system"]
PREFIX = config["prefix"]

rule main:
    input:
        lambda w: expand(f"{PREFIX}/{{{{conference}}}}/build_status/{{artifact}}/{{{{date}}}}.csv",\
            artifact=get_artifacts_to_build(ARTIFACTS_FOLDER_NICKEL + "/" + w['conference']))
    output:
        "{conference}_{date}.ok"
    shell:
        "echo {input} > {output}"

rule check_artifact:
    input:
        "flake.nix",
        "flake.lock",
        contract="workflow/nickel/artifact_contract.ncl",
        artifact=f"{ARTIFACTS_FOLDER_NICKEL}/{{conference}}/{{artifact}}.ncl"
    output:
        f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
    shell:
        """
        nix develop .#nickel --command nickel export --format json --output {output} <<< 'let {{Artifact, ..}} = import "{input.contract}" in ((import "{input.artifact}") | Artifact)'
        """

# ECG:

rule run_ecg:
    input:
        "flake.nix",
        "flake.lock",
        ecg="ecg/app/ecg.py",
        execo_wrapper="workflow/scripts/submission_g5k.py",
        oar_wrapper="workflow/scripts/ecg_oar_wrapper.oar.bash",
        artifact=f"{ARTIFACTS_FOLDER_JSON}/{{conference}}/{{artifact}}.json"
    output:
        log = f"{PREFIX}/{{conference}}/logs/{{artifact}}/{{date}}.txt",
        pkg = f"{PREFIX}/{{conference}}/pkgs/{{artifact}}/{{date}}.csv",
        build_status = f"{PREFIX}/{{conference}}/build_status/{{artifact}}/{{date}}.csv",
        artifact_hash = f"{PREFIX}/{{conference}}/artifact_hash/{{artifact}}/{{date}}.csv",
    shell:
        (f"python3 {{input.execo_wrapper}} --path {os.getcwd()} \
        --script {{input.oar_wrapper}} \
        --site {config['site']} \
        --cluster {config['cluster']} \
        --max-duration {config['max_duration']} \
        --checkpoint {config['checkpoint']} \
        {'--besteffort' if config['besteffort'] else ''} \
        --sleep_time {config['sleep_time']} \
        --build_status_file {{output.build_status}} \
        --artifact {{wildcards.artifact}} -- '" if SYSTEM == "g5k" else "") + \
        """
        nix shell .#ecg --command ecg -p {output.pkg} -b {output.build_status} -a {output.artifact_hash} {input.artifact} &> {output.log} || echo "{input.artifact}, `date +%s.%N`, script_crash" > {output.build_status}
        """ + \
        ("'" if SYSTEM == "g5k" else "")
@@ -17,7 +17,7 @@ export PATH=~/.local/bin:$PATH
 g5k-setup-docker -t
 
 handler() {
-    echo "${ARTIFACT_FILE}, `date +%s.%N`, job_time_exceeded" >> ${BUILD_STATUS_FILE}; exit 0;
+    echo "${ARTIFACT_FILE}, `date +%s.%N`, job_time_exceeded" > ${BUILD_STATUS_FILE}; exit 0;
 }
 trap handler SIGUSR2
 
Deleted file (14 lines):
@@ -1,14 +0,0 @@
#!/bin/bash

ECG=$1
CONFIG=$2
PKGLIST=$3
BUILD_STATUS=$4
ARTHASH_LOG=$5
OUTPUT_LOG=$6

python3 $ECG -p $PKGLIST -b $BUILD_STATUS -a $ARTHASH_LOG $CONFIG > $OUTPUT_LOG 2> $OUTPUT_LOG
if [ $? -ne 0 ]
then
    echo "${CONFIG}, `date +%s.%N`, script_crash" >> ${BUILD_STATUS}; exit 0;
fi
@@ -13,11 +13,14 @@ def get_blacklisted(blacklist_dir_path):
             blacklisted.add(row[0])
     return blacklisted
 
-def get_artifacts_to_build(artifacts_folder, blacklist_dir_path):
-    blacklisted = get_blacklisted(blacklist_dir_path)
-    all_artifacts = set([os.path.splitext(a)[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))])
-    artifacts_to_build = list(all_artifacts.difference(blacklisted))
-    if artifacts_to_build != []:
-        return list(all_artifacts.difference(blacklisted))
-    else:
-        raise(Exception(f"There is no artifact to build! Either no artifact configuration files have been found, or they have all been blacklisted."))
+#def get_artifacts_to_build(artifacts_folder, blacklist_dir_path):
+#    blacklisted = get_blacklisted(blacklist_dir_path)
+#    all_artifacts = set([os.path.splitext(a)[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))])
+#    artifacts_to_build = list(all_artifacts.difference(blacklisted))
+#    if artifacts_to_build != []:
+#        return list(all_artifacts.difference(blacklisted))
+#    else:
+#        raise(Exception(f"There is no artifact to build! Either no artifact configuration files have been found, or they have all been blacklisted."))
 
+def get_artifacts_to_build(artifacts_folder):
+    return [os.path.splitext(a)[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))]
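A small illustration (not part of the diff) of the new helper: pointed at the europar24 folder added earlier in this changeset, it should return the artifact names without their .ncl extension.

    # Assuming the repository layout from this changeset:
    names = get_artifacts_to_build("artifacts/nickel/europar24")
    print(sorted(names))
    # ['canon_solving', 'geijer_how', 'hiraga_peanuts', 'lee_accelerated', 'wolff_fast']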