From 27e0552bc88fb763a3f6052ed5edfd05a7487e31 Mon Sep 17 00:00:00 2001 From: Quentin Guilloteau Date: Thu, 11 Jul 2024 13:35:54 +0200 Subject: [PATCH 1/5] add snakemake and awk to nix flake --- flake.lock | 8 ++++---- flake.nix | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/flake.lock b/flake.lock index d3d0f8a..b7deb4e 100644 --- a/flake.lock +++ b/flake.lock @@ -20,16 +20,16 @@ }, "nixpkgs": { "locked": { - "lastModified": 1717179513, - "narHash": "sha256-vboIEwIQojofItm2xGCdZCzW96U85l9nDW3ifMuAIdM=", + "lastModified": 1701282334, + "narHash": "sha256-MxCVrXY6v4QmfTwIysjjaX0XUhqBbxTWWB4HXtDYsdk=", "owner": "nixos", "repo": "nixpkgs", - "rev": "63dacb46bf939521bdc93981b4cbb7ecb58427a0", + "rev": "057f9aecfb71c4437d2b27d3323df7f93c010b7e", "type": "github" }, "original": { "owner": "nixos", - "ref": "24.05", + "ref": "23.11", "repo": "nixpkgs", "type": "github" } diff --git a/flake.nix b/flake.nix index fab3a1e..b4e08d5 100644 --- a/flake.nix +++ b/flake.nix @@ -2,7 +2,7 @@ description = "Flake study docker longevity"; inputs = { - nixpkgs.url = "github:nixos/nixpkgs/24.05"; + nixpkgs.url = "github:nixos/nixpkgs/23.11"; flake-utils.url = "github:numtide/flake-utils"; }; @@ -15,6 +15,8 @@ devShells = { default = pkgs.mkShell { packages = with pkgs; [ + snakemake + gawk (python3.withPackages (ps: with ps; [ requests pyyaml From 1d6cec18ff85c651553e2b91e50c58b0694014a6 Mon Sep 17 00:00:00 2001 From: Quentin Guilloteau Date: Thu, 11 Jul 2024 13:36:22 +0200 Subject: [PATCH 2/5] create folder to store artifacts yamls --- artifacts/example.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 artifacts/example.yaml diff --git a/artifacts/example.yaml b/artifacts/example.yaml new file mode 100644 index 0000000..c7854df --- /dev/null +++ b/artifacts/example.yaml @@ -0,0 +1,15 @@ +artefact_url: "https://example.com/artifact.zip" +type: "zip" # Possible values: zip, tgz +doi: "XX.XXXX/XXXXXXX.XXXXXXX" +dockerfiles: + - name: 
"image1:version" + location: "path/to/docker/folder" + package_managers: + - "dpkg" # Possible values: dpkg, rpm, pacman, pip, conda + git_packages: + - name: "pkg1" + location: "path/to/git/repo" + misc_packages: + - name: "mpkg1" + url: "https://example.com/package1.zip" + type: "zip" # Possible values: zip, tgz From 3737dd29e690caec67622fcefb3bc434381aff8b Mon Sep 17 00:00:00 2001 From: Quentin Guilloteau Date: Thu, 11 Jul 2024 13:37:07 +0200 Subject: [PATCH 3/5] add start of workflow to extract non blacklisted artifacts --- blacklist.csv | 1 + workflow/1_artifacts_to_build.smk | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 blacklist.csv create mode 100644 workflow/1_artifacts_to_build.smk diff --git a/blacklist.csv b/blacklist.csv new file mode 100644 index 0000000..442168d --- /dev/null +++ b/blacklist.csv @@ -0,0 +1 @@ +artifacts/sc24_test.yaml, IMAGE_NOT_FOUND, 0 diff --git a/workflow/1_artifacts_to_build.smk b/workflow/1_artifacts_to_build.smk new file mode 100644 index 0000000..d62505d --- /dev/null +++ b/workflow/1_artifacts_to_build.smk @@ -0,0 +1,28 @@ +import csv +import os + +ARTIFACTS_FOLDER = "artifacts" +BLACKLIST = "blacklist.csv" + +def get_blacklisted_paths(blacklist_csv_path): + blacklisted = set() + with open(blacklist_csv_path, "r") as csv_file: + spamreader = csv.reader(csv_file, delimiter=",") + for row in spamreader: + blacklisted.add(row[0]) + return blacklisted + +def get_artifacts_to_build(artifacts_folder, blacklist_csv_path): + blacklisted = get_blacklisted_paths(blacklist_csv_path) + all_artifacts = set([os.path.join(artifacts_folder, a) for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))]) + return list(all_artifacts.difference(blacklisted)) + +rule all: + input: + BLACKLIST, + output: + "all.csv", + params: + artifacts = get_artifacts_to_build(ARTIFACTS_FOLDER, BLACKLIST), + shell: + "echo {params.artifacts} > {output}" From 
28126f8b3e3e3f8260c4360cd5878e4b332dcc29 Mon Sep 17 00:00:00 2001 From: Quentin Guilloteau Date: Thu, 11 Jul 2024 15:17:16 +0200 Subject: [PATCH 4/5] basic workflow for ecg --- blacklist.csv | 2 +- blacklists/blacklist.csv | 1 + workflow/Snakefile | 41 +++++++++++++++++++ .../{1_artifacts_to_build.smk => utils.smk} | 15 +------ 4 files changed, 44 insertions(+), 15 deletions(-) mode change 100644 => 120000 blacklist.csv create mode 100644 blacklists/blacklist.csv create mode 100644 workflow/Snakefile rename workflow/{1_artifacts_to_build.smk => utils.smk} (52%) diff --git a/blacklist.csv b/blacklist.csv deleted file mode 100644 index 442168d..0000000 --- a/blacklist.csv +++ /dev/null @@ -1 +0,0 @@ -artifacts/sc24_test.yaml, IMAGE_NOT_FOUND, 0 diff --git a/blacklist.csv b/blacklist.csv new file mode 120000 index 0000000..0e88979 --- /dev/null +++ b/blacklist.csv @@ -0,0 +1 @@ +blacklists/blacklist.csv \ No newline at end of file diff --git a/blacklists/blacklist.csv b/blacklists/blacklist.csv new file mode 100644 index 0000000..5b44974 --- /dev/null +++ b/blacklists/blacklist.csv @@ -0,0 +1 @@ +sc24_test, IMAGE_NOT_FOUND, 0 diff --git a/workflow/Snakefile b/workflow/Snakefile new file mode 100644 index 0000000..9609f19 --- /dev/null +++ b/workflow/Snakefile @@ -0,0 +1,41 @@ +include: "utils.smk" + +import datetime +DATE = datetime.datetime.now().strftime("%Y%m%d") + +ARTIFACTS_FOLDER = "artifacts" +BLACKLIST_FOLDER = "blacklists" +BLACKLIST = "blacklist.csv" +EXTENSION = "yaml" + +ARTIFACTS = get_artifacts_to_build(ARTIFACTS_FOLDER, BLACKLIST) + +rule all: + input: + expand("{folder}/{artifact}/{date}.csv",\ + folder=["logs", "pkgs", "status"],\ + artifact=ARTIFACTS,\ + date=DATE), + f"{BLACKLIST_FOLDER}/{DATE}.csv" + +rule run_ecg: + input: + "flake.nix", + "flake.lock", + ecg="ecg.py", + output: + log = "logs/{artifact}/{date}.csv", + pkg = "pkgs/{artifact}/{date}.csv", + status = "status/{artifact}/{date}.csv", + shell: + f"python3 {{input.ecg}} --log 
{{output.log}} --pkg {{output.pkg}} --status {{output.status}} {ARTIFACTS_FOLDER}/{{wildcards.artifact}}.{EXTENSION}" + +rule update_blacklist: + input: + BLACKLIST, + status=expand("status/{artifact}/{{date}}.csv",\ + artifact=ARTIFACTS) + output: + f"{BLACKLIST_FOLDER}/{{date}}.csv" + shell: + f"cat {{input}} > {{output}} && ln -sf {{output}} {BLACKLIST}" diff --git a/workflow/1_artifacts_to_build.smk b/workflow/utils.smk similarity index 52% rename from workflow/1_artifacts_to_build.smk rename to workflow/utils.smk index d62505d..a136bd8 100644 --- a/workflow/1_artifacts_to_build.smk +++ b/workflow/utils.smk @@ -1,9 +1,6 @@ import csv import os -ARTIFACTS_FOLDER = "artifacts" -BLACKLIST = "blacklist.csv" - def get_blacklisted_paths(blacklist_csv_path): blacklisted = set() with open(blacklist_csv_path, "r") as csv_file: @@ -14,15 +11,5 @@ def get_blacklisted_paths(blacklist_csv_path): def get_artifacts_to_build(artifacts_folder, blacklist_csv_path): blacklisted = get_blacklisted_paths(blacklist_csv_path) - all_artifacts = set([os.path.join(artifacts_folder, a) for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))]) + all_artifacts = set([os.path.splitext(a)[0] for a in os.listdir(artifacts_folder) if not os.path.isdir(os.path.join(artifacts_folder, a))]) return list(all_artifacts.difference(blacklisted)) - -rule all: - input: - BLACKLIST, - output: - "all.csv", - params: - artifacts = get_artifacts_to_build(ARTIFACTS_FOLDER, BLACKLIST), - shell: - "echo {params.artifacts} > {output}" From fd7c11483f58b65e7d1fbcf0be3b527216799792 Mon Sep 17 00:00:00 2001 From: antux18 Date: Tue, 16 Jul 2024 10:29:46 +0200 Subject: [PATCH 5/5] Fixed example YAML to fit 'main'.
--- example.yaml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/example.yaml b/example.yaml index c7854df..a597ca6 100644 --- a/example.yaml +++ b/example.yaml @@ -1,15 +1,14 @@ -artefact_url: "https://example.com/artifact.zip" +artifact_url: "https://example.com/artifact.zip" type: "zip" # Possible values: zip, tgz doi: "XX.XXXX/XXXXXXX.XXXXXXX" -dockerfiles: - - name: "image1:version" - location: "path/to/docker/folder" - package_managers: - - "dpkg" # Possible values: dpkg, rpm, pacman, pip, conda - git_packages: - - name: "pkg1" - location: "path/to/git/repo" - misc_packages: - - name: "mpkg1" - url: "https://example.com/package1.zip" - type: "zip" # Possible values: zip, tgz +image_name: "image1:version" +dockerfile_location: "path/to/docker/folder" +package_managers: + - "dpkg" # Possible values: dpkg, rpm, pacman, pip, conda +git_packages: + - name: "pkg1" + location: "path/to/git/repo" +misc_packages: + - name: "mpkg1" + url: "https://example.com/package1.zip" + type: "zip" # Possible values: zip, tgz