From 8022eb9dce48da04fe5d41a1536b0e112d43e02c Mon Sep 17 00:00:00 2001 From: fcaretti Date: Thu, 13 Jun 2024 10:54:17 +0200 Subject: [PATCH 01/14] feat(rule): add index and dict to reference genome --- config/config.yml | 6 +++++- workflow/Snakefile | 10 +++++++++- workflow/rules/index_genome.smk | 28 ++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 workflow/rules/index_genome.smk diff --git a/config/config.yml b/config/config.yml index d01cdd2..63af5bb 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,2 +1,6 @@ data: - folder: "data_path" \ No newline at end of file + folder: "data_path" + +reference: + folder: "genome_folder" + genome: "genome.fa" \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index 2de55c9..b2ce455 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -7,10 +7,15 @@ configfile: "config/config.yml" # Access the data folder from the config data_folder = config["data"]["folder"] - +reference_folder = config["reference"]["folder"] +genome_name = config["reference"]["genome"] +reference = os.path.join(reference_folder, genome_name) +reference_idx = f"{reference}.fai" +reference_dict = f"{reference}.dict" sample_files = glob.glob(os.path.join(data_folder, "*.bam")) samples = [os.path.basename(f).replace(".bam", "") for f in sample_files] + read_groups = [f"results/grouped/{sample}.bam" for sample in samples] deduped_files = [f"results/dedup/{sample}.bam" for sample in samples] @@ -18,7 +23,10 @@ deduped_files = [f"results/dedup/{sample}.bam" for sample in samples] rule all: input: deduped_files, + reference_idx, + reference_dict, include: "rules/add_or_replace_rg.smk" include: "rules/mark_duplicates.smk" +include: "rules/index_genome.smk" diff --git a/workflow/rules/index_genome.smk b/workflow/rules/index_genome.smk new file mode 100644 index 0000000..157d559 --- /dev/null +++ b/workflow/rules/index_genome.smk @@ -0,0 +1,28 @@ +# Rule to create the FASTA index using samtools +rule samtools_faidx: + input: + reference, + output: + reference_idx, + log: + f"{reference}.log", + params: + extra="", + wrapper: + "v3.12.1/bio/samtools/faidx" + + +# Rule to create the sequence dictionary using Picard +rule create_dict: + input: + reference, + output: + reference_dict, + log: + "logs/picard/create_dict.log", + params: + extra="", # Optional: extra arguments for picard. + resources: + mem_mb=1024, + wrapper: + "v3.12.1/bio/picard/createsequencedictionary" From 3427023fc1d7dbf194030fea20ac99ae9f0995e9 Mon Sep 17 00:00:00 2001 From: fcaretti Date: Thu, 13 Jun 2024 17:47:36 +0200 Subject: [PATCH 02/14] Change test to only run with --dry-run --- .github/workflows/main.yml | 4 ++-- .test/config.yml | 6 ++++++ .test/data/placeholder.bam | 0 .test/data/placeholder.fa | 0 README.md | 11 +++-------- 5 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 .test/config.yml create mode 100644 .test/data/placeholder.bam create mode 100644 .test/data/placeholder.fa diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6f67f9b..2d1dac5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -42,9 +42,9 @@ jobs: - name: Test workflow uses: snakemake/snakemake-github-action@v1.24.0 with: - directory: . + directory: .test snakefile: workflow/Snakefile - args: "--use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp" + args: "--dry-run --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp" - name: Test report uses: snakemake/snakemake-github-action@v1.24.0 diff --git a/.test/config.yml b/.test/config.yml new file mode 100644 index 0000000..f5d25af --- /dev/null +++ b/.test/config.yml @@ -0,0 +1,6 @@ +data: + folder: ".test/data" + +reference: + folder: ".test/data" + genome: "placeholder.fa" \ No newline at end of file diff --git a/.test/data/placeholder.bam b/.test/data/placeholder.bam new file mode 100644 index 0000000..e69de29 diff --git a/.test/data/placeholder.fa b/.test/data/placeholder.fa new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md index aab998b..0b708fe 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ -# Snakemake workflow: `` +# Snakemake workflow: SNPs from RNA [![Snakemake](https://img.shields.io/badge/snakemake-≥6.3.0-brightgreen.svg)](https://snakemake.github.io) [![GitHub actions status](https://github.com///workflows/Tests/badge.svg?branch=main)](https://github.com///actions?query=branch%3Amain+workflow%3ATests) -A Snakemake workflow for `` +A Snakemake workflow for the call of Single Nucleotide Polymorphisms (SNPs) from RNA-seq data. +The workflow starts from aligned .bam files and outputs variants called by FreeBayes (or GATK's HaplotypeCaller) and annotated by VEP. ## Usage @@ -13,9 +14,3 @@ The usage of this workflow is described in the [Snakemake Workflow Catalog](http If you use this workflow in a paper, don't forget to give credits to the authors by citing the URL of this (original) sitory and its DOI (see above). -# TODO - -* Replace `` and `` everywhere in the template (also under .github/workflows) with the correct `` name and owning user or organization. -* Replace `` with the workflow name (can be the same as ``). -* Replace `` with a description of what the workflow does. -* The workflow will occur in the snakemake-workflow-catalog once it has been made public. Then the link under "Usage" will point to the usage instructions if `` and `` were correctly set. \ No newline at end of file From 31856dc9b0abd36280d10128e8621ec34d736df1 Mon Sep 17 00:00:00 2001 From: fcaretti Date: Thu, 13 Jun 2024 18:22:46 +0200 Subject: [PATCH 03/14] Correct test --- .test/{ => config}/config.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .test/{ => config}/config.yml (100%) diff --git a/.test/config.yml b/.test/config/config.yml similarity index 100% rename from .test/config.yml rename to .test/config/config.yml From 80503450367e2898d8f366b94aeee4636652156b Mon Sep 17 00:00:00 2001 From: fcaretti Date: Thu, 13 Jun 2024 22:18:33 +0200 Subject: [PATCH 04/14] Verbose testing --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2d1dac5..652cb34 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,7 +44,7 @@ jobs: with: directory: .test snakefile: workflow/Snakefile - args: "--dry-run --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp" + args: "--dry-run --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp -p verbose" - name: Test report uses: snakemake/snakemake-github-action@v1.24.0 From bcd98fc4a565a1b2881bad9d0aa4c48735e0b0ca Mon Sep 17 00:00:00 2001 From: fcaretti Date: Thu, 13 Jun 2024 22:22:02 +0200 Subject: [PATCH 05/14] Remove verbose --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 652cb34..2639181 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,8 +44,7 @@ jobs: with: directory: .test snakefile: workflow/Snakefile - args: "--dry-run --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp -p verbose" - + args: "--dry-run --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp" - name: Test report uses: snakemake/snakemake-github-action@v1.24.0 with: From 5a858729dc1d4dd9cea7eafb7665c2623208075d Mon Sep 17 00:00:00 2001 From: fcaretti Date: Thu, 13 Jun 2024 22:30:05 +0200 Subject: [PATCH 06/14] Added dict and fai --- .test/data/placeholder.fa.dict | 0 .test/data/placeholder.fa.fai | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 .test/data/placeholder.fa.dict create mode 100644 .test/data/placeholder.fa.fai diff --git a/.test/data/placeholder.fa.dict b/.test/data/placeholder.fa.dict new file mode 100644 index 0000000..e69de29 diff --git a/.test/data/placeholder.fa.fai b/.test/data/placeholder.fa.fai new file mode 100644 index 0000000..e69de29 From e04e744f14c40ad889e70f3d7b35722538e28811 Mon Sep 17 00:00:00 2001 From: fcaretti Date: Fri, 14 Jun 2024 10:17:37 +0200 Subject: [PATCH 07/14] Added print statement in the test --- .github/workflows/main.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2639181..0d0dc7a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -31,6 +31,7 @@ jobs: snakefile: workflow/Snakefile args: "--lint" + Testing: runs-on: ubuntu-latest needs: @@ -39,6 +40,9 @@ jobs: steps: - uses: actions/checkout@v4 + - name: List .test directory contents + run: ls -R .test + - name: Test workflow uses: snakemake/snakemake-github-action@v1.24.0 with: From e5449ad37b83cf7cb251f5ff650ffaa35f2464f0 Mon Sep 17 00:00:00 2001 From: fcaretti Date: Fri, 14 Jun 2024 10:27:48 +0200 Subject: [PATCH 08/14] Printing permissions --- .github/workflows/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0d0dc7a..b4ae1d6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,6 +43,9 @@ jobs: - name: List .test directory contents run: ls -R .test + - name: Check file permissions + run: ls -l .test/data + - name: Test workflow uses: snakemake/snakemake-github-action@v1.24.0 with: From 33089bfd3179d8e1423eac2a01748acb0177293c Mon Sep 17 00:00:00 2001 From: fcaretti Date: Fri, 14 Jun 2024 10:31:33 +0200 Subject: [PATCH 09/14] Tried to delete some test files --- .test/data/placeholder.fa.dict | 0 .test/data/placeholder.fa.fai | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .test/data/placeholder.fa.dict delete mode 100644 .test/data/placeholder.fa.fai diff --git a/.test/data/placeholder.fa.dict b/.test/data/placeholder.fa.dict deleted file mode 100644 index e69de29..0000000 diff --git a/.test/data/placeholder.fa.fai b/.test/data/placeholder.fa.fai deleted file mode 100644 index e69de29..0000000 From c334670c8656c6a7748e643d7666fa9d221ae30c Mon Sep 17 00:00:00 2001 From: fcaretti Date: Fri, 14 Jun 2024 10:58:43 +0200 Subject: [PATCH 10/14] Another try --- .github/workflows/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b4ae1d6..1e3c0ee 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,7 +44,8 @@ jobs: run: ls -R .test - name: Check file permissions - run: ls -l .test/data + run: chmod 644 .test/data/placeholder.fa + run: chmod 644 .test/data/placeholder.bam - name: Test workflow uses: snakemake/snakemake-github-action@v1.24.0 From cb1e8839584e9c77a902b9afa66cc11425d43901 Mon Sep 17 00:00:00 2001 From: fcaretti Date: Fri, 14 Jun 2024 11:01:45 +0200 Subject: [PATCH 11/14] Fixed small error --- .github/workflows/main.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1e3c0ee..02bc478 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,8 +44,9 @@ jobs: run: ls -R .test - name: Check file permissions - run: chmod 644 .test/data/placeholder.fa - run: chmod 644 .test/data/placeholder.bam + run: | + chmod 644 .test/data/placeholder.fa + chmod 644 .test/data/placeholder.bam - name: Test workflow uses: snakemake/snakemake-github-action@v1.24.0 From 02f308ddd206d709425d715ac7eb6ed14ae4e23a Mon Sep 17 00:00:00 2001 From: fcaretti Date: Fri, 14 Jun 2024 11:23:24 +0200 Subject: [PATCH 12/14] Try --- .github/workflows/main.yml | 8 ++++---- workflow/Snakefile | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 02bc478..f840e26 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -51,12 +51,12 @@ jobs: - name: Test workflow uses: snakemake/snakemake-github-action@v1.24.0 with: - directory: .test - snakefile: workflow/Snakefile + directory: '.test' + snakefile: 'workflow/Snakefile' args: "--dry-run --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp" - name: Test report uses: snakemake/snakemake-github-action@v1.24.0 with: - directory: . - snakefile: workflow/Snakefile + directory: '.' + snakefile: 'workflow/Snakefile' args: "--report report.zip" \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index b2ce455..9bcfb63 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -2,7 +2,7 @@ import glob import os -configfile: "config/config.yml" +configfile: ".test/config/config.yml" # Access the data folder from the config From 5016fae22192c8833f524ea688fd7fe0e34c2d4f Mon Sep 17 00:00:00 2001 From: fcaretti Date: Fri, 14 Jun 2024 11:28:15 +0200 Subject: [PATCH 13/14] Try2 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f840e26..9f8a222 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -51,7 +51,7 @@ jobs: - name: Test workflow uses: snakemake/snakemake-github-action@v1.24.0 with: - directory: '.test' + directory: '.' snakefile: 'workflow/Snakefile' args: "--dry-run --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp" - name: Test report From 8cf3c10f060105b17cabbed1675edcba92070864 Mon Sep 17 00:00:00 2001 From: fcaretti Date: Fri, 14 Jun 2024 11:33:00 +0200 Subject: [PATCH 14/14] Remove unnecessary tests --- .github/workflows/main.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9f8a222..1ff5bf5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -40,14 +40,6 @@ jobs: steps: - uses: actions/checkout@v4 - - name: List .test directory contents - run: ls -R .test - - - name: Check file permissions - run: | - chmod 644 .test/data/placeholder.fa - chmod 644 .test/data/placeholder.bam - - name: Test workflow uses: snakemake/snakemake-github-action@v1.24.0 with: