Skip to content

Commit

Permalink
Merge pull request #4 from fcaretti/Add-rules
Browse files: browse the repository at this point in the history
feat(rule): add Split N CIGAR rule
  • Loading branch information
fcaretti authored Jun 18, 2024
2 parents 5b71699 + 0dec1d6 commit 6f0b91d
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 13 deletions.
16 changes: 14 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
snakefile: workflow/Snakefile
args: "--lint"



Testing:
runs-on: ubuntu-latest
Expand All @@ -40,15 +41,26 @@ jobs:
steps:
- uses: actions/checkout@v4


- name: Test workflow
uses: snakemake/[email protected]
with:
directory: '.'
directory: '.test'
snakefile: 'workflow/Snakefile'
args: "--dry-run --use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache --all-temp"
stagein: |
echo "Current working directory: $(pwd)"
echo "Listing contents of current directory:"
ls -R
echo "Listing contents of .test directory:"
ls -R .test
echo "Listing contents of .test/config directory:"
ls -R .test/config
echo "Listing contents of .test/data directory:"
ls -R .test/data
- name: Test report
uses: snakemake/[email protected]
with:
directory: '.'
directory: '.test'
snakefile: 'workflow/Snakefile'
args: "--report report.zip"
2 changes: 1 addition & 1 deletion config/config.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
data:
folder: "data_path"
folder: "data_folder"

reference:
folder: "genome_folder"
Expand Down
8 changes: 4 additions & 4 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import glob
import os


configfile: ".test/config/config.yml"
configfile: "config/config.yml"


# Access the data folder from the config
Expand All @@ -18,15 +18,15 @@ samples = [os.path.basename(f).replace(".bam", "") for f in sample_files]

read_groups = [f"results/grouped/{sample}.bam" for sample in samples]
deduped_files = [f"results/dedup/{sample}.bam" for sample in samples]
splitted_files = [f"results/split/{sample}.bam" for sample in samples]


rule all:
input:
deduped_files,
reference_idx,
reference_dict,
splitted_files,


include: "rules/add_or_replace_rg.smk"
include: "rules/mark_duplicates.smk"
include: "rules/index_genome.smk"
include: "rules/split_n_cigar_reads.smk"
4 changes: 2 additions & 2 deletions workflow/rules/index_genome.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ rule samtools_faidx:
output:
reference_idx,
log:
f"{reference}.log",
f"logs/create_idx.log",
params:
extra="",
wrapper:
Expand All @@ -19,7 +19,7 @@ rule create_dict:
output:
reference_dict,
log:
"logs/picard/create_dict.log",
"logs/create_dict.log",
params:
extra="", # Optional: extra arguments for picard.
resources:
Expand Down
5 changes: 1 addition & 4 deletions workflow/rules/mark_duplicates.smk
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
rule markduplicates_bam:
input:
bams="results/grouped/{sample}.bam",
# optional to specify a list of BAMs; this has the same effect
# of marking duplicates on separate read groups for a sample
# and then merging
output:
bam="results/dedup/{sample}.bam",
bam=temp("results/dedup/{sample}.bam"),
metrics="results/dedup/{sample}.metrics.txt",
log:
"logs/dedup_bam/{sample}.log",
Expand Down
17 changes: 17 additions & 0 deletions workflow/rules/split_n_cigar_reads.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Run the GATK SplitNCigarReads wrapper on each duplicate-marked BAM.
#
# Input : results/dedup/{sample}.bam plus the reference FASTA, its index and
#         its sequence dictionary. `reference`, `reference_idx` and
#         `reference_dict` are not defined in this file; they must already be
#         in scope when this file is included.
# Output: results/split/{sample}.bam
# Log   : logs/splitNCIGARreads/{sample}.log
rule splitncigarreads:
    input:
        bam="results/dedup/{sample}.bam",
        ref=reference,
        # Listing the index and dictionary as inputs makes Snakemake schedule
        # the rules that produce them before this rule runs.
        idx=reference_idx,
        dict=reference_dict,
    output:
        # NOTE(review): this path is also requested by `rule all` (via
        # `splitted_files`) -- confirm that marking it temp() is intended.
        temp("results/split/{sample}.bam"),
    log:
        "logs/splitNCIGARreads/{sample}.log",
    params:
        extra="",  # optional
        # NOTE(review): confirm that wrapper v3.12.1 actually reads this key.
        java_mem_overhead_mb=512,  # Specify overhead for non-heap memory
    resources:
        mem_mb=4096,  # Total memory available for the rule
    wrapper:
        "v3.12.1/bio/gatk/splitncigarreads"

0 comments on commit 6f0b91d

Please sign in to comment.