From 0d34988df8228de94523c7ed0f8a5d0f5304fab2 Mon Sep 17 00:00:00 2001 From: riasc Date: Wed, 26 Jun 2024 22:08:50 -0500 Subject: [PATCH] changes to speed up process --- CHANGELOG.md | 7 +++++++ workflow/rules/align.smk | 6 ++++-- workflow/rules/hlatyping.smk | 7 +++++-- workflow/scripts/genotyping/optitype_wrapper.py | 2 +- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f41306b..1ee574b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Prioritization of neoantigens is now done separately for each variant type (speeds up the process) - NMD information (e.g., escape rule,...) is now also calculated for all variants +## [0.2.8] - 2024-06-26 + +### Fix + +- Added threads option to samtools sort calls to speed up the process +- Fixed wrong call to optitype within the wrapper script + ## [0.2.7] - 2024-06-23 ### Fix diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index bf29421..9c030b1 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -248,11 +248,13 @@ if config['data']['dnaseq_filetype'] in ['.fq','.fastq']: "../envs/samtools.yml" params: extra="" - threads: config['threads'] + threads: 6 + resources: + mem_mb=20000 shell: """ samtools fixmate -pcmu -O bam -@ 6 {input.aln} - \ - | samtools sort -m1g -O bam -T tmp/ - -o - \ + | samtools sort -@ 4 -m1g -O bam -T tmp/ - -o - \ | samtools markdup -r -@ 6 - {output.bam} > {log} 2>&1 """ diff --git a/workflow/rules/hlatyping.smk b/workflow/rules/hlatyping.smk index f3b3270..9f4717f 100644 --- a/workflow/rules/hlatyping.smk +++ b/workflow/rules/hlatyping.smk @@ -65,12 +65,15 @@ rule get_reads_hlatyping_PE: "Retrieve paired-end reads ({wildcards.nartype}) for HLA genotyping - sample:{wildcards.sample} group:{wildcards.group}" log: "logs/{sample}/hla/get_reads_hlatyping_PE_{group}_{nartype}.log" + threads: 4 + resources: + mem_mb=20000 conda: 
"../envs/samtools.yml" shell: """ - samtools sort -n {input.fwd} -T tmp/ | samtools fastq > {output.fwd} - samtools sort -n {input.rev} -T tmp/ | samtools fastq > {output.rev} + samtools sort -@4 -m4g -n {input.fwd} -T tmp/ | samtools fastq -@4 > {output.fwd} + samtools sort -@4 -m4g -n {input.rev} -T tmp/ | samtools fastq -@4 > {output.rev} """ ######### single-end reads ######### diff --git a/workflow/scripts/genotyping/optitype_wrapper.py b/workflow/scripts/genotyping/optitype_wrapper.py index f50b1b9..b26cf85 100644 --- a/workflow/scripts/genotyping/optitype_wrapper.py +++ b/workflow/scripts/genotyping/optitype_wrapper.py @@ -32,7 +32,7 @@ def main(): tsv = subprocess.Popen("touch " + outpath + prefix + "_result.tsv", shell=True) else: # call optitype - optitype = subprocess.Popen("optitype --input " + inbams + + optitype = subprocess.Popen("OptiTypePipeline.py --input " + inbams + " --prefix " + prefix + " --" + nartype + "-v", shell=True)