From 09663fc75380c4e670d2c7f1b12d488c852f5396 Mon Sep 17 00:00:00 2001 From: Alastair Smith <49727900+alsmith151@users.noreply.github.com> Date: Tue, 26 Mar 2024 17:47:41 +0000 Subject: [PATCH] Fix add library complexity (#161) * Add Singularity bind for presets ending with "s" * Set APPTAINER_BIND environment variable in cli_pipeline function * Update Singularity and Slurm configurations * Refactor APPTAINER_BINDPATH in cli_pipeline function * Increase runtime for trimgalore_paired rule * Refactor is_paired method in AssayNonIP class * Add rule align_single_spikein for aligning single spikein samples * add align single to ruleorder * Increase number of jobs to 100 in slurm-singularity profile * Refactor align_single_spikein rule * fix exo align mem resources * Increase runtime for deeptools_make_bigwigs rules * Add retries option to config.yaml * Add fastq_screen flag to Output class * Update QCFiles instantiation in NonRNAOutput and ChIPOutput * Add library complexity flag to QCFiles instantiation * add lib comp to config and test * fix typo in config yaml * fix align paired rule * increase memory on sort bam spikein * move lib comp to remove dupes in config * update mem resources in all rules * update mem for index --------- Co-authored-by: CChahrour Co-authored-by: Catherine Chahrour <74187550+CChahrour@users.noreply.github.com> --- seqnado/config.py | 10 +++++----- seqnado/workflow/config/config.yaml.jinja | 1 + seqnado/workflow/rules/alignment_counts.smk | 2 +- .../workflow/rules/alignment_post_processing.smk | 4 ++-- seqnado/workflow/rules/hub.smk | 2 +- seqnado/workflow/rules/motif.smk | 6 +++--- seqnado/workflow/rules/peak_call_chip.smk | 2 +- seqnado/workflow/rules/peak_call_other.smk | 2 +- seqnado/workflow/rules/qc.smk | 14 +++++++------- seqnado/workflow/rules/variant.smk | 4 ++-- tests/test_pipelines.py | 1 + 11 files changed, 25 insertions(+), 23 deletions(-) diff --git a/seqnado/config.py b/seqnado/config.py index 3fea73af..35bf07ae 100644 --- a/seqnado/config.py +++ b/seqnado/config.py @@ -87,10 +87,6 @@ def setup_configuration(assay, genome, template_data): default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf", ) - # Library Complexity - template_data["library_complexity"] = get_user_input( - "Calculate library complexity? (yes/no)", default="no", is_boolean=True - ) # Blacklist template_data["remove_blacklist"] = get_user_input( "Do you want to remove blacklist regions? (yes/no)", @@ -110,9 +106,13 @@ def setup_configuration(assay, genome, template_data): template_data["remove_pcr_duplicates_method"] = get_user_input( "Remove PCR duplicates method:", default="picard", choices=["picard"] ) - + # Library Complexity + template_data["library_complexity"] = get_user_input( + "Calculate library complexity? (yes/no)", default="no", is_boolean=True + ) else: template_data["remove_pcr_duplicates_method"] = "False" + template_data["library_complexity"] = "False" # Shift reads if assay == "atac": diff --git a/seqnado/workflow/config/config.yaml.jinja b/seqnado/workflow/config/config.yaml.jinja index 493d0105..ae041425 100755 --- a/seqnado/workflow/config/config.yaml.jinja +++ b/seqnado/workflow/config/config.yaml.jinja @@ -21,6 +21,7 @@ remove_blacklist: "{{remove_blacklist}}" blacklist: "{{blacklist}}" remove_pcr_duplicates_method: "{{remove_pcr_duplicates_method}}" +library_complexity: "{{library_complexity}}" shift_atac_reads: "{{shift_atac_reads}}" diff --git a/seqnado/workflow/rules/alignment_counts.smk b/seqnado/workflow/rules/alignment_counts.smk index 0d4a4aad..cdc9b314 100644 --- a/seqnado/workflow/rules/alignment_counts.smk +++ b/seqnado/workflow/rules/alignment_counts.smk @@ -13,7 +13,7 @@ rule feature_counts: options=check_options(config["featurecounts"]["options"]), threads: config["featurecounts"]["threads"] resources: - mem=lambda wildcards, attempt: 3000 * 2**attempt, + mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB", runtime="2h", log: "seqnado_output/logs/readcounts/featurecounts/featurecounts.log", diff --git a/seqnado/workflow/rules/alignment_post_processing.smk b/seqnado/workflow/rules/alignment_post_processing.smk index a7b65b23..18a6bbdc 100644 --- a/seqnado/workflow/rules/alignment_post_processing.smk +++ b/seqnado/workflow/rules/alignment_post_processing.smk @@ -7,7 +7,7 @@ rule sort_bam: output: bam=temp("seqnado_output/aligned/sorted/{sample}.bam"), resources: - mem=lambda wildcards, attempt: 4000 * 2**attempt, + mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB", threads: 8 log: "seqnado_output/logs/sorted/{sample}.log", @@ -26,7 +26,7 @@ rule index_bam: bai=temp("seqnado_output/aligned/sorted/{sample}.bam.bai"), threads: 1 resources: - mem=1000, + mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt - 1)}GB", shell: "samtools index -@ {threads} -b {input.bam}" diff --git a/seqnado/workflow/rules/hub.smk b/seqnado/workflow/rules/hub.smk index 7948a808..7404d2c3 100644 --- a/seqnado/workflow/rules/hub.smk +++ b/seqnado/workflow/rules/hub.smk @@ -79,7 +79,7 @@ rule bed_to_bigbed: params: chrom_sizes=config["genome"]["chromosome_sizes"], resources: - mem=500, + mem="1GB", log: "seqnado_output/logs/bed_to_bigbed/{directory}/{sample}.log", shell: diff --git a/seqnado/workflow/rules/motif.smk b/seqnado/workflow/rules/motif.smk index 12777b87..1b752810 100644 --- a/seqnado/workflow/rules/motif.smk +++ b/seqnado/workflow/rules/motif.smk @@ -8,7 +8,7 @@ rule get_fasta: params: genome=config["genome"]["fasta"], resources: - mem=lambda wildcards, attempt: 1000 * 2**attempt, + mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB", log: "seqnado_output/logs/motifs/fasta/{sample}.log", shell: @@ -28,7 +28,7 @@ rule motif_meme_chip: meme_chip_params=config["meme"]["meme_chip_params"], meme_chip_db=config["meme"]["meme_chip_db"], resources: - mem=lambda wildcards, attempt: 1000 * 2**attempt, + mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB", log: "seqnado_output/logs/motifs/meme/{sample}.log", shell: @@ -47,7 +47,7 @@ rule motif_homer: homer_params=config["homer"]["homer_params"], homer_bg=config["homer"]["homer_bg"], resources: - mem=lambda wildcards, attempt: 1000 * 2**attempt, + mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB", log: "seqnado_output/logs/motifs/homer/{sample}.log", shell: diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk index 7b65e2b2..966dca46 100644 --- a/seqnado/workflow/rules/peak_call_chip.smk +++ b/seqnado/workflow/rules/peak_call_chip.smk @@ -168,7 +168,7 @@ rule lanceotron_no_input: container: "library://asmith151/seqnado/seqnado_extra:latest" resources: - mem=10_1000, + mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt)}GB", runtime="6h", shell: """ diff --git a/seqnado/workflow/rules/peak_call_other.smk b/seqnado/workflow/rules/peak_call_other.smk index b5c05d29..00e62094 100644 --- a/seqnado/workflow/rules/peak_call_other.smk +++ b/seqnado/workflow/rules/peak_call_other.smk @@ -67,7 +67,7 @@ rule lanceotron_no_input: container: "library://asmith151/seqnado/seqnado_extra:latest" resources: - mem=10_1000, + mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt)}GB", runtime="6h", shell: """ diff --git a/seqnado/workflow/rules/qc.smk b/seqnado/workflow/rules/qc.smk index ffa5140c..351d7907 100644 --- a/seqnado/workflow/rules/qc.smk +++ b/seqnado/workflow/rules/qc.smk @@ -82,7 +82,7 @@ rule samtools_stats: stats="seqnado_output/qc/alignment_raw/{sample}.txt", threads: 1 resources: - mem=1000, + mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB", shell: """samtools stats {input.bam} > {output.stats}""" @@ -142,7 +142,7 @@ rule multiqc: log: "seqnado_output/logs/multiqc.log", resources: - mem=lambda wildcards, attempt: 2000 * 2**attempt, + mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB", shell: "multiqc -o seqnado_output/qc seqnado_output/qc -n full_qc_report.html --force > {log} 2>&1" @@ -170,7 +170,7 @@ rule multiqc_raw: log: "seqnado_output/logs/multiqc_raw.log", resources: - mem=lambda wildcards, attempt: 2000 * 2**attempt, + mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB", shell: "multiqc -o seqnado_output/qc seqnado_output/qc/fastqc_raw -n fastq_raw_qc.html --force > {log} 2>&1" @@ -198,7 +198,7 @@ rule multiqc_trimmed: log: "seqnado_output/logs/multiqc_trimmed.log", resources: - mem=lambda wildcards, attempt: 2000 * 2**attempt, + mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB", shell: "multiqc -o seqnado_output/qc seqnado_output/qc/fastqc_trimmed -n fastq_trimmed_qc.html --force > {log} 2>&1" @@ -214,7 +214,7 @@ rule multiqc_alignment_raw: log: "seqnado_output/logs/multiqc_alignment_raw.log", resources: - mem=lambda wildcards, attempt: 2000 * 2**attempt, + mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB", shell: "multiqc -o seqnado_output/qc seqnado_output/qc/alignment_raw -n alignment_raw_qc.html --force > {log} 2>&1" @@ -230,7 +230,7 @@ rule multiqc_alignment_filtered: log: "seqnado_output/logs/multiqc_alignment_filtered.log", resources: - mem=lambda wildcards, attempt: 2000 * 2**attempt, + mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB", shell: "multiqc -o seqnado_output/qc seqnado_output/qc/alignment_filtered -n alignment_filtered_qc.html --force > {log} 2>&1" @@ -246,7 +246,7 @@ rule multiqc_library_complexity: log: "seqnado_output/logs/multiqc_library_complexity.log", resources: - mem=lambda wildcards, attempt: 2000 * 2**attempt, + mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB", shell: "multiqc -o seqnado_output/qc seqnado_output/aligned/duplicates_removed -n library_complexity_qc.html --force > {log} 2>&1" diff --git a/seqnado/workflow/rules/variant.smk b/seqnado/workflow/rules/variant.smk index c00fd805..daf95d9b 100755 --- a/seqnado/workflow/rules/variant.smk +++ b/seqnado/workflow/rules/variant.smk @@ -10,8 +10,8 @@ if config["call_snps"]: fasta=config["genome"]["fasta"], faidx=config["genome"]["fasta_index"], resources: - mem=1024 * 10, - runtime="0-"4h"", + mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt -1)}GB", + runtime=lambda wildcards, attempt: f"{5 * 2 ** (attempt - 1)}h", threads: config["bcftools"]["threads"] log: "seqnado_output/logs/variant/bcftools/{sample}.log", diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index c585a0b2..28e0eb6e 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -207,6 +207,7 @@ def user_inputs(test_data_path, indicies, chromsizes, assay, assay_type, gtf, bl defaults_atac = { "remove_pcr_duplicates": "yes", "remove_pcr_duplicates_method": "picard", + "library_complexity": "yes", "shift_atac_reads": "yes", "make_bigwigs": "yes", "pileup_method": "deeptools",