Fix add library complexity (#161)

* Add Singularity bind for presets ending with "s"
* Set APPTAINER_BIND environment variable in cli_pipeline function
* Update Singularity and Slurm configurations
* Refactor APPTAINER_BINDPATH in cli_pipeline function
* Increase runtime for trimgalore_paired rule
* Refactor is_paired method in AssayNonIP class
* Add rule align_single_spikein for aligning single spikein samples
* add align single to ruleorder
* Increase number of jobs to 100 in slurm-singularity profile
* Refactor align_single_spikein rule
* fix exo align mem resources
* Increase runtime for deeptools_make_bigwigs rules
* Add retries option to config.yaml
* Add fastq_screen flag to Output class
* Update QCFiles instantiation in NonRNAOutput and ChIPOutput
* Add library complexity flag to QCFiles instantiation
* add lib comp to config and test
* fix typo in config yaml
* fix align paired rule
* increase memory on sort bam spikein
* move lib comp to remove dupes in config
* update mem resources in all rules
* update mem for index

---------

Co-authored-by: CChahrour <[email protected]>
Co-authored-by: Catherine Chahrour <[email protected]>
alsmith151 · Mar 26, 2024 · 09663fc · 09663fc
1 parent 23d5210
commit 09663fc
Show file tree

Hide file tree

Showing 11 changed files with 25 additions and 23 deletions.
diff --git a/seqnado/config.py b/seqnado/config.py
@@ -87,10 +87,6 @@ def setup_configuration(assay, genome, template_data):
             default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf",
         )
 
-    # Library Complexity
-    template_data["library_complexity"] = get_user_input(
-        "Calculate library complexity? (yes/no)", default="no", is_boolean=True
-    )
     # Blacklist
     template_data["remove_blacklist"] = get_user_input(
         "Do you want to remove blacklist regions? (yes/no)",
@@ -110,9 +106,13 @@ def setup_configuration(assay, genome, template_data):
         template_data["remove_pcr_duplicates_method"] = get_user_input(
             "Remove PCR duplicates method:", default="picard", choices=["picard"]
         )
-
+        # Library Complexity
+        template_data["library_complexity"] = get_user_input(
+        "Calculate library complexity? (yes/no)", default="no", is_boolean=True
+    )
     else:
         template_data["remove_pcr_duplicates_method"] = "False"
+        template_data["library_complexity"] = "False"
 
     # Shift reads
     if assay == "atac":

diff --git a/seqnado/workflow/config/config.yaml.jinja b/seqnado/workflow/config/config.yaml.jinja
@@ -21,6 +21,7 @@ remove_blacklist: "{{remove_blacklist}}"
 blacklist: "{{blacklist}}"
 
 remove_pcr_duplicates_method: "{{remove_pcr_duplicates_method}}"
+library_complexity: "{{library_complexity}}"
 
 shift_atac_reads: "{{shift_atac_reads}}"
 

diff --git a/seqnado/workflow/rules/alignment_counts.smk b/seqnado/workflow/rules/alignment_counts.smk
@@ -13,7 +13,7 @@ rule feature_counts:
         options=check_options(config["featurecounts"]["options"]),
     threads: config["featurecounts"]["threads"]
     resources:
-        mem=lambda wildcards, attempt: 3000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB",
         runtime="2h",
     log:
         "seqnado_output/logs/readcounts/featurecounts/featurecounts.log",

diff --git a/seqnado/workflow/rules/alignment_post_processing.smk b/seqnado/workflow/rules/alignment_post_processing.smk
@@ -7,7 +7,7 @@ rule sort_bam:
     output:
         bam=temp("seqnado_output/aligned/sorted/{sample}.bam"),
     resources:
-        mem=lambda wildcards, attempt: 4000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB",
     threads: 8
     log:
         "seqnado_output/logs/sorted/{sample}.log",
@@ -26,7 +26,7 @@ rule index_bam:
         bai=temp("seqnado_output/aligned/sorted/{sample}.bam.bai"),
     threads: 1
     resources:
-        mem=1000,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt - 1)}GB",
     shell:
         "samtools index -@ {threads} -b {input.bam}"
 

diff --git a/seqnado/workflow/rules/hub.smk b/seqnado/workflow/rules/hub.smk
@@ -79,7 +79,7 @@ rule bed_to_bigbed:
     params:
         chrom_sizes=config["genome"]["chromosome_sizes"],
     resources:
-        mem=500,
+        mem="1GB",
     log:
         "seqnado_output/logs/bed_to_bigbed/{directory}/{sample}.log",
     shell:

diff --git a/seqnado/workflow/rules/motif.smk b/seqnado/workflow/rules/motif.smk
@@ -8,7 +8,7 @@ rule get_fasta:
     params:
         genome=config["genome"]["fasta"],
     resources:
-        mem=lambda wildcards, attempt: 1000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB",
     log:
         "seqnado_output/logs/motifs/fasta/{sample}.log",
     shell:
@@ -28,7 +28,7 @@ rule motif_meme_chip:
         meme_chip_params=config["meme"]["meme_chip_params"],
         meme_chip_db=config["meme"]["meme_chip_db"],
     resources:
-        mem=lambda wildcards, attempt: 1000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB",
     log:
         "seqnado_output/logs/motifs/meme/{sample}.log",
     shell:
@@ -47,7 +47,7 @@ rule motif_homer:
         homer_params=config["homer"]["homer_params"],
         homer_bg=config["homer"]["homer_bg"],
     resources:
-        mem=lambda wildcards, attempt: 1000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB",
     log:
         "seqnado_output/logs/motifs/homer/{sample}.log",
     shell:

diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk
@@ -168,7 +168,7 @@ rule lanceotron_no_input:
     container:
         "library://asmith151/seqnado/seqnado_extra:latest"
     resources:
-        mem=10_1000,
+        mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt)}GB",
         runtime="6h",
     shell:
         """

diff --git a/seqnado/workflow/rules/peak_call_other.smk b/seqnado/workflow/rules/peak_call_other.smk
@@ -67,7 +67,7 @@ rule lanceotron_no_input:
     container:
         "library://asmith151/seqnado/seqnado_extra:latest"
     resources:
-        mem=10_1000,
+        mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt)}GB",
         runtime="6h",
     shell:
         """

diff --git a/seqnado/workflow/rules/qc.smk b/seqnado/workflow/rules/qc.smk
@@ -82,7 +82,7 @@ rule samtools_stats:
         stats="seqnado_output/qc/alignment_raw/{sample}.txt",
     threads: 1
     resources:
-        mem=1000,
+        mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB",
     shell:
         """samtools stats {input.bam} > {output.stats}"""
 
@@ -142,7 +142,7 @@ rule multiqc:
     log:
         "seqnado_output/logs/multiqc.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc -n full_qc_report.html --force > {log} 2>&1"
 
@@ -170,7 +170,7 @@ rule multiqc_raw:
     log:
         "seqnado_output/logs/multiqc_raw.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc/fastqc_raw -n fastq_raw_qc.html --force > {log} 2>&1"
 
@@ -198,7 +198,7 @@ rule multiqc_trimmed:
     log:
         "seqnado_output/logs/multiqc_trimmed.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc/fastqc_trimmed -n fastq_trimmed_qc.html --force > {log} 2>&1"
 
@@ -214,7 +214,7 @@ rule multiqc_alignment_raw:
     log:
         "seqnado_output/logs/multiqc_alignment_raw.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc/alignment_raw -n alignment_raw_qc.html --force > {log} 2>&1"
 
@@ -230,7 +230,7 @@ rule multiqc_alignment_filtered:
     log:
         "seqnado_output/logs/multiqc_alignment_filtered.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc/alignment_filtered -n alignment_filtered_qc.html --force > {log} 2>&1"
 
@@ -246,7 +246,7 @@ rule multiqc_library_complexity:
     log:
         "seqnado_output/logs/multiqc_library_complexity.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/aligned/duplicates_removed -n library_complexity_qc.html --force > {log} 2>&1"
 

diff --git a/seqnado/workflow/rules/variant.smk b/seqnado/workflow/rules/variant.smk
@@ -10,8 +10,8 @@ if config["call_snps"]:
             fasta=config["genome"]["fasta"],
             faidx=config["genome"]["fasta_index"],
         resources:
-            mem=1024 * 10,
-            runtime="0-"4h"",
+            mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt -1)}GB",
+            runtime=lambda wildcards, attempt: f"{5 * 2 ** (attempt - 1)}h",
         threads: config["bcftools"]["threads"]
         log:
             "seqnado_output/logs/variant/bcftools/{sample}.log",

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
@@ -207,6 +207,7 @@ def user_inputs(test_data_path, indicies, chromsizes, assay, assay_type, gtf, bl
     defaults_atac = {
         "remove_pcr_duplicates": "yes",
         "remove_pcr_duplicates_method": "picard",
+        "library_complexity": "yes",
         "shift_atac_reads": "yes",
         "make_bigwigs": "yes",
         "pileup_method": "deeptools",