From 09663fc75380c4e670d2c7f1b12d488c852f5396 Mon Sep 17 00:00:00 2001
From: Alastair Smith <49727900+alsmith151@users.noreply.github.com>
Date: Tue, 26 Mar 2024 17:47:41 +0000
Subject: [PATCH] Fix add library complexity (#161)

* Add Singularity bind for presets ending with "s"

* Set APPTAINER_BIND environment variable in cli_pipeline function

* Update Singularity and Slurm configurations

* Refactor APPTAINER_BINDPATH in cli_pipeline function

* Increase runtime for trimgalore_paired rule

* Refactor is_paired method in AssayNonIP class

* Add rule align_single_spikein for aligning single spikein samples

* add align single to ruleorder

* Increase number of jobs to 100 in slurm-singularity profile

* Refactor align_single_spikein rule

* fix exo align mem resources

* Increase runtime for deeptools_make_bigwigs rules

* Add retries option to config.yaml

* Add fastq_screen flag to Output class

* Update QCFiles instantiation in NonRNAOutput and ChIPOutput

* Add library complexity flag to QCFiles instantiation

* Add library complexity to config and test

* fix typo in config yaml

* fix align paired rule

* increase memory on sort bam spikein

* Move library complexity prompt into the remove-PCR-duplicates section of config

* update mem resources in all rules

* Update memory resources for the index_bam rule

---------

Co-authored-by: CChahrour <catherine.chahrour@gmail.com>
Co-authored-by: Catherine Chahrour <74187550+CChahrour@users.noreply.github.com>
---
 seqnado/config.py                                  | 10 +++++-----
 seqnado/workflow/config/config.yaml.jinja          |  1 +
 seqnado/workflow/rules/alignment_counts.smk        |  2 +-
 .../workflow/rules/alignment_post_processing.smk   |  4 ++--
 seqnado/workflow/rules/hub.smk                     |  2 +-
 seqnado/workflow/rules/motif.smk                   |  6 +++---
 seqnado/workflow/rules/peak_call_chip.smk          |  2 +-
 seqnado/workflow/rules/peak_call_other.smk         |  2 +-
 seqnado/workflow/rules/qc.smk                      | 14 +++++++-------
 seqnado/workflow/rules/variant.smk                 |  4 ++--
 tests/test_pipelines.py                            |  1 +
 11 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/seqnado/config.py b/seqnado/config.py
index 3fea73af..35bf07ae 100644
--- a/seqnado/config.py
+++ b/seqnado/config.py
@@ -87,10 +87,6 @@ def setup_configuration(assay, genome, template_data):
             default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf",
         )
     
-    # Library Complexity
-    template_data["library_complexity"] = get_user_input(
-        "Calculate library complexity? (yes/no)", default="no", is_boolean=True
-    )
     # Blacklist
     template_data["remove_blacklist"] = get_user_input(
         "Do you want to remove blacklist regions? (yes/no)",
@@ -110,9 +106,13 @@ def setup_configuration(assay, genome, template_data):
         template_data["remove_pcr_duplicates_method"] = get_user_input(
             "Remove PCR duplicates method:", default="picard", choices=["picard"]
         )
-
+        # Library Complexity
+        template_data["library_complexity"] = get_user_input(
+        "Calculate library complexity? (yes/no)", default="no", is_boolean=True
+    )
     else:
         template_data["remove_pcr_duplicates_method"] = "False"
+        template_data["library_complexity"] = "False"
 
     # Shift reads
     if assay == "atac":
diff --git a/seqnado/workflow/config/config.yaml.jinja b/seqnado/workflow/config/config.yaml.jinja
index 493d0105..ae041425 100755
--- a/seqnado/workflow/config/config.yaml.jinja
+++ b/seqnado/workflow/config/config.yaml.jinja
@@ -21,6 +21,7 @@ remove_blacklist: "{{remove_blacklist}}"
 blacklist: "{{blacklist}}"
 
 remove_pcr_duplicates_method: "{{remove_pcr_duplicates_method}}"
+library_complexity: "{{library_complexity}}"
 
 shift_atac_reads: "{{shift_atac_reads}}"
 
diff --git a/seqnado/workflow/rules/alignment_counts.smk b/seqnado/workflow/rules/alignment_counts.smk
index 0d4a4aad..cdc9b314 100644
--- a/seqnado/workflow/rules/alignment_counts.smk
+++ b/seqnado/workflow/rules/alignment_counts.smk
@@ -13,7 +13,7 @@ rule feature_counts:
         options=check_options(config["featurecounts"]["options"]),
     threads: config["featurecounts"]["threads"]
     resources:
-        mem=lambda wildcards, attempt: 3000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB",
         runtime="2h",
     log:
         "seqnado_output/logs/readcounts/featurecounts/featurecounts.log",
diff --git a/seqnado/workflow/rules/alignment_post_processing.smk b/seqnado/workflow/rules/alignment_post_processing.smk
index a7b65b23..18a6bbdc 100644
--- a/seqnado/workflow/rules/alignment_post_processing.smk
+++ b/seqnado/workflow/rules/alignment_post_processing.smk
@@ -7,7 +7,7 @@ rule sort_bam:
     output:
         bam=temp("seqnado_output/aligned/sorted/{sample}.bam"),
     resources:
-        mem=lambda wildcards, attempt: 4000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB",
     threads: 8
     log:
         "seqnado_output/logs/sorted/{sample}.log",
@@ -26,7 +26,7 @@ rule index_bam:
         bai=temp("seqnado_output/aligned/sorted/{sample}.bam.bai"),
     threads: 1
     resources:
-        mem=1000,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt - 1)}GB",
     shell:
         "samtools index -@ {threads} -b {input.bam}"
 
diff --git a/seqnado/workflow/rules/hub.smk b/seqnado/workflow/rules/hub.smk
index 7948a808..7404d2c3 100644
--- a/seqnado/workflow/rules/hub.smk
+++ b/seqnado/workflow/rules/hub.smk
@@ -79,7 +79,7 @@ rule bed_to_bigbed:
     params:
         chrom_sizes=config["genome"]["chromosome_sizes"],
     resources:
-        mem=500,
+        mem="1GB",
     log:
         "seqnado_output/logs/bed_to_bigbed/{directory}/{sample}.log",
     shell:
diff --git a/seqnado/workflow/rules/motif.smk b/seqnado/workflow/rules/motif.smk
index 12777b87..1b752810 100644
--- a/seqnado/workflow/rules/motif.smk
+++ b/seqnado/workflow/rules/motif.smk
@@ -8,7 +8,7 @@ rule get_fasta:
     params:
         genome=config["genome"]["fasta"],
     resources:
-        mem=lambda wildcards, attempt: 1000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB",
     log:
         "seqnado_output/logs/motifs/fasta/{sample}.log",
     shell:
@@ -28,7 +28,7 @@ rule motif_meme_chip:
         meme_chip_params=config["meme"]["meme_chip_params"],
         meme_chip_db=config["meme"]["meme_chip_db"],
     resources:
-        mem=lambda wildcards, attempt: 1000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB",
     log:
         "seqnado_output/logs/motifs/meme/{sample}.log",
     shell:
@@ -47,7 +47,7 @@ rule motif_homer:
         homer_params=config["homer"]["homer_params"],
         homer_bg=config["homer"]["homer_bg"],
     resources:
-        mem=lambda wildcards, attempt: 1000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB",
     log:
         "seqnado_output/logs/motifs/homer/{sample}.log",
     shell:
diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk
index 7b65e2b2..966dca46 100644
--- a/seqnado/workflow/rules/peak_call_chip.smk
+++ b/seqnado/workflow/rules/peak_call_chip.smk
@@ -168,7 +168,7 @@ rule lanceotron_no_input:
     container:
         "library://asmith151/seqnado/seqnado_extra:latest"
     resources:
-        mem=10_1000,
+        mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt)}GB",
         runtime="6h",
     shell:
         """
diff --git a/seqnado/workflow/rules/peak_call_other.smk b/seqnado/workflow/rules/peak_call_other.smk
index b5c05d29..00e62094 100644
--- a/seqnado/workflow/rules/peak_call_other.smk
+++ b/seqnado/workflow/rules/peak_call_other.smk
@@ -67,7 +67,7 @@ rule lanceotron_no_input:
     container:
         "library://asmith151/seqnado/seqnado_extra:latest"
     resources:
-        mem=10_1000,
+        mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt)}GB",
         runtime="6h",
     shell:
         """
diff --git a/seqnado/workflow/rules/qc.smk b/seqnado/workflow/rules/qc.smk
index ffa5140c..351d7907 100644
--- a/seqnado/workflow/rules/qc.smk
+++ b/seqnado/workflow/rules/qc.smk
@@ -82,7 +82,7 @@ rule samtools_stats:
         stats="seqnado_output/qc/alignment_raw/{sample}.txt",
     threads: 1
     resources:
-        mem=1000,
+        mem=lambda wildcards, attempt: f"{1 * 2 ** (attempt)}GB",
     shell:
         """samtools stats {input.bam} > {output.stats}"""
 
@@ -142,7 +142,7 @@ rule multiqc:
     log:
         "seqnado_output/logs/multiqc.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc -n full_qc_report.html --force > {log} 2>&1"
 
@@ -170,7 +170,7 @@ rule multiqc_raw:
     log:
         "seqnado_output/logs/multiqc_raw.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc/fastqc_raw -n fastq_raw_qc.html --force > {log} 2>&1"
 
@@ -198,7 +198,7 @@ rule multiqc_trimmed:
     log:
         "seqnado_output/logs/multiqc_trimmed.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc/fastqc_trimmed -n fastq_trimmed_qc.html --force > {log} 2>&1"
 
@@ -214,7 +214,7 @@ rule multiqc_alignment_raw:
     log:
         "seqnado_output/logs/multiqc_alignment_raw.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc/alignment_raw -n alignment_raw_qc.html --force > {log} 2>&1"
 
@@ -230,7 +230,7 @@ rule multiqc_alignment_filtered:
     log:
         "seqnado_output/logs/multiqc_alignment_filtered.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/qc/alignment_filtered -n alignment_filtered_qc.html --force > {log} 2>&1"
 
@@ -246,7 +246,7 @@ rule multiqc_library_complexity:
     log:
         "seqnado_output/logs/multiqc_library_complexity.log",
     resources:
-        mem=lambda wildcards, attempt: 2000 * 2**attempt,
+        mem=lambda wildcards, attempt: f"{2 * 2 ** (attempt)}GB",
     shell:
         "multiqc -o seqnado_output/qc seqnado_output/aligned/duplicates_removed -n library_complexity_qc.html --force > {log} 2>&1"
 
diff --git a/seqnado/workflow/rules/variant.smk b/seqnado/workflow/rules/variant.smk
index c00fd805..daf95d9b 100755
--- a/seqnado/workflow/rules/variant.smk
+++ b/seqnado/workflow/rules/variant.smk
@@ -10,8 +10,8 @@ if config["call_snps"]:
             fasta=config["genome"]["fasta"],
             faidx=config["genome"]["fasta_index"],
         resources:
-            mem=1024 * 10,
-            runtime="0-"4h"",
+            mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt -1)}GB",
+            runtime=lambda wildcards, attempt: f"{5 * 2 ** (attempt - 1)}h",
         threads: config["bcftools"]["threads"]
         log:
             "seqnado_output/logs/variant/bcftools/{sample}.log",
diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index c585a0b2..28e0eb6e 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -207,6 +207,7 @@ def user_inputs(test_data_path, indicies, chromsizes, assay, assay_type, gtf, bl
     defaults_atac = {
         "remove_pcr_duplicates": "yes",
         "remove_pcr_duplicates_method": "picard",
+        "library_complexity": "yes",
         "shift_atac_reads": "yes",
         "make_bigwigs": "yes",
         "pileup_method": "deeptools",