From 05915f4e0ec8f42714ced2732281e9dadcacc2b4 Mon Sep 17 00:00:00 2001
From: Alastair Smith <49727900+alsmith151@users.noreply.github.com>
Date: Tue, 28 May 2024 17:15:29 +0100
Subject: [PATCH] fix: heatmaps always created and peak files absent (#183)

* Refactor HeatmapFiles class to include make_heatmaps property in design.py

* Refactor NonRNAOutput class in design.py to remove computed_field decorator

* fix: correct query function to provide both the ip and control if requested

* Refactor control lambda function in peak_call_chip.smk to allow null control files

* Refactor query function in DesignIP class to return IPExperiment object

---------

Co-authored-by: CChahrour <catherine.chahrour@gmail.com>
---
 seqnado/design.py                         | 32 +++++++++++------------
 seqnado/workflow/rules/peak_call_chip.smk | 23 ++++++++++++++++
 2 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/seqnado/design.py b/seqnado/design.py
index 30ca101e..7f4cc03f 100755
--- a/seqnado/design.py
+++ b/seqnado/design.py
@@ -484,9 +484,7 @@ def controls_performed(self) -> List[str]:
                 control.add(f.control_performed)
         return list(control)
 
-    def query(
-        self, sample_name: str, full_experiment: bool = False
-    ) -> Union[FastqSetIP, Dict[str, FastqSetIP]]:
+    def query(self, sample_name: str, full_experiment: bool = False) -> Union[FastqSetIP, IPExperiment]:
         """
         Extracts a pair of fastq files from the design.
         """
@@ -496,32 +494,27 @@ def query(
         )
         is_control = False
 
-        experiment_files = dict()
-
         if sample_name in ip_names or sample_name in control_names:
             for experiment in self.experiments:
                 if experiment.ip_set_fullname == sample_name:
-                    experiment_files["ip"] = experiment.ip
-                    experiment_files["control"] = experiment.control
-
+                    exp = experiment
+                    break
                 elif (
                     experiment.has_control
                     and experiment.control_fullname == sample_name
                 ):
                     is_control = True
-                    experiment_files["ip"] = experiment.ip
-                    experiment_files["control"] = experiment.control
+                    exp = experiment
+                    break
+
         else:
             raise ValueError(f"Could not find sample with name {sample_name}")
+        
 
         if full_experiment:
-            return experiment_files
+            return exp
         else:
-            return (
-                experiment_files["ip"]
-                if not is_control
-                else experiment_files["control"]
-            )
+            return exp.ip if not is_control else exp.control
 
     @classmethod
     def from_fastq_files(cls, fq: List[Union[str, pathlib.Path]], **kwargs):
@@ -970,6 +963,7 @@ def files(self) -> List[str]:
 class HeatmapFiles(BaseModel):
     assay: Literal["ChIP", "ATAC", "RNA", "SNP"]
     make_heatmaps: bool = False
+    make_heatmaps: bool = False
 
     @property
     def heatmap_files(self) -> List[str]:
@@ -985,6 +979,10 @@ def files(self) -> List[str]:
             return self.heatmap_files
         else:
             return []
+        if self.make_heatmaps:
+            return self.heatmap_files
+        else:
+            return []
 
 
 class HubFiles(BaseModel):
@@ -1255,8 +1253,10 @@ def peaks(self):
             s
             for s in self.sample_names
             if not any([c in s for c in self.control_names])
+            if not any([c in s for c in self.control_names])
         ]
 
+
         pcf_samples = PeakCallingFiles(
             assay=self.assay,
             names=ip_sample_names,
diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk
index 989e86f4..640d4c7e 100755
--- a/seqnado/workflow/rules/peak_call_chip.smk
+++ b/seqnado/workflow/rules/peak_call_chip.smk
@@ -37,11 +37,29 @@ def get_control_file(wildcards, file_type: Literal["bam", "tag", "bigwig"], allo
             return []
         else:
             return "UNDEFINED"
+def get_control_file(wildcards, file_type: Literal["bam", "tag", "bigwig"], allow_null=False):
+    exp = DESIGN.query(sample_name=f"{wildcards.sample}_{wildcards.treatment}", full_experiment=True)
+    
+    if not exp.has_control and not allow_null: # if control is not defined, return UNDEFINED. This is to prevent the rule from running
+        return "UNDEFINED"
+    elif not exp.has_control and allow_null: # if control is not defined, return empty list
+        return []
+    
+    match file_type:
+        case "bam":
+            fn =  f"seqnado_output/aligned/{exp.control_fullname}.bam"
+        case "tag":
+            fn =  f"seqnado_output/tag_dirs/{exp.control_fullname}"
+        case "bigwig":
+            fn =  f"seqnado_output/bigwigs/deeptools/unscaled/{exp.control_fullname}.bigWig"
+    return fn
+
 
 rule macs2_with_input:
     input:
         treatment="seqnado_output/aligned/{sample}_{treatment}.bam",
         control=lambda wc: get_control_file(wc, file_type="bam", allow_null=False),
+        control=lambda wc: get_control_file(wc, file_type="bam", allow_null=False),
     output:
         peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed",
     params:
@@ -65,6 +83,7 @@ rule macs2_no_input:
     input:
         treatment="seqnado_output/aligned/{sample}_{treatment}.bam",
         control=lambda wc: get_control_file(wc, file_type="bam", allow_null=True), 
+        control=lambda wc: get_control_file(wc, file_type="bam", allow_null=True), 
     output:
         peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed",
     params:
@@ -88,6 +107,7 @@ rule homer_with_input:
     input:
         treatment="seqnado_output/tag_dirs/{sample}_{treatment}",
         control=lambda wc: get_control_file(wc, file_type="tag", allow_null=False),
+        control=lambda wc: get_control_file(wc, file_type="tag", allow_null=False),
     output:
         peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
     log:
@@ -110,6 +130,7 @@ rule homer_no_input:
     input:
         treatment="seqnado_output/tag_dirs/{sample}_{treatment}",
         control=lambda wc: get_control_file(wc, file_type="tag", allow_null=True),
+        control=lambda wc: get_control_file(wc, file_type="tag", allow_null=True),
     output:
         peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
     log:
@@ -132,6 +153,7 @@ rule lanceotron_with_input:
     input:
         treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig",
         control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False),
+        control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False),
     output:
         peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
     log:
@@ -157,6 +179,7 @@ rule lanceotron_no_input:
     input:
         treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig",
         control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=True),
+        control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=True),
     output:
         peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
     log: