From 05915f4e0ec8f42714ced2732281e9dadcacc2b4 Mon Sep 17 00:00:00 2001 From: Alastair Smith <49727900+alsmith151@users.noreply.github.com> Date: Tue, 28 May 2024 17:15:29 +0100 Subject: [PATCH] fix: heatmaps always created and peak files absent (#183) * Refactor HeatmapFiles class to include make_heatmaps property in design.py * Refactor NonRNAOutput class in design.py to remove computed_field decorator * fix: correct query function to provide both the ip and control if requested * Refactor control lambda function in peak_call_chip.smk to allow null control files * Refactor query function in DesignIP class to return IPExperiment object --------- Co-authored-by: CChahrour <catherine.chahrour@gmail.com> --- seqnado/design.py | 32 +++++++++++------------ seqnado/workflow/rules/peak_call_chip.smk | 23 ++++++++++++++++ 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/seqnado/design.py b/seqnado/design.py index 30ca101e..7f4cc03f 100755 --- a/seqnado/design.py +++ b/seqnado/design.py @@ -484,9 +484,7 @@ def controls_performed(self) -> List[str]: control.add(f.control_performed) return list(control) - def query( - self, sample_name: str, full_experiment: bool = False - ) -> Union[FastqSetIP, Dict[str, FastqSetIP]]: + def query(self, sample_name: str, full_experiment: bool = False) -> Union[FastqSetIP, IPExperiment]: """ Extracts a pair of fastq files from the design. """ @@ -496,32 +494,27 @@ def query( ) is_control = False - experiment_files = dict() - if sample_name in ip_names or sample_name in control_names: for experiment in self.experiments: if experiment.ip_set_fullname == sample_name: - experiment_files["ip"] = experiment.ip - experiment_files["control"] = experiment.control - + exp = experiment + break elif ( experiment.has_control and experiment.control_fullname == sample_name ): is_control = True - experiment_files["ip"] = experiment.ip - experiment_files["control"] = experiment.control + exp = experiment + break + else: raise ValueError(f"Could not find sample with name {sample_name}") + if full_experiment: - return experiment_files + return exp else: - return ( - experiment_files["ip"] - if not is_control - else experiment_files["control"] - ) + return exp.ip if not is_control else exp.control @classmethod def from_fastq_files(cls, fq: List[Union[str, pathlib.Path]], **kwargs): @@ -970,6 +963,7 @@ def files(self) -> List[str]: class HeatmapFiles(BaseModel): assay: Literal["ChIP", "ATAC", "RNA", "SNP"] make_heatmaps: bool = False + make_heatmaps: bool = False @property def heatmap_files(self) -> List[str]: @@ -985,6 +979,10 @@ def files(self) -> List[str]: return self.heatmap_files else: return [] + if self.make_heatmaps: + return self.heatmap_files + else: + return [] class HubFiles(BaseModel): @@ -1255,8 +1253,10 @@ def peaks(self): s for s in self.sample_names if not any([c in s for c in self.control_names]) + if not any([c in s for c in self.control_names]) ] + pcf_samples = PeakCallingFiles( assay=self.assay, names=ip_sample_names, diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk index 989e86f4..640d4c7e 100755 --- a/seqnado/workflow/rules/peak_call_chip.smk +++ b/seqnado/workflow/rules/peak_call_chip.smk @@ -37,11 +37,29 @@ def get_control_file(wildcards, file_type: Literal["bam", "tag", "bigwig"], allo return [] else: return "UNDEFINED" +def get_control_file(wildcards, file_type: Literal["bam", "tag", "bigwig"], allow_null=False): + exp = DESIGN.query(sample_name=f"{wildcards.sample}_{wildcards.treatment}", full_experiment=True) + + if not exp.has_control and not allow_null: # if control is not defined, return UNDEFINED. This is to prevent the rule from running + return "UNDEFINED" + elif not exp.has_control and allow_null: # if control is not defined, return empty list + return [] + + match file_type: + case "bam": + fn = f"seqnado_output/aligned/{exp.control_fullname}.bam" + case "tag": + fn = f"seqnado_output/tag_dirs/{exp.control_fullname}" + case "bigwig": + fn = f"seqnado_output/bigwigs/deeptools/unscaled/{exp.control_fullname}.bigWig" + return fn + rule macs2_with_input: input: treatment="seqnado_output/aligned/{sample}_{treatment}.bam", control=lambda wc: get_control_file(wc, file_type="bam", allow_null=False), + control=lambda wc: get_control_file(wc, file_type="bam", allow_null=False), output: peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed", params: @@ -65,6 +83,7 @@ rule macs2_no_input: input: treatment="seqnado_output/aligned/{sample}_{treatment}.bam", control=lambda wc: get_control_file(wc, file_type="bam", allow_null=True), + control=lambda wc: get_control_file(wc, file_type="bam", allow_null=True), output: peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed", params: @@ -88,6 +107,7 @@ rule homer_with_input: input: treatment="seqnado_output/tag_dirs/{sample}_{treatment}", control=lambda wc: get_control_file(wc, file_type="tag", allow_null=False), + control=lambda wc: get_control_file(wc, file_type="tag", allow_null=False), output: peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed", log: @@ -110,6 +130,7 @@ rule homer_no_input: input: treatment="seqnado_output/tag_dirs/{sample}_{treatment}", control=lambda wc: get_control_file(wc, file_type="tag", allow_null=True), + control=lambda wc: get_control_file(wc, file_type="tag", allow_null=True), output: peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed", log: @@ -132,6 +153,7 @@ rule lanceotron_with_input: input: treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig", control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False), + control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False), output: peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed", log: @@ -157,6 +179,7 @@ rule lanceotron_no_input: input: treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig", control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=True), + control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=True), output: peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed", log: