Skip to content

Commit

Permalink
fix: correct query function to provide both the ip and control if requested
Browse files Browse the repository at this point in the history
  • Loading branch information
alsmith151 committed Apr 26, 2024
1 parent efa0f0c commit 65bc64c
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 34 deletions.
19 changes: 16 additions & 3 deletions seqnado/design.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,26 +480,39 @@ def controls_performed(self) -> List[str]:
control.add(f.control_performed)
return list(control)

def query(
    self, sample_name: str, full_experiment: bool = False
) -> Union[FastqSetIP, Dict[str, FastqSetIP]]:
    """
    Look up the fastq set(s) registered under ``sample_name``.

    ``sample_name`` is compared with each experiment's ``ip_set_fullname``
    and, for experiments whose ``has_control`` is true, with its
    ``control_fullname``. When several experiments match, the last one in
    ``self.experiments`` is used.

    Args:
        sample_name: Full name of an IP set or of a control set.
        full_experiment: If True, return both members of the matching
            experiment rather than only the one that was asked for.

    Returns:
        By default, the matching experiment's ``ip`` when the name matched
        the IP, otherwise its ``control``. With ``full_experiment=True``,
        a dict ``{"ip": experiment.ip, "control": experiment.control}``.

    Raises:
        ValueError: If no experiment matches ``sample_name``.
    """
    matched = None
    matched_as_control = False

    for experiment in self.experiments:
        # The role flag is updated together with the match itself so that a
        # later IP match can never be reported using a stale "control" flag.
        if experiment.ip_set_fullname == sample_name:
            matched, matched_as_control = experiment, False
        elif experiment.has_control and experiment.control_fullname == sample_name:
            matched, matched_as_control = experiment, True

    if matched is None:
        raise ValueError(f"Could not find sample with name {sample_name}")

    if full_experiment:
        return {"ip": matched.ip, "control": matched.control}
    return matched.control if matched_as_control else matched.ip

@classmethod
def from_fastq_files(cls, fq: List[Union[str, pathlib.Path]], **kwargs):
Expand Down
52 changes: 21 additions & 31 deletions seqnado/workflow/rules/peak_call_chip.smk
Original file line number Diff line number Diff line change
Expand Up @@ -25,37 +25,27 @@ def format_macs_options(wildcards, options):
return options


def get_control_bam(wildcards):
    """
    Build the BAM path used as the ``control`` input of a peak-calling rule.

    Queries ``DESIGN`` for ``"{sample}_{treatment}"`` and, when the result is
    truthy, combines ``wildcards.sample`` with the result's
    ``ip_or_control_name``. A falsy result yields the placeholder
    ``"UNDEFINED"``.
    """
    queried_name = f"{wildcards.sample}_{wildcards.treatment}"
    fastq_set = DESIGN.query(sample_name=queried_name)

    if not fastq_set:
        return "UNDEFINED"
    return f"seqnado_output/aligned/{wildcards.sample}_{fastq_set.ip_or_control_name}.bam"


def get_control_tag(wildcards):
    """
    Build the tag-directory path used as the ``control`` input of a rule.

    Queries ``DESIGN`` for ``"{sample}_{treatment}"``; a truthy result gives
    a path under ``seqnado_output/tag_dirs`` made from ``wildcards.sample``
    and the result's ``ip_or_control_name``, a falsy one gives the
    placeholder ``"UNDEFINED"``.
    """
    fastq_set = DESIGN.query(sample_name=f"{wildcards.sample}_{wildcards.treatment}")
    return (
        f"seqnado_output/tag_dirs/{wildcards.sample}_{fastq_set.ip_or_control_name}"
        if fastq_set
        else "UNDEFINED"
    )


def get_control_bigwig(wildcards):
    """
    Build the bigWig path used as the ``control`` input of a rule.

    Queries ``DESIGN`` for ``"{sample}_{treatment}"``. If the result is
    falsy the placeholder ``"UNDEFINED"`` is returned; otherwise the path is
    assembled from ``wildcards.sample`` and the result's
    ``ip_or_control_name``.
    """
    lookup_key = f"{wildcards.sample}_{wildcards.treatment}"
    found = DESIGN.query(sample_name=lookup_key)
    if found:
        stem = f"{wildcards.sample}_{found.ip_or_control_name}"
        return f"seqnado_output/bigwigs/deeptools/unscaled/{stem}.bigWig"
    return "UNDEFINED"
def get_control_file(wildcards, file_type: Literal["bam", "tag", "bigwig"], allow_null=False):
    """
    Resolve the control file that pairs with ``{sample}_{treatment}``.

    Args:
        wildcards: Snakemake wildcards providing ``sample`` and ``treatment``.
        file_type: Which kind of control output to point at: ``"bam"``,
            ``"tag"`` or ``"bigwig"``.
        allow_null: What to return when the experiment has no control.
            False (default) returns the placeholder ``"UNDEFINED"``, which is
            meant to prevent the rule from running; True returns an empty
            list.

    Returns:
        The control file path as a string, or ``"UNDEFINED"`` / ``[]`` when
        the experiment's control is falsy (see ``allow_null``).

    Raises:
        ValueError: If ``file_type`` is not one of the supported kinds.
            Previously such a value fell through the ``match`` and silently
            returned ``None``.
    """
    path_templates = {
        "bam": "seqnado_output/aligned/{name}.bam",
        "tag": "seqnado_output/tag_dirs/{name}",
        "bigwig": "seqnado_output/bigwigs/deeptools/unscaled/{name}.bigWig",
    }
    if file_type not in path_templates:
        raise ValueError(
            f"Unsupported file_type {file_type!r}; expected one of {sorted(path_templates)}"
        )

    exp = DESIGN.query(
        sample_name=f"{wildcards.sample}_{wildcards.treatment}",
        full_experiment=True,
    )
    control = exp["control"]

    if not control:
        # No control for this experiment: either an empty input list or the
        # placeholder that is meant to prevent the rule from running.
        return [] if allow_null else "UNDEFINED"

    return path_templates[file_type].format(name=control.name)


rule macs2_with_input:
input:
treatment="seqnado_output/aligned/{sample}_{treatment}.bam",
control=get_control_bam,
control=lambda wc: get_control_file(wc, file_type="bam", allow_null=False),
output:
peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed",
params:
Expand All @@ -78,7 +68,7 @@ rule macs2_with_input:
rule macs2_no_input:
input:
treatment="seqnado_output/aligned/{sample}_{treatment}.bam",
control=lambda wc: [] if get_control_bam(wc) == "UNDEFINED" else get_control_bam(wc),
control=lambda wc: get_control_file(wc, file_type="bam", allow_null=True),
output:
peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed",
params:
Expand All @@ -101,7 +91,7 @@ rule macs2_no_input:
rule homer_with_input:
input:
treatment="seqnado_output/tag_dirs/{sample}_{treatment}",
control=get_control_tag,
control=lambda wc: get_control_file(wc, file_type="tag", allow_null=False),
output:
peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
log:
Expand All @@ -123,7 +113,7 @@ rule homer_with_input:
rule homer_no_input:
input:
treatment="seqnado_output/tag_dirs/{sample}_{treatment}",
control=lambda wc: [] if get_control_tag(wc) == "UNDEFINED" else get_control_tag(wc),
control=lambda wc: get_control_file(wc, file_type="tag", allow_null=True),
output:
peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
log:
Expand All @@ -145,7 +135,7 @@ rule homer_no_input:
rule lanceotron_with_input:
input:
treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig",
control=get_control_bigwig,
control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False),
output:
peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
log:
Expand All @@ -170,7 +160,7 @@ rule lanceotron_with_input:
rule lanceotron_no_input:
    input:
        treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig",
        # No-input variant: a missing control must resolve to an empty input
        # list (allow_null=True), as in macs2_no_input and homer_no_input.
        # With allow_null=False the input would be the placeholder path
        # "UNDEFINED" instead.
        control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=True),
    output:
        peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
    log:
Expand Down

0 comments on commit 65bc64c

Please sign in to comment.