Skip to content

Commit

Permalink
Merge branch 'master' into fix-se-with-inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
CChahrour committed May 28, 2024
2 parents ebde634 + 4059dc1 commit 310a728
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 7 deletions.
80 changes: 80 additions & 0 deletions seqnado/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def setup_configuration(assay, genome, template_data):
genome_dict[genome] = {
"indices": genome_values[genome].get(
"star_indices" if assay in ["rna"] else "bt2_indices"
"star_indices" if assay in ["rna"] else "bt2_indices"
),
"chromosome_sizes": genome_values[genome].get("chromosome_sizes", ""),
"gtf": genome_values[genome].get("gtf", ""),
Expand Down Expand Up @@ -162,6 +163,26 @@ def setup_configuration(assay, genome, template_data):
template_data["pileup_method"] = "False"
template_data["scale"] = "False"
template_data["make_heatmaps"] = "False"
if assay not in ["snp"]:
template_data["make_bigwigs"] = get_user_input(
"Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True
)
if template_data["make_bigwigs"]:
template_data["pileup_method"] = get_user_input(
"Pileup method:",
default="deeptools",
choices=["deeptools", "homer"],
)
template_data["scale"] = get_user_input(
"Scale bigwigs? (yes/no)", default="no", is_boolean=True
)
template_data["make_heatmaps"] = get_user_input(
"Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True
)
else:
template_data["pileup_method"] = "False"
template_data["scale"] = "False"
template_data["make_heatmaps"] = "False"

# Call peaks
if assay in ["chip", "atac"]:
Expand Down Expand Up @@ -202,6 +223,38 @@ def setup_configuration(assay, genome, template_data):
else "False"
)

# SNP options
template_data["call_snps"] = (
get_user_input("Call SNPs? (yes/no)", default="no", is_boolean=True)
if assay == "snp"
else "False"
)
if assay == "snp" and template_data["call_snps"]:

template_data["snp_calling_method"] = get_user_input(
"SNP caller:",
default="bcftools",
choices=["bcftools", "deepvariant"],
)

template_data["fasta"] = get_user_input(
"Path to reference fasta:", default="path/to/reference.fasta"
)

template_data["fasta_index"] = get_user_input(
"Path to reference fasta index:", default="path/to/reference.fasta.fai"
)

template_data["snp_database"] = get_user_input(
"Path to SNP database:",
default="path/to/snp_database",
)
else:
template_data["snp_calling_method"] = "False"
template_data["fasta"] = "False"
template_data["fasta_index"] = "False"
template_data["snp_database"] = "False"

# SNP options
template_data["call_snps"] = (
get_user_input("Call SNPs? (yes/no)", default="no", is_boolean=True)
Expand Down Expand Up @@ -263,6 +316,13 @@ def setup_configuration(assay, genome, template_data):
if assay == "rna"
else TOOL_OPTIONS_SNP if assay == "snp" else ""
)
TOOL_OPTIONS
if assay in ["chip", "atac"]
else (
TOOL_OPTIONS_RNA
if assay == "rna"
else TOOL_OPTIONS_SNP if assay == "snp" else ""
)
)


Expand Down Expand Up @@ -364,6 +424,26 @@ def setup_configuration(assay, genome, template_data):
"""


# Tool-options text block used when the assay is "snp" (selected elsewhere in
# this module via `TOOL_OPTIONS_SNP if assay == "snp"`). It lists per-tool
# `threads` and `options` entries for trim_galore, bowtie2, picard and bcftools.
# NOTE(review): the text looks like YAML destined for the config template —
# confirm nesting/indentation against the template that consumes it.
TOOL_OPTIONS_SNP = """
trim_galore:
threads: 8
options: --2colour 20
bowtie2:
threads: 8
options:
picard:
threads: 8
options:
bcftools:
threads: 16
options:
"""


def create_config(assay, genome, rerun, debug=False):
env = Environment(loader=FileSystemLoader(template_dir), auto_reload=False)

Expand Down
97 changes: 96 additions & 1 deletion seqnado/design.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,13 @@ def is_path(path: Optional[Union[str, pathlib.Path]]) -> Optional[pathlib.Path]:

class FastqFile(BaseModel):
path: pathlib.Path
use_resolved_name: bool = False

def model_post_init(self, *args):
self.path = pathlib.Path(self.path).resolve()
if self.use_resolved_name:
self.path = pathlib.Path(self.path).resolve()
else:
self.path = pathlib.Path(self.path).absolute()

if not self.path.exists() or str(self.path) in ["-", ".", "", None]:
raise FileNotFoundError(f"{self.path} does not exist.")
Expand Down Expand Up @@ -480,6 +484,9 @@ def controls_performed(self) -> List[str]:
control.add(f.control_performed)
return list(control)

def query(
self, sample_name: str, full_experiment: bool = False
) -> Union[FastqSetIP, Dict[str, FastqSetIP]]:
def query(
self, sample_name: str, full_experiment: bool = False
) -> Union[FastqSetIP, Dict[str, FastqSetIP]]:
Expand All @@ -492,6 +499,9 @@ def query(
)
is_control = False

experiment_files = dict()
is_control = False

experiment_files = dict()

if sample_name in ip_names or sample_name in control_names:
Expand All @@ -500,13 +510,19 @@ def query(
experiment_files["ip"] = experiment.ip
experiment_files["control"] = experiment.control

experiment_files["ip"] = experiment.ip
experiment_files["control"] = experiment.control

elif (
experiment.has_control
and experiment.control_fullname == sample_name
):
is_control = True
experiment_files["ip"] = experiment.ip
experiment_files["control"] = experiment.control
is_control = True
experiment_files["ip"] = experiment.ip
experiment_files["control"] = experiment.control
else:
raise ValueError(f"Could not find sample with name {sample_name}")

Expand All @@ -519,6 +535,15 @@ def query(
else experiment_files["control"]
)

if full_experiment:
return experiment_files
else:
return (
experiment_files["ip"]
if not is_control
else experiment_files["control"]
)

@classmethod
def from_fastq_files(cls, fq: List[Union[str, pathlib.Path]], **kwargs):
"""
Expand Down Expand Up @@ -885,6 +910,13 @@ class BigWigFiles(BaseModel):
"homer",
]
],
Literal["deeptools", "homer", False],
List[
Literal[
"deeptools",
"homer",
]
],
] = None
make_bigwigs: bool = False
scale_method: Optional[Literal["cpm", "rpkm", "spikein", "csaw", "merged"]] = None
Expand Down Expand Up @@ -965,6 +997,7 @@ def files(self) -> List[str]:
class HeatmapFiles(BaseModel):
assay: Literal["ChIP", "ATAC", "RNA", "SNP"]
make_heatmaps: bool = False
make_heatmaps: bool = False

@property
def heatmap_files(self) -> List[str]:
Expand All @@ -980,6 +1013,10 @@ def files(self) -> List[str]:
return self.heatmap_files
else:
return []
if self.make_heatmaps:
return self.heatmap_files
else:
return []


class HubFiles(BaseModel):
Expand Down Expand Up @@ -1033,11 +1070,15 @@ class Output(BaseModel):
sample_names: List[str]

make_bigwigs: bool = False
pileup_method: Union[
Literal["deeptools", "homer", False],
List[Literal["deeptools", "homer"]],
pileup_method: Union[
Literal["deeptools", "homer", False],
List[Literal["deeptools", "homer"]],
] = None


scale_method: Optional[Literal["cpm", "rpkm", "spikein", "csaw"]] = None

make_heatmaps: bool = False
Expand Down Expand Up @@ -1250,8 +1291,10 @@ def peaks(self):
s
for s in self.sample_names
if not any([c in s for c in self.control_names])
if not any([c in s for c in self.control_names])
]


pcf_samples = PeakCallingFiles(
assay=self.assay,
names=ip_sample_names,
Expand Down Expand Up @@ -1350,3 +1393,55 @@ def files(self) -> List[str]:
files.append(self.snp_files)

return files


class SNPOutput(Output):
    """Collect the output file paths expected for an SNP-assay run.

    Extends ``Output`` with SNP-specific switches and exposes the combined
    list of target files through the ``files`` computed field.
    """

    # Restricts this output model to the SNP assay.
    assay: Literal["SNP"]
    # When False, no variant-call files are requested.
    call_snps: bool = False
    sample_names: List[str]
    make_ucsc_hub: bool = False
    # One caller, a list of callers, or False/None when SNP calling is off.
    snp_calling_method: Optional[
        Union[
            Literal["bcftools", "deepvariant", False],
            List[Literal["bcftools", "deepvariant"]],
        ]
    ] = None

    @property
    def design(self):
        """Return the path of the design file as a single-element list."""
        return ["seqnado_output/design.csv"]

    @property
    def snp_files(self) -> List[str]:
        """Return the VCF paths to produce, or an empty list if disabled.

        Paths are built with ``expand`` from ``sample_names`` and
        ``snp_calling_method``; nothing is returned unless ``call_snps`` is
        set.
        """
        if not self.call_snps:
            return []

        return expand(
            "seqnado_output/variant/{method}/{sample}.vcf.gz",
            sample=self.sample_names,
            method=self.snp_calling_method,
        )

    @computed_field
    @property
    def files(self) -> List[str]:
        """Return every output path for this run as one flat list of strings.

        The list contains the QC files, then the SNP files (only when
        ``call_snps`` is set), then the design file.
        """
        files = []
        files.extend(
            QCFiles(
                assay=self.assay,
                fastq_screen=self.fastq_screen,
                library_complexity=self.library_complexity,
            ).files
        )

        # Empty lists are skipped, so disabled SNP calling contributes nothing.
        # The SNP paths are added exactly once here; appending the list itself
        # afterwards would nest a list inside ``List[str]`` and duplicate them.
        for file_list in (
            self.snp_files,
            self.design,
        ):
            if file_list:
                files.extend(file_list)

        return files
10 changes: 10 additions & 0 deletions seqnado/workflow/rules/peak_call_chip.smk
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,16 @@ def get_control_file(wildcards, file_type: Literal["bam", "tag", "bigwig"], allo
return []
else:
return "UNDEFINED"
if allow_null:
return []
else:
return "UNDEFINED"

rule macs2_with_input:
input:
treatment="seqnado_output/aligned/{sample}_{treatment}.bam",
control=lambda wc: get_control_file(wc, file_type="bam", allow_null=False),
control=lambda wc: get_control_file(wc, file_type="bam", allow_null=False),
output:
peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed",
params:
Expand All @@ -65,6 +70,7 @@ rule macs2_no_input:
input:
treatment="seqnado_output/aligned/{sample}_{treatment}.bam",
control=lambda wc: get_control_file(wc, file_type="bam", allow_null=True),
control=lambda wc: get_control_file(wc, file_type="bam", allow_null=True),
output:
peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed",
params:
Expand All @@ -88,6 +94,7 @@ rule homer_with_input:
input:
treatment="seqnado_output/tag_dirs/{sample}_{treatment}",
control=lambda wc: get_control_file(wc, file_type="tag", allow_null=False),
control=lambda wc: get_control_file(wc, file_type="tag", allow_null=False),
output:
peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
log:
Expand All @@ -110,6 +117,7 @@ rule homer_no_input:
input:
treatment="seqnado_output/tag_dirs/{sample}_{treatment}",
control=lambda wc: get_control_file(wc, file_type="tag", allow_null=True),
control=lambda wc: get_control_file(wc, file_type="tag", allow_null=True),
output:
peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed",
log:
Expand All @@ -132,6 +140,7 @@ rule lanceotron_with_input:
input:
treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig",
control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False),
control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False),
output:
peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
log:
Expand All @@ -157,6 +166,7 @@ rule lanceotron_no_input:
input:
treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig",
control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=True),
control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=True),
output:
peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed",
log:
Expand Down
Loading

0 comments on commit 310a728

Please sign in to comment.