Skip to content

Commit

Permalink
fix resources and snp output
Browse files Browse the repository at this point in the history
  • Loading branch information
CChahrour committed Oct 17, 2024
1 parent 97f585f commit bbc2e36
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 98 deletions.
1 change: 1 addition & 0 deletions seqnado/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ def setup_configuration(assay, genome, template_data):
bcftools:
threads: 16
options:
filter:
"""

Expand Down
54 changes: 2 additions & 52 deletions seqnado/design.py
Original file line number Diff line number Diff line change
Expand Up @@ -1359,6 +1359,8 @@ def snp_files(self) -> List[str]:
if self.call_snps:
return expand(
"seqnado_output/variant/{method}/{sample}.vcf.gz",
"seqnado_output/variant/{method}/{sample}.anno.vcf.gz",
"seqnado_output/variant/{method}/{sample}/{sample}_summary.pdf",
sample=self.sample_names,
method=self.snp_calling_method,
)
Expand Down Expand Up @@ -1388,55 +1390,3 @@ def files(self) -> List[str]:
files.append(self.snp_files)

return files


class SNPOutput(Output):
    """Describe the output files expected from an SNP-assay run.

    The ``files`` computed field is the flat list of every target path:
    QC outputs, optional per-sample variant calls, and the design sheet.
    """

    assay: Literal["SNP"]
    # When False, no variant-calling targets are requested.
    call_snps: bool = False
    sample_names: List[str]
    make_ucsc_hub: bool = False
    # A single caller name, a list of caller names, or False/None when unset.
    snp_calling_method: Optional[
        Union[
            Literal["bcftools", "deepvariant", False],
            List[Literal["bcftools", "deepvariant"]],
        ]
    ] = None

    @property
    def design(self):
        """Return the design-sheet path wrapped in a one-element list."""
        return ["seqnado_output/design.csv"]

    @property
    def snp_files(self) -> List[str]:
        """Return one compressed VCF path per (method, sample) pair.

        Returns an empty list when ``call_snps`` is False.
        """
        if self.call_snps:
            return expand(
                "seqnado_output/variant/{method}/{sample}.vcf.gz",
                sample=self.sample_names,
                method=self.snp_calling_method,
            )
        else:
            return []

    @computed_field
    @property
    def files(self) -> List[str]:
        """Return the flat list of all target files for this assay.

        Order: QC files, then variant files (if any), then the design sheet.
        """
        files = []
        # NOTE(review): fastq_screen / library_complexity are not declared in
        # this class; presumably inherited from Output - confirm.
        files.extend(
            QCFiles(
                assay=self.assay,
                fastq_screen=self.fastq_screen,
                library_complexity=self.library_complexity,
            ).files
        )

        for file_list in (
            self.snp_files,
            self.design,
        ):
            # Skip empty groups (e.g. snp_files when call_snps is False).
            if file_list:
                files.extend(file_list)

        # The variant files are already included by the loop above. The
        # previous extra ``files.append(self.snp_files)`` inserted a nested
        # list of duplicates, which broke the declared ``List[str]`` shape.
        return files
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ retries: 3

default-resources:
slurm_partition: "short"
runtime: "1h"
mem: "3G"
runtime: "8h"
mem: "10G"

# local-storage-prefix: $TMPDIR
# default-storage-provider: fs
Expand Down
6 changes: 3 additions & 3 deletions seqnado/workflow/rules/fastq_trim.smk
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ rule trimgalore_paired:
output:
trimmed1=temp("seqnado_output/trimmed/{sample}_1.fastq.gz"),
trimmed2=temp("seqnado_output/trimmed/{sample}_2.fastq.gz"),
threads: 4
resources:
mem="2GB",
runtime="4h",
runtime=lambda wildcards, attempt: f"{6 * 2 ** (attempt - 1)}h",
mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB",
threads: config["trim_galore"]["threads"]
params:
options=check_options(config["trim_galore"]["options"]),
trim_dir="seqnado_output/trimmed",
Expand Down
85 changes: 44 additions & 41 deletions seqnado/workflow/rules/variant.smk
Original file line number Diff line number Diff line change
Expand Up @@ -18,64 +18,67 @@ if config["call_snps"]:
shell:
"bcftools mpileup --threads {threads} -Ou -f {params.fasta} {input.bam} | bcftools call --threads {threads} -mv -Oz -o {output.vcf} > {log} 2>&1"

rule index_snp:
rule bcftools_split_multiallelic:
input:
vcf=rules.bcftools_call_snp.output.vcf,
output:
vcf="seqnado_output/variant/bcftools/{sample}_filtered.vcf.gz.tbi",
shell:
"tabix -f {input.vcf} > {output.vcf}"
vcf="seqnado_output/variant/bcftools/{sample}.split.vcf.gz",
resources:
mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt -1)}GB",
runtime=lambda wildcards, attempt: f"{6 * 2 ** (attempt - 1)}h",
threads: config["bcftools"]["threads"]
shell:"""
bcftools norm --threads {threads} -Ou -m - {input.vcf} -Oz -o {output.vcf}
tabix -f {output.vcf} > {output.vcf}.tbi
"""


rule bcftools_stats:

rule bcftools_filter_snp:
input:
vcf=rules.bcftools_split_multiallelic.output.vcf,
output:
vcf="seqnado_output/variant/bcftools/{sample}.filtered.vcf.gz",
params:
options=check_options(config["bcftools"]["filter"]),
resources:
mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt -1)}GB",
runtime=lambda wildcards, attempt: f"{6 * 2 ** (attempt - 1)}h",
threads: config["bcftools"]["threads"]
shell: """
bcftools view --threads {threads} {params.options} -Oz -o {output.vcf} {input.vcf}
tabix -f {output.vcf} > {output.vcf}.tbi
"""

rule bcftools_annotate:
input:
vcf=rules.bcftools_call_snp.output.vcf,
idx=rules.index_snp.output.vcf,
output:
stats="seqnado_output/variant/bcftools/{sample}_filtered.stats.txt",
vcf="seqnado_output/variant/bcftools/{sample}.anno.vcf.gz",
params:
fasta=config["fasta"],
dbsnp=config["snp_database"],
threads: config["bcftools"]["threads"]
shell:
"bcftools stats -F {params.fasta} -s - {input.vcf} > {output.stats}"
"bcftools annotate --threads {threads} -a {params.dbsnp} -c ID -Oz -o {output.vcf} {input.vcf}"

rule bcftools_stats_plot:
rule bcftools_stats:
input:
stats=rules.bcftools_stats.output.stats,
vcf=rules.bcftools_annotate.output.vcf,
output:
summary="seqnado_output/variant/bcftools/{sample}_summary.pdf",
stats="seqnado_output/variant/bcftools/{sample}.anno.stats.txt",
params:
fasta=config["fasta"],
out_dir="seqnado_output/variant/bcftools/",
threads: config["bcftools"]["threads"]
shell:
"plot-vcfstats -p {params.out_dir} {input.stats}"
"bcftools stats -F {params.fasta} -s - {input.vcf} > {output.stats}"

rule bcftools_annotate:
rule bcftools_stats_plot:
input:
vcf=rules.bcftools_call_snp.output.vcf,
idx=rules.index_snp.output.vcf,
stats=rules.bcftools_stats.output.stats,
output:
vcf="seqnado_output/variant/bcftools/{sample}_filtered.anno.vcf.gz",
summary="seqnado_output/variant/bcftools/{sample}/{sample}_summary.pdf",
params:
dbsnp=config["snp_database"],
threads: 16
fasta=config["fasta"],
out_dir="seqnado_output/variant/bcftools/{sample}",
shell:
"bcftools annotate --threads 16 -c ID -a {params.dbsnp} {input.vcf} > {output.vcf}"

# rule bcftools_split_multiallelic:
# input:
# vcf=rules.bcftools_call_snp.output.vcf,
# output:
# vcf="seqnado_output/variant/bcftools/{sample}_splitmultiallelic.vcf.gz",
# shell:
# "bcftools norm -m -any {input.vcf} -o {output.vcf} -Oz"

# rule bcftools_filter_snp:
# input:
# vcf=rules.bcftools_split_multiallelic.output.vcf,
# output:
# vcf="seqnado_output/variant/bcftools/{sample}_filtered.vcf.gz",
# params:
# options=check_options(config["bcftools"]["options"]),
# shell:
# """bcftools view {params.options} -o {output.vcf} {input.vcf}"""


"plot-vcfstats -p {params.out_dir} {input.stats}"

0 comments on commit bbc2e36

Please sign in to comment.