diff --git a/seqnado/config.py b/seqnado/config.py index 03391cca..c993173c 100755 --- a/seqnado/config.py +++ b/seqnado/config.py @@ -360,6 +360,7 @@ def setup_configuration(assay, genome, template_data): bcftools: threads: 16 options: + filter: """ diff --git a/seqnado/design.py b/seqnado/design.py index 0000205e..3f46cc57 100755 --- a/seqnado/design.py +++ b/seqnado/design.py @@ -1359,6 +1359,8 @@ def snp_files(self) -> List[str]: if self.call_snps: return expand( "seqnado_output/variant/{method}/{sample}.vcf.gz", + "seqnado_output/variant/{method}/{sample}.anno.vcf.gz", + "seqnado_output/variant/{method}/{sample}/{sample}_summary.pdf", sample=self.sample_names, method=self.snp_calling_method, ) @@ -1388,55 +1390,3 @@ def files(self) -> List[str]: files.append(self.snp_files) return files - - -class SNPOutput(Output): - assay: Literal["SNP"] - call_snps: bool = False - sample_names: List[str] - make_ucsc_hub: bool = False - snp_calling_method: Optional[ - Union[ - Literal["bcftools", "deepvariant", False], - List[Literal["bcftools", "deepvariant"]], - ] - ] = None - - @property - def design(self): - return ["seqnado_output/design.csv"] - - @property - def snp_files(self) -> List[str]: - if self.call_snps: - return expand( - "seqnado_output/variant/{method}/{sample}.vcf.gz", - sample=self.sample_names, - method=self.snp_calling_method, - ) - else: - return [] - - @computed_field - @property - def files(self) -> List[str]: - files = [] - files.extend( - QCFiles( - assay=self.assay, - fastq_screen=self.fastq_screen, - library_complexity=self.library_complexity, - ).files - ) - - for file_list in ( - self.snp_files, - self.design, - ): - if file_list: - files.extend(file_list) - - if self.call_snps: - files.append(self.snp_files) - - return files \ No newline at end of file diff --git a/seqnado/workflow/envs/profiles/profile_slurm_singularity/config.yaml b/seqnado/workflow/envs/profiles/profile_slurm_singularity/config.yaml index 01a3f504..09060ff6 100755 --- a/seqnado/workflow/envs/profiles/profile_slurm_singularity/config.yaml +++ b/seqnado/workflow/envs/profiles/profile_slurm_singularity/config.yaml @@ -14,8 +14,8 @@ retries: 3 default-resources: slurm_partition: "short" - runtime: "1h" - mem: "3G" + runtime: "8h" + mem: "10G" # local-storage-prefix: $TMPDIR # default-storage-provider: fs diff --git a/seqnado/workflow/rules/fastq_trim.smk b/seqnado/workflow/rules/fastq_trim.smk index c76bba98..d801490c 100755 --- a/seqnado/workflow/rules/fastq_trim.smk +++ b/seqnado/workflow/rules/fastq_trim.smk @@ -9,10 +9,10 @@ rule trimgalore_paired: output: trimmed1=temp("seqnado_output/trimmed/{sample}_1.fastq.gz"), trimmed2=temp("seqnado_output/trimmed/{sample}_2.fastq.gz"), - threads: 4 resources: - mem="2GB", - runtime="4h", + runtime=lambda wildcards, attempt: f"{6 * 2 ** (attempt - 1)}h", + mem=lambda wildcards, attempt: f"{4 * 2 ** (attempt - 1)}GB", + threads: config["trim_galore"]["threads"] params: options=check_options(config["trim_galore"]["options"]), trim_dir="seqnado_output/trimmed", diff --git a/seqnado/workflow/rules/variant.smk b/seqnado/workflow/rules/variant.smk index 7aadeece..4bb33793 100755 --- a/seqnado/workflow/rules/variant.smk +++ b/seqnado/workflow/rules/variant.smk @@ -18,64 +18,67 @@ if config["call_snps"]: shell: "bcftools mpileup --threads {threads} -Ou -f {params.fasta} {input.bam} | bcftools call --threads {threads} -mv -Oz -o {output.vcf} > {log} 2>&1" - rule index_snp: + rule bcftools_split_multiallelic: input: vcf=rules.bcftools_call_snp.output.vcf, output: - vcf="seqnado_output/variant/bcftools/{sample}_filtered.vcf.gz.tbi", - shell: - "tabix -f {input.vcf} > {output.vcf}" + vcf="seqnado_output/variant/bcftools/{sample}.split.vcf.gz", + resources: + mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt -1)}GB", + runtime=lambda wildcards, attempt: f"{6 * 2 ** (attempt - 1)}h", + threads: config["bcftools"]["threads"] + shell:""" + bcftools norm --threads {threads} -Ou -m - {input.vcf} -Oz -o {output.vcf} + tabix -f {output.vcf} > {output.vcf}.tbi + """ - - rule bcftools_stats: + + rule bcftools_filter_snp: + input: + vcf=rules.bcftools_split_multiallelic.output.vcf, + output: + vcf="seqnado_output/variant/bcftools/{sample}.filtered.vcf.gz", + params: + options=check_options(config["bcftools"]["filter"]), + resources: + mem=lambda wildcards, attempt: f"{10 * 2 ** (attempt -1)}GB", + runtime=lambda wildcards, attempt: f"{6 * 2 ** (attempt - 1)}h", + threads: config["bcftools"]["threads"] + shell: """ + bcftools view --threads {threads} {params.options} -Oz -o {output.vcf} {input.vcf} + tabix -f {output.vcf} > {output.vcf}.tbi + """ + + rule bcftools_annotate: input: vcf=rules.bcftools_call_snp.output.vcf, + idx=rules.index_snp.output.vcf, output: - stats="seqnado_output/variant/bcftools/{sample}_filtered.stats.txt", + vcf="seqnado_output/variant/bcftools/{sample}.anno.vcf.gz", params: - fasta=config["fasta"], + dbsnp=config["snp_database"], + threads: config["bcftools"]["threads"] shell: - "bcftools stats -F {params.fasta} -s - {input.vcf} > {output.stats}" + "bcftools annotate --threads {threads} -a {params.dbsnp} -c ID -Oz -o {output.vcf} {input.vcf}" - rule bcftools_stats_plot: + rule bcftools_stats: input: - stats=rules.bcftools_stats.output.stats, + vcf=rules.bcftools_annotate.output.vcf, output: - summary="seqnado_output/variant/bcftools/{sample}_summary.pdf", + stats="seqnado_output/variant/bcftools/{sample}.anno.stats.txt", params: fasta=config["fasta"], - out_dir="seqnado_output/variant/bcftools/", + threads: config["bcftools"]["threads"] shell: - "plot-vcfstats -p {params.out_dir} {input.stats}" + "bcftools stats -F {params.fasta} -s - {input.vcf} > {output.stats}" - rule bcftools_annotate: + rule bcftools_stats_plot: input: - vcf=rules.bcftools_call_snp.output.vcf, - idx=rules.index_snp.output.vcf, + stats=rules.bcftools_stats.output.stats, output: - vcf="seqnado_output/variant/bcftools/{sample}_filtered.anno.vcf.gz", + summary="seqnado_output/variant/bcftools/{sample}/{sample}_summary.pdf", params: - dbsnp=config["snp_database"], - threads: 16 + fasta=config["fasta"], + out_dir="seqnado_output/variant/bcftools/{sample}", shell: - "bcftools annotate --threads 16 -c ID -a {params.dbsnp} {input.vcf} > {output.vcf}" - - # rule bcftools_split_multiallelic: - # input: - # vcf=rules.bcftools_call_snp.output.vcf, - # output: - # vcf="seqnado_output/variant/bcftools/{sample}_splitmultiallelic.vcf.gz", - # shell: - # "bcftools norm -m -any {input.vcf} -o {output.vcf} -Oz" - - # rule bcftools_filter_snp: - # input: - # vcf=rules.bcftools_split_multiallelic.output.vcf, - # output: - # vcf="seqnado_output/variant/bcftools/{sample}_filtered.vcf.gz", - # params: - # options=check_options(config["bcftools"]["options"]), - # shell: - # """bcftools view {params.options} -o {output.vcf} {input.vcf}""" - - + "plot-vcfstats -p {params.out_dir} {input.stats}" \ No newline at end of file