diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index fcaeb24b..44268e0f 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -140,6 +140,12 @@ def is_illumina(wildcards, sample=None): return get_technology(None, sample) == ILLUMINA +def is_illumina_se(wildcards, sample=None): + if sample is None: + return get_technology(wildcards) == ILLUMINA_SE + return get_technology(None, sample) == ILLUMINA_SE + + def is_ion_torrent(wildcards, sample=None): """Returns if the sample was sequenced with the Ion Torrent platform.""" if sample is None: @@ -150,8 +156,8 @@ def is_ion_torrent(wildcards, sample=None): def has_pseudo_assembly(wildcards, sample=None): """Returns if a pseudo-assembly should be created for the sample.""" if sample is None: - return is_illumina(wildcards) or is_ion_torrent(wildcards) - return is_illumina(None, sample) or is_ion_torrent(None, sample) + return is_illumina(wildcards) or is_ion_torrent(wildcards) or is_illumina_se(wildacrds) + return is_illumina(None, sample) or is_ion_torrent(None, sample) or is_illumina_se(None, sample) def has_consensus_assembly(wildcards, sample=None): @@ -164,8 +170,8 @@ def has_consensus_assembly(wildcards, sample=None): def is_single_end(wildcards, sample=None): """Returns if the sample was sequenced with single end technology.""" if sample is None: - return is_ont(wildcards) or is_ion_torrent(wildcards) - return is_ont(None, sample) or is_ion_torrent(None, sample) + return is_ont(wildcards) or is_ion_torrent(wildcards) or is_illumina_se(wildacrds) + return is_ont(None, sample) or is_ion_torrent(None, sample) or is_illumina_se(None, sample) def get_fastqs(wildcards): @@ -205,6 +211,8 @@ def get_fastqs(wildcards): # default case, look up FASTQs in the sample sheet if is_illumina(wildcards): return pep.sample_table.loc[wildcards.sample][["fq1", "fq2"]] + elif is_illumina_se(wildcards): + return pep.sample_table.loc[wildcards.sample][["fq1"]] elif is_ont(wildcards): return pep.sample_table.loc[wildcards.sample][["fq1"]] elif is_ion_torrent(wildcards): @@ -413,8 +421,12 @@ def get_reads(wildcards): "results/{date}/trimmed/fastp-pe/{sample}.{read}.fastq.gz", read=[1, 2], **wildcards, - ) + ) + illumina_se_pattern = expand( + "results/{date}/trimmed/fastp-se/{sample}.fastq.gz", + **wildcards, + ) ont_pattern = expand( "results/{date}/corrected/{sample}/{sample}.correctedReads.clip.fasta", **wildcards, @@ -428,6 +440,7 @@ def get_reads(wildcards): return get_pattern_by_technology( wildcards, illumina_pattern=illumina_pattern, + illumina_se_pattern=illumina_se_pattern, ont_pattern=ont_pattern, ion_torrent_pattern=ion_torrent_pattern, ) @@ -449,6 +462,7 @@ def get_non_human_reads(wildcards): "results/{{date}}/nonhuman-reads/pe/{{sample}}.{read}.fastq.gz", read=[1, 2], ), + illumina_se_pattern="results/{date}/nonhuman-reads/se/{sample}.fastq.gz", ont_pattern="results/{date}/nonhuman-reads/se/{sample}.fastq.gz", ion_torrent_pattern="results/{date}/nonhuman-reads/se/{sample}.fastq.gz", ) @@ -466,6 +480,9 @@ def get_reads_after_qc(wildcards, read="both"): read=[1, 2], **wildcards, ) + illumina_se_pattern = expand( + "results/{date}/nonhuman-reads/se/{sample}.fastq", **wildcards + ) ont_pattern = expand( "results/{date}/nonhuman-reads/se/{sample}.fastq", **wildcards ) @@ -476,6 +493,7 @@ def get_reads_after_qc(wildcards, read="both"): pattern = get_pattern_by_technology( wildcards, illumina_pattern=illumina_pattern, + illumina_se_pattern=illumina_se_pattern, ont_pattern=ont_pattern, ion_torrent_pattern=ion_torrent_pattern, ) @@ -487,6 +505,10 @@ def get_reads_after_qc(wildcards, read="both"): **wildcards, ) + illumina_se_pattern = expand( + "results/{date}/nonhuman-reads/se/{sample}.fastq.gz", **wildcards + ) + ont_pattern = expand( "results/{date}/nonhuman-reads/se/{sample}.fastq.gz", **wildcards ) @@ -498,6 +520,7 @@ def get_reads_after_qc(wildcards, read="both"): pattern = get_pattern_by_technology( wildcards, illumina_pattern=illumina_pattern, + illumina_se_pattern=illumina_se_pattern, ont_pattern=ont_pattern, ion_torrent_pattern=ion_torrent_pattern, ) @@ -532,6 +555,9 @@ def return_assembler(sample): illumina_pattern="{assembler}-pe".format( assembler=config["assembly"]["illumina"]["amplicon"] ), + illumina_se_pattern="{assembler}-se".format( + assembler=config["assembly"]["illumina"]["amplicon"] + ), ont_pattern="{assembler}-se".format( assembler=config["assembly"]["oxford nanopore"]["amplicon"] ), @@ -546,6 +572,9 @@ def return_assembler(sample): illumina_pattern="{assembler}-pe".format( assembler=config["assembly"]["illumina"]["shotgun"] ), + illumina_se_pattern="{assembler}-se".format( + assembler=config["assembly"]["illumina"]["shotgun"] + ), ont_pattern="{assembler}-se".format( assembler=config["assembly"]["oxford nanopore"]["shotgun"] ), @@ -834,7 +863,7 @@ def get_varlociraptor_bias_flags(wildcards): if is_amplicon_data(wildcards.sample): # no bias detection possible return ( - "--omit-strand-bias --omit-read-orientation-bias --omit-read-position-bias" + "--omit-strand-bias --omit-read-orientation-bias --omit-read-position-bias --omit-softclip-bias" ) return "" @@ -853,6 +882,7 @@ def get_depth_input(wildcards): return get_pattern_by_technology( wildcards, illumina_pattern="results/{date}/read-sorted/pe~position/{sample}.hardclipped.bam", + illumina_se_pattern="results/{date}/read-sorted/se~position/{sample}.hardclipped.bam", ont_pattern=expand( "results/{{date}}/mapped/ref~{ref}/{{sample}}.bam", ref=config["preprocessing"]["amplicon-reference"], @@ -1028,6 +1058,7 @@ def get_assemblies_for_submission(wildcards, agg_type): wildcards, sample=sample, illumina_pattern="ILLUMINA", + illumina_se_pattern="ILLUMINA_SE", ont_pattern="OXFORD_NANOPORE", ion_torrent_pattern="ION_TORRENT", ) @@ -1099,6 +1130,7 @@ def get_input_plotting_primer_clipping(wildcards, stage, suffix=""): wildcards, return_only_amplicon_samples=True, illumina_pattern=f"results/{{{{date}}}}/read-sorted/pe~position/{{sample}}.{stage}.bam{suffix}", + illumina_se_pattern=f"results/{{{{date}}}}/read-sorted/se~position/{{sample}}.{stage}.bam{suffix}", ont_pattern=f"results/{{{{date}}}}/read-sorted/se~position/{{sample}}.{stage}.bam{suffix}", ion_torrent_pattern=f"results/{{{{date}}}}/read-sorted/se~position/{{sample}}.{stage}.bam{suffix}", ) @@ -1136,6 +1168,7 @@ def get_fallbacks_for_report(fallback_type): def get_pattern_by_technology( wildcards, illumina_pattern=None, + illumina_se_pattern=None, ont_pattern=None, ion_torrent_pattern=None, sample=None, @@ -1144,6 +1177,8 @@ def get_pattern_by_technology( if sample is None: if is_illumina(wildcards): return illumina_pattern + elif is_illumina_se(wildcards): + return illumina_se_pattern elif is_ont(wildcards): return ont_pattern elif is_ion_torrent(wildcards): @@ -1151,6 +1186,8 @@ def get_pattern_by_technology( if is_illumina(None, sample): return illumina_pattern + elif is_illumina_se(None, sample): + return illumina_se_pattern elif is_ont(None, sample): return ont_pattern elif is_ion_torrent(None, sample): @@ -1180,6 +1217,7 @@ def format_patterns(input_patterns, sample, formated_patterns): def get_list_of_expanded_patters_by_technology( wildcards, illumina_pattern=None, + illumina_se_pattern=None, ont_pattern=None, ion_torrent_pattern=None, return_only_amplicon_samples=False, @@ -1197,6 +1235,12 @@ def get_list_of_expanded_patters_by_technology( and is_amplicon_data(sample) ): patterns = format_patterns(illumina_pattern, sample, patterns) + elif ( + illumina_se_pattern is not None + and is_illumina_se(None, sample) + and is_amplicon_data(sample) + ): + patterns = format_patterns(illumina_se_pattern, sample, patterns) elif ( ont_pattern is not None and is_ont(None, sample) @@ -1214,6 +1258,8 @@ def get_list_of_expanded_patters_by_technology( for sample in samples: if illumina_pattern is not None and is_illumina(None, sample): patterns = format_patterns(illumina_pattern, sample, patterns) + if illumina_se_pattern is not None and is_illumina_se(None, sample): + patterns = format_patterns(illumina_pattern_se, sample, patterns) elif ont_pattern is not None and is_ont(None, sample): patterns = format_patterns(ont_pattern, sample, patterns) elif ion_torrent_pattern is not None and is_ion_torrent(None, sample): @@ -1226,6 +1272,7 @@ def get_raw_reads_counts(wildcards): return get_list_of_expanded_patters_by_technology( wildcards, illumina_pattern="results/{{date}}/trimmed/fastp-pe/{sample}.fastp.json", + illumina_se_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json", ont_pattern="results/{{date}}/tables/fastq-read-counts/raw~{sample}.txt", ion_torrent_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json", ) @@ -1236,6 +1283,7 @@ def get_trimmed_reads_counts(wildcards): return get_list_of_expanded_patters_by_technology( wildcards, illumina_pattern="results/{{date}}/trimmed/fastp-pe/{sample}.fastp.json", + illumina_se_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json", ont_pattern="results/{{date}}/tables/fastq-read-counts/trimmed~{sample}.txt", ion_torrent_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json", ) @@ -1247,6 +1295,7 @@ def get_fastp_results(wildcards): return get_list_of_expanded_patters_by_technology( wildcards, illumina_pattern="results/{{date}}/trimmed/fastp-pe/{sample}.fastp.json", + illumina_se_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json", ion_torrent_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json", ) @@ -1329,6 +1378,7 @@ def get_trimmed_reads(wildcards): "results/{{{{date}}}}/trimmed/fastp-pe/{{sample}}.{read}.fastq.gz", read=[1, 2], ), + illumina_se_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastq.gz", ont_pattern="results/{{date}}/corrected/{sample}/{sample}.correctedReads.fasta.gz", ion_torrent_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastq.gz", ) @@ -1341,6 +1391,9 @@ def get_kraken_output(wildcards): illumina_pattern="results/{date}/species-diversity/pe/{{sample}}/{{sample}}.uncleaned.kreport2".format( **wildcards ), + illumina_se_pattern="results/{date}/species-diversity/se/{{sample}}/{{sample}}.uncleaned.kreport2".format( + **wildcards + ), ont_pattern="results/{date}/species-diversity/se/{{sample}}/{{sample}}.uncleaned.kreport2".format( **wildcards ), @@ -1357,6 +1410,9 @@ def get_kraken_output_after_filtering(wildcards): illumina_pattern="results/{date}/species-diversity-nonhuman/pe/{{sample}}/{{sample}}.cleaned.kreport2".format( **wildcards ), + illumina_se_pattern="results/{date}/species-diversity-nonhuman/se/{{sample}}/{{sample}}.cleaned.kreport2".format( + **wildcards + ), ont_pattern="results/{date}/species-diversity-nonhuman/se/{{sample}}/{{sample}}.cleaned.kreport2".format( **wildcards ), @@ -1453,6 +1509,7 @@ def get_varrange(wildcards): return get_pattern_by_technology( wildcards, illumina_pattern=ILLUMINA_VARRANGE, + illumina_se_pattern=ILLUMINA_SE_VARRANGE, ont_pattern=ONT_VARRANGE, ion_torrent_pattern=ION_VARRANGE, ) @@ -1492,6 +1549,7 @@ def get_seq_type(wildcards): return get_list_of_expanded_patters_by_technology( wildcards, illumina_pattern="ILLUMINA", + illumina_se_pattern="ILLUMINA_SE", ont_pattern="OXFORD_NANOPORE", ion_torrent_pattern="ION_TORRENT", ) @@ -1595,7 +1653,7 @@ def get_varlociraptor_preprocess_flags(wildcards): technology = get_technology(wildcards) if technology == "ont": return "--pairhmm-mode homopolymer" - elif technology == "illumina" or technology == "ion": + elif technology == "illumina" or technology == "illumina_se" or technology == "ion": return "" else: raise NotImplementedError(f"Technology {technology} not supported.") @@ -1689,3 +1747,4 @@ wildcard_constraints: list(map(re.escape, config["variant-calling"]["filters"])) + ["nofilter"] ), varrange="structural|small|homopolymer-medaka|homopolymer-longshot|lineage-variants", +