Skip to content

Commit

Permalink
add illumina se in common.smk
Browse files Browse the repository at this point in the history
  • Loading branch information
alethomas committed May 18, 2024
1 parent 53b99c4 commit 04ef5cd
Showing 1 changed file with 66 additions and 7 deletions.
73 changes: 66 additions & 7 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ def is_illumina(wildcards, sample=None):
return get_technology(None, sample) == ILLUMINA


def is_illumina_se(wildcards, sample=None):
    """Returns if the sample was sequenced with single-end Illumina technology.

    If ``sample`` is given, the technology is looked up for that sample name;
    otherwise it is looked up via ``wildcards``.
    """
    if sample is not None:
        # explicit sample name takes precedence over the wildcards
        return get_technology(None, sample) == ILLUMINA_SE
    return get_technology(wildcards) == ILLUMINA_SE


def is_ion_torrent(wildcards, sample=None):
"""Returns if the sample was sequenced with the Ion Torrent platform."""
if sample is None:
Expand All @@ -150,8 +156,8 @@ def is_ion_torrent(wildcards, sample=None):
def has_pseudo_assembly(wildcards, sample=None):
    """Returns if a pseudo-assembly should be created for the sample.

    This is the case for paired-end Illumina, Ion Torrent and single-end
    Illumina samples. If ``sample`` is given, it is used for the lookup;
    otherwise the lookup is done via ``wildcards``.
    """
    if sample is None:
        return (
            is_illumina(wildcards)
            or is_ion_torrent(wildcards)
            or is_illumina_se(wildcards)
        )
    return (
        is_illumina(None, sample)
        or is_ion_torrent(None, sample)
        or is_illumina_se(None, sample)
    )


def has_consensus_assembly(wildcards, sample=None):
Expand All @@ -164,8 +170,8 @@ def has_consensus_assembly(wildcards, sample=None):
def is_single_end(wildcards, sample=None):
    """Returns if the sample was sequenced with single end technology.

    Oxford Nanopore, Ion Torrent and single-end Illumina samples count as
    single end. If ``sample`` is given, it is used for the lookup; otherwise
    the lookup is done via ``wildcards``.
    """
    if sample is None:
        return (
            is_ont(wildcards)
            or is_ion_torrent(wildcards)
            or is_illumina_se(wildcards)
        )
    return (
        is_ont(None, sample)
        or is_ion_torrent(None, sample)
        or is_illumina_se(None, sample)
    )


def get_fastqs(wildcards):
Expand Down Expand Up @@ -205,6 +211,8 @@ def get_fastqs(wildcards):
# default case, look up FASTQs in the sample sheet
if is_illumina(wildcards):
return pep.sample_table.loc[wildcards.sample][["fq1", "fq2"]]
elif is_illumina_se(wildcards):
return pep.sample_table.loc[wildcards.sample][["fq1"]]
elif is_ont(wildcards):
return pep.sample_table.loc[wildcards.sample][["fq1"]]
elif is_ion_torrent(wildcards):
Expand Down Expand Up @@ -413,8 +421,12 @@ def get_reads(wildcards):
"results/{date}/trimmed/fastp-pe/{sample}.{read}.fastq.gz",
read=[1, 2],
**wildcards,
)
)

illumina_se_pattern = expand(
"results/{date}/trimmed/fastp-se/{sample}.fastq.gz",
**wildcards,
)
ont_pattern = expand(
"results/{date}/corrected/{sample}/{sample}.correctedReads.clip.fasta",
**wildcards,
Expand All @@ -428,6 +440,7 @@ def get_reads(wildcards):
return get_pattern_by_technology(
wildcards,
illumina_pattern=illumina_pattern,
illumina_se_pattern=illumina_se_pattern,
ont_pattern=ont_pattern,
ion_torrent_pattern=ion_torrent_pattern,
)
Expand All @@ -449,6 +462,7 @@ def get_non_human_reads(wildcards):
"results/{{date}}/nonhuman-reads/pe/{{sample}}.{read}.fastq.gz",
read=[1, 2],
),
illumina_se_pattern="results/{date}/nonhuman-reads/se/{sample}.fastq.gz",
ont_pattern="results/{date}/nonhuman-reads/se/{sample}.fastq.gz",
ion_torrent_pattern="results/{date}/nonhuman-reads/se/{sample}.fastq.gz",
)
Expand All @@ -466,6 +480,9 @@ def get_reads_after_qc(wildcards, read="both"):
read=[1, 2],
**wildcards,
)
illumina_se_pattern = expand(
"results/{date}/nonhuman-reads/se/{sample}.fastq", **wildcards
)
ont_pattern = expand(
"results/{date}/nonhuman-reads/se/{sample}.fastq", **wildcards
)
Expand All @@ -476,6 +493,7 @@ def get_reads_after_qc(wildcards, read="both"):
pattern = get_pattern_by_technology(
wildcards,
illumina_pattern=illumina_pattern,
illumina_se_pattern=illumina_se_pattern,
ont_pattern=ont_pattern,
ion_torrent_pattern=ion_torrent_pattern,
)
Expand All @@ -487,6 +505,10 @@ def get_reads_after_qc(wildcards, read="both"):
**wildcards,
)

illumina_se_pattern = expand(
"results/{date}/nonhuman-reads/se/{sample}.fastq.gz", **wildcards
)

ont_pattern = expand(
"results/{date}/nonhuman-reads/se/{sample}.fastq.gz", **wildcards
)
Expand All @@ -498,6 +520,7 @@ def get_reads_after_qc(wildcards, read="both"):
pattern = get_pattern_by_technology(
wildcards,
illumina_pattern=illumina_pattern,
illumina_se_pattern=illumina_se_pattern,
ont_pattern=ont_pattern,
ion_torrent_pattern=ion_torrent_pattern,
)
Expand Down Expand Up @@ -532,6 +555,9 @@ def return_assembler(sample):
illumina_pattern="{assembler}-pe".format(
assembler=config["assembly"]["illumina"]["amplicon"]
),
illumina_se_pattern="{assembler}-se".format(
assembler=config["assembly"]["illumina"]["amplicon"]
),
ont_pattern="{assembler}-se".format(
assembler=config["assembly"]["oxford nanopore"]["amplicon"]
),
Expand All @@ -546,6 +572,9 @@ def return_assembler(sample):
illumina_pattern="{assembler}-pe".format(
assembler=config["assembly"]["illumina"]["shotgun"]
),
illumina_se_pattern="{assembler}-se".format(
assembler=config["assembly"]["illumina"]["shotgun"]
),
ont_pattern="{assembler}-se".format(
assembler=config["assembly"]["oxford nanopore"]["shotgun"]
),
Expand Down Expand Up @@ -834,7 +863,7 @@ def get_varlociraptor_bias_flags(wildcards):
if is_amplicon_data(wildcards.sample):
# no bias detection possible
return (
"--omit-strand-bias --omit-read-orientation-bias --omit-read-position-bias"
"--omit-strand-bias --omit-read-orientation-bias --omit-read-position-bias --omit-softclip-bias"
)
return ""

Expand All @@ -853,6 +882,7 @@ def get_depth_input(wildcards):
return get_pattern_by_technology(
wildcards,
illumina_pattern="results/{date}/read-sorted/pe~position/{sample}.hardclipped.bam",
illumina_se_pattern="results/{date}/read-sorted/se~position/{sample}.hardclipped.bam",
ont_pattern=expand(
"results/{{date}}/mapped/ref~{ref}/{{sample}}.bam",
ref=config["preprocessing"]["amplicon-reference"],
Expand Down Expand Up @@ -1028,6 +1058,7 @@ def get_assemblies_for_submission(wildcards, agg_type):
wildcards,
sample=sample,
illumina_pattern="ILLUMINA",
illumina_se_pattern="ILLUMINA_SE",
ont_pattern="OXFORD_NANOPORE",
ion_torrent_pattern="ION_TORRENT",
)
Expand Down Expand Up @@ -1099,6 +1130,7 @@ def get_input_plotting_primer_clipping(wildcards, stage, suffix=""):
wildcards,
return_only_amplicon_samples=True,
illumina_pattern=f"results/{{{{date}}}}/read-sorted/pe~position/{{sample}}.{stage}.bam{suffix}",
illumina_se_pattern=f"results/{{{{date}}}}/read-sorted/se~position/{{sample}}.{stage}.bam{suffix}",
ont_pattern=f"results/{{{{date}}}}/read-sorted/se~position/{{sample}}.{stage}.bam{suffix}",
ion_torrent_pattern=f"results/{{{{date}}}}/read-sorted/se~position/{{sample}}.{stage}.bam{suffix}",
)
Expand Down Expand Up @@ -1136,6 +1168,7 @@ def get_fallbacks_for_report(fallback_type):
def get_pattern_by_technology(
wildcards,
illumina_pattern=None,
illumina_se_pattern=None,
ont_pattern=None,
ion_torrent_pattern=None,
sample=None,
Expand All @@ -1144,13 +1177,17 @@ def get_pattern_by_technology(
if sample is None:
if is_illumina(wildcards):
return illumina_pattern
elif is_illumina_se(wildcards):
return illumina_se_pattern
elif is_ont(wildcards):
return ont_pattern
elif is_ion_torrent(wildcards):
return ion_torrent_pattern

if is_illumina(None, sample):
return illumina_pattern
elif is_illumina_se(None, sample):
return illumina_se_pattern
elif is_ont(None, sample):
return ont_pattern
elif is_ion_torrent(None, sample):
Expand Down Expand Up @@ -1180,6 +1217,7 @@ def format_patterns(input_patterns, sample, formated_patterns):
def get_list_of_expanded_patters_by_technology(
wildcards,
illumina_pattern=None,
illumina_se_pattern=None,
ont_pattern=None,
ion_torrent_pattern=None,
return_only_amplicon_samples=False,
Expand All @@ -1197,6 +1235,12 @@ def get_list_of_expanded_patters_by_technology(
and is_amplicon_data(sample)
):
patterns = format_patterns(illumina_pattern, sample, patterns)
elif (
illumina_se_pattern is not None
and is_illumina_se(None, sample)
and is_amplicon_data(sample)
):
patterns = format_patterns(illumina_se_pattern, sample, patterns)
elif (
ont_pattern is not None
and is_ont(None, sample)
Expand All @@ -1214,6 +1258,8 @@ def get_list_of_expanded_patters_by_technology(
for sample in samples:
if illumina_pattern is not None and is_illumina(None, sample):
patterns = format_patterns(illumina_pattern, sample, patterns)
elif illumina_se_pattern is not None and is_illumina_se(None, sample):
patterns = format_patterns(illumina_se_pattern, sample, patterns)
elif ont_pattern is not None and is_ont(None, sample):
patterns = format_patterns(ont_pattern, sample, patterns)
elif ion_torrent_pattern is not None and is_ion_torrent(None, sample):
Expand All @@ -1226,6 +1272,7 @@ def get_raw_reads_counts(wildcards):
return get_list_of_expanded_patters_by_technology(
wildcards,
illumina_pattern="results/{{date}}/trimmed/fastp-pe/{sample}.fastp.json",
illumina_se_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json",
ont_pattern="results/{{date}}/tables/fastq-read-counts/raw~{sample}.txt",
ion_torrent_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json",
)
Expand All @@ -1236,6 +1283,7 @@ def get_trimmed_reads_counts(wildcards):
return get_list_of_expanded_patters_by_technology(
wildcards,
illumina_pattern="results/{{date}}/trimmed/fastp-pe/{sample}.fastp.json",
illumina_se_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json",
ont_pattern="results/{{date}}/tables/fastq-read-counts/trimmed~{sample}.txt",
ion_torrent_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json",
)
Expand All @@ -1247,6 +1295,7 @@ def get_fastp_results(wildcards):
return get_list_of_expanded_patters_by_technology(
wildcards,
illumina_pattern="results/{{date}}/trimmed/fastp-pe/{sample}.fastp.json",
illumina_se_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json",
ion_torrent_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastp.json",
)

Expand Down Expand Up @@ -1329,6 +1378,7 @@ def get_trimmed_reads(wildcards):
"results/{{{{date}}}}/trimmed/fastp-pe/{{sample}}.{read}.fastq.gz",
read=[1, 2],
),
illumina_se_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastq.gz",
ont_pattern="results/{{date}}/corrected/{sample}/{sample}.correctedReads.fasta.gz",
ion_torrent_pattern="results/{{date}}/trimmed/fastp-se/{sample}.fastq.gz",
)
Expand All @@ -1341,6 +1391,9 @@ def get_kraken_output(wildcards):
illumina_pattern="results/{date}/species-diversity/pe/{{sample}}/{{sample}}.uncleaned.kreport2".format(
**wildcards
),
illumina_se_pattern="results/{date}/species-diversity/se/{{sample}}/{{sample}}.uncleaned.kreport2".format(
**wildcards
),
ont_pattern="results/{date}/species-diversity/se/{{sample}}/{{sample}}.uncleaned.kreport2".format(
**wildcards
),
Expand All @@ -1357,6 +1410,9 @@ def get_kraken_output_after_filtering(wildcards):
illumina_pattern="results/{date}/species-diversity-nonhuman/pe/{{sample}}/{{sample}}.cleaned.kreport2".format(
**wildcards
),
illumina_se_pattern="results/{date}/species-diversity-nonhuman/se/{{sample}}/{{sample}}.cleaned.kreport2".format(
**wildcards
),
ont_pattern="results/{date}/species-diversity-nonhuman/se/{{sample}}/{{sample}}.cleaned.kreport2".format(
**wildcards
),
Expand Down Expand Up @@ -1453,6 +1509,7 @@ def get_varrange(wildcards):
return get_pattern_by_technology(
wildcards,
illumina_pattern=ILLUMINA_VARRANGE,
illumina_se_pattern=ILLUMINA_SE_VARRANGE,
ont_pattern=ONT_VARRANGE,
ion_torrent_pattern=ION_VARRANGE,
)
Expand Down Expand Up @@ -1492,6 +1549,7 @@ def get_seq_type(wildcards):
return get_list_of_expanded_patters_by_technology(
wildcards,
illumina_pattern="ILLUMINA",
illumina_se_pattern="ILLUMINA_SE",
ont_pattern="OXFORD_NANOPORE",
ion_torrent_pattern="ION_TORRENT",
)
Expand Down Expand Up @@ -1595,7 +1653,7 @@ def get_varlociraptor_preprocess_flags(wildcards):
technology = get_technology(wildcards)
if technology == "ont":
return "--pairhmm-mode homopolymer"
elif technology == "illumina" or technology == "ion":
elif technology == "illumina" or technology == "illumina_se" or technology == "ion":
return ""
else:
raise NotImplementedError(f"Technology {technology} not supported.")
Expand Down Expand Up @@ -1689,3 +1747,4 @@ wildcard_constraints:
list(map(re.escape, config["variant-calling"]["filters"])) + ["nofilter"]
),
varrange="structural|small|homopolymer-medaka|homopolymer-longshot|lineage-variants",

0 comments on commit 04ef5cd

Please sign in to comment.