From f77d42e2154a95b6ca0e2c127df8ca134eda1789 Mon Sep 17 00:00:00 2001 From: Alexander Thomas <77535027+alethomas@users.noreply.github.com> Date: Thu, 29 Aug 2024 12:18:01 +0200 Subject: [PATCH] feat: FLiRT Mutations column (#664) * init * fmt * add to other config.yaml and template files * add def * add to env output * fmt --- .tests/config/config.yaml | 6 +++++ config/config.yaml | 14 +++++++---- resources/report-table-formatter.js | 3 +++ workflow/rules/generate_output.smk | 26 +++++++++++++-------- workflow/schemas/config.schema.yaml | 4 ++++ workflow/scripts/generate-overview-table.py | 6 ++++- 6 files changed, 44 insertions(+), 15 deletions(-) diff --git a/.tests/config/config.yaml b/.tests/config/config.yaml index 25a8ea6f4..5a278a36b 100644 --- a/.tests/config/config.yaml +++ b/.tests/config/config.yaml @@ -166,6 +166,12 @@ mixtures: - "_MIX_B-1-1-7_PERC_90_MIX_B-1-351_PERC_10" # mutations to be highlighted (protein name -> variants) + +flirt: + S: + - F456L + - R346T + mth: S: - N501Y diff --git a/config/config.yaml b/config/config.yaml index 898b305ac..84b0886f1 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -21,7 +21,7 @@ data-handling: # flag for archiving data # True: data is archived in path defined below # False: data is not archived - archive-data: True + archive-data: False # path of incoming data, which is moved to the # data directory by the preprocessing script incoming: ../incoming/ @@ -34,7 +34,7 @@ data-handling: quality-criteria: illumina: # minimal length of acceptable reads - min-length-reads: 30 + min-length-reads: 100 # average quality of acceptable reads (PHRED) min-PHRED: 20 ont: @@ -61,9 +61,9 @@ preprocessing: # ARTIC primer version to clip from reads. See # https://github.com/artic-network/artic-ncov2019/tree/master/primer_schemes/nCoV-2019/V4 # for more information - artic-primer-version: 3 + artic-primer-version: 4 # path to amplicon primers in bed format for hard-clipping on paired end files (illumina) or url to file that should be downloaded - amplicon-primers: "resources/nCoV-2019.primer.bed" + amplicon-primers: "resources/SARS-CoV-2-artic-v4_1.primer.bed" # GenBank accession of reference sequence of the amplicon primers amplicon-reference: "MN908947" @@ -124,6 +124,12 @@ strain-calling: B.1.617.2: OK091006 # mutations to be highlighted (protein name -> variants) + +flirt: + S: + - F456L + - R346T + mth: S: - L18F diff --git a/resources/report-table-formatter.js b/resources/report-table-formatter.js index ab07f1589..c2d1a13dd 100644 --- a/resources/report-table-formatter.js +++ b/resources/report-table-formatter.js @@ -91,6 +91,9 @@ } }, + "FLiRT Mutations": function format(value) { + return this["variant helper"](value, true); + }, "VOC Mutations": function format(value) { return this["variant helper"](value, true); }, diff --git a/workflow/rules/generate_output.smk b/workflow/rules/generate_output.smk index 11899c101..c272e7832 100644 --- a/workflow/rules/generate_output.smk +++ b/workflow/rules/generate_output.smk @@ -153,6 +153,7 @@ rule overview_table_patient_csv: wildcards, "all samples" ), mth=config.get("mth"), + flirt=config.get("flirt"), samples=lambda wildcards: get_samples_for_date(wildcards.date), mode=config["mode"], log: @@ -178,6 +179,7 @@ use rule overview_table_patient_csv as overview_table_environment_csv with: qc_data="results/{date}/tables/environment-overview.csv", params: mth=config.get("mth"), + flirt=config.get("flirt"), samples=lambda wildcards: get_samples_for_date(wildcards.date), mode=config["mode"], log: @@ -371,6 +373,7 @@ rule snakemake_reports_patient: expand_samples_for_date( ["results/{{date}}/lineage-variant-report/{sample}.lineage-variants"] ), + # lambda wildcards: "results/{date}/lineage-variant-report/all", lambda wildcards: expand( "results/{{date}}/vcf-report/{target}.{filter}.{annotation}", target=get_samples_for_date(wildcards.date) + ["all"], @@ -380,10 +383,10 @@ rule snakemake_reports_patient: # 3. Sequencing Details "results/{date}/qc/laboratory/multiqc.html", "results/{date}/plots/coverage-reference-genome.png", - "results/{date}/plots/coverage-assembled-genome.png", - lambda wildcards: "results/{date}/plots/primer-clipping-intervals.svg" - if any_sample_is_amplicon(wildcards) - else [], + # "results/{date}/plots/coverage-assembled-genome.png", + # lambda wildcards: "results/{date}/plots/primer-clipping-intervals.svg" + # if any_sample_is_amplicon(wildcards) + # else [], # 4. Assembly "results/{date}/filter-overview", "results/{date}/pangolin-call-overview", @@ -406,8 +409,6 @@ rule snakemake_reports_patient: "results/patient-reports/{date}.zip", params: for_testing=get_if_testing("--snakefile ../workflow/Snakefile"), - conda: - "../envs/snakemake.yaml" log: "logs/snakemake_reports/{date}.log", shell: @@ -423,10 +424,10 @@ use rule snakemake_reports_patient as snakemake_reports_environment with: "results/{{date}}/{execution_mode}/overview/", execution_mode=get_checked_mode(), ), - "results/{date}/plots/all.major-strain.strains.kallisto.svg", - expand_samples_for_date( - ["results/{{date}}/plots/strain-calls/{sample}.strains.kallisto.svg"] - ), + # "results/{date}/plots/all.major-strain.strains.kallisto.svg", + # expand_samples_for_date( + # ["results/{{date}}/plots/strain-calls/{sample}.strains.kallisto.svg"] + # ), # 2. Variant Call Details expand_samples_for_date( ["results/{{date}}/lineage-variant-report/{sample}.lineage-variants"] @@ -444,3 +445,8 @@ use rule snakemake_reports_patient as snakemake_reports_environment with: "results/environment-reports/{date}.zip", log: "logs/snakemake_reports/{date}.log", + + +# rule output_sample_sheet: +# input: +# Path(pep.config_file()).parent / pep.config()["sample_table"], diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml index d4d498813..2ae788b79 100644 --- a/workflow/schemas/config.schema.yaml +++ b/workflow/schemas/config.schema.yaml @@ -115,6 +115,9 @@ properties: description: flag for using gisaid or genbank lineage-references: type: object + flirt: + type: object + description: mutations to be highlighted (protein name -> variants) mth: type: object description: mutations to be highlighted (protein name -> variants) @@ -130,4 +133,5 @@ required: - assembly - variant-calling - strain-calling + - flirt - mth diff --git a/workflow/scripts/generate-overview-table.py b/workflow/scripts/generate-overview-table.py index 9afd487b8..aebe14c6c 100644 --- a/workflow/scripts/generate-overview-table.py +++ b/workflow/scripts/generate-overview-table.py @@ -193,6 +193,7 @@ def register_contig_lengths(assemblies, name): } for sample, file in iter_with_samples(snakemake.input.bcf): + flirt_mutations = {} mutations_of_interest = {} other_mutations = {} @@ -228,11 +229,14 @@ def fmt_variants(variants): hgvsp = f"{feature}:{alteration}" entry = (hgvsp, f"{vaf:.3f}") - if alteration in snakemake.params.mth.get(feature, {}): + if alteration in snakemake.params.flirt.get(feature, {}): + insert_entry(flirt_mutations, hgvsp, vaf) + elif alteration in snakemake.params.mth.get(feature, {}): insert_entry(mutations_of_interest, hgvsp, vaf) else: insert_entry(other_mutations, hgvsp, vaf) + data.loc[sample, "FLiRT Mutations"] = fmt_variants(flirt_mutations) data.loc[sample, "VOC Mutations"] = fmt_variants(mutations_of_interest) data.loc[sample, "Other Mutations"] = fmt_variants(other_mutations)