From 4cb3720fdd6b0f7b1707899e539f826561f01109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Thu, 12 Dec 2024 09:50:57 +0100 Subject: [PATCH 1/5] feat: allow custom alignment properties --- .test/config-chm-eval/config.yaml | 5 ++++ .test/config-giab/config.yaml | 5 ++++ .../config-no-candidate-filtering/config.yaml | 5 ++++ .test/config-simple/config.yaml | 5 ++++ .test/config-sra/config.yaml | 7 ++++- .test/config-target-regions/config.yaml | 5 ++++ .../config_multiple_beds.yaml | 5 ++++ .test/config_primers/config.yaml | 7 ++++- config/config.yaml | 9 ++++++ workflow/rules/calling.smk | 2 +- workflow/rules/common.smk | 30 +++++++++++++++++++ workflow/schemas/config.schema.yaml | 12 ++++++++ 12 files changed, 94 insertions(+), 3 deletions(-) diff --git a/.test/config-chm-eval/config.yaml b/.test/config-chm-eval/config.yaml index a3219ad1c..9841d043f 100644 --- a/.test/config-chm-eval/config.yaml +++ b/.test/config-chm-eval/config.yaml @@ -184,3 +184,8 @@ report: stratify: activate: false by-column: condition + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config_alignment_properties.tsv" \ No newline at end of file diff --git a/.test/config-giab/config.yaml b/.test/config-giab/config.yaml index 10e9c94d8..97522927a 100644 --- a/.test/config-giab/config.yaml +++ b/.test/config-giab/config.yaml @@ -157,3 +157,8 @@ params: gene_coverage: min_avg_coverage: 5 + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config_alignment_properties.tsv" \ No newline at end of file diff --git a/.test/config-no-candidate-filtering/config.yaml b/.test/config-no-candidate-filtering/config.yaml index 897a5a00e..f9b624af3 100644 --- a/.test/config-no-candidate-filtering/config.yaml +++ b/.test/config-no-candidate-filtering/config.yaml @@ -123,3 +123,8 @@ report: stratify: activate: false by-column: condition + +custom_alignment_properties: + activate: false + column: "panel" + tsv: 
"config_alignment_properties.tsv" \ No newline at end of file diff --git a/.test/config-simple/config.yaml b/.test/config-simple/config.yaml index de1d124c2..7f155372c 100644 --- a/.test/config-simple/config.yaml +++ b/.test/config-simple/config.yaml @@ -127,3 +127,8 @@ tables: coverage: true event_prob: true generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config_alignment_properties.tsv" \ No newline at end of file diff --git a/.test/config-sra/config.yaml b/.test/config-sra/config.yaml index 22751b786..1594fef8f 100644 --- a/.test/config-sra/config.yaml +++ b/.test/config-sra/config.yaml @@ -112,4 +112,9 @@ tables: genotype: true coverage: true event_prob: true - generate_excel: true \ No newline at end of file + generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config_alignment_properties.tsv" \ No newline at end of file diff --git a/.test/config-target-regions/config.yaml b/.test/config-target-regions/config.yaml index e1172464d..120b73b54 100644 --- a/.test/config-target-regions/config.yaml +++ b/.test/config-target-regions/config.yaml @@ -126,3 +126,8 @@ tables: coverage: true event_prob: true generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config_alignment_properties.tsv" \ No newline at end of file diff --git a/.test/config-target-regions/config_multiple_beds.yaml b/.test/config-target-regions/config_multiple_beds.yaml index 660845052..af690a1a3 100644 --- a/.test/config-target-regions/config_multiple_beds.yaml +++ b/.test/config-target-regions/config_multiple_beds.yaml @@ -124,3 +124,8 @@ tables: coverage: true event_prob: true generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config_alignment_properties.tsv" \ No newline at end of file diff --git a/.test/config_primers/config.yaml b/.test/config_primers/config.yaml index c2921ba8d..bd0747671 100644 --- 
a/.test/config_primers/config.yaml +++ b/.test/config_primers/config.yaml @@ -115,4 +115,9 @@ tables: genotype: true coverage: true event_prob: true - generate_excel: true \ No newline at end of file + generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config_alignment_properties.tsv" \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 009a06ef7..1878f6e90 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -282,3 +282,12 @@ params: freebayes: min_alternate_fraction: 0.05 # Reduce for calling variants with lower VAFs extra: "" + +# If activated preprocessed alignment properties can be applied to each sample individually. +# paths to the alignment properties json-files need to be set in a tsv-file containing a property name and path. +# alignment properties will be derived from a customizable column in the sample sheet. +# if not property name is set for a sample the alignment properties will be estimated best on the samples mapping +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config_alignment_properties.tsv" \ No newline at end of file diff --git a/workflow/rules/calling.smk b/workflow/rules/calling.smk index b311bf443..2866dac25 100644 --- a/workflow/rules/calling.smk +++ b/workflow/rules/calling.smk @@ -47,7 +47,7 @@ rule varlociraptor_preprocess: candidates=get_candidate_calls, bam="results/recal/{sample}.bam", bai="results/recal/{sample}.bai", - alignment_props="results/alignment-properties/{group}/{sample}.json", + alignment_props=get_alignment_props(), output: "results/observations/{group}/{sample}.{caller}.{scatteritem}.bcf", params: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 662278dc7..f80559849 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -111,6 +111,21 @@ primer_panels = ( else None ) +custom_alignment_props = ( + ( + pd.read_csv( + config["custom_alignment_properties"]["tsv"], + 
sep="\t", + dtype={"name": str, "path": str}, + comment="#", + ) + .set_index(["name"], drop=False) + .sort_index() + ) + if config["custom_alignment_properties"].get("tsv", "") + else None +) + def get_calling_events(calling_type): events = [ @@ -1591,3 +1606,18 @@ def get_delly_excluded_regions(): ) else: return [] + + +def get_alignment_props(): + def inner(wildcards): + if is_activated("custom_alignment_properties"): + alignment_prop_column = config["custom_alignment_properties"]["column"] + prop_name = extract_unique_sample_column_value( + wildcards.sample, alignment_prop_column + ) + print(prop_name) + if pd.notna(prop_name): + return custom_alignment_props.loc[prop_name, "path"] + return "results/alignment-properties/{group}/{sample}.json" + + return inner diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml index e86231b0a..0e162f24d 100644 --- a/workflow/schemas/config.schema.yaml +++ b/workflow/schemas/config.schema.yaml @@ -347,6 +347,17 @@ properties: - freebayes - varlociraptor + custom_alignment_properties: + type: object + properties: + activate: + type: boolean + column: + type: string + tsv: + type: string + + required: - samples - units @@ -355,3 +366,4 @@ required: - calling - params - annotations + - custom_alignment_properties From e8459163b9f4af370cc490b0cf63b461f001a98b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Thu, 12 Dec 2024 10:10:15 +0100 Subject: [PATCH 2/5] move function --- .test/config-chm-eval/config.yaml | 2 +- .test/config-giab/config.yaml | 2 +- .test/config-no-candidate-filtering/config.yaml | 2 +- .test/config-simple/config.yaml | 2 +- .test/config-sra/config.yaml | 2 +- .test/config-target-regions/config.yaml | 2 +- .../config_multiple_beds.yaml | 2 +- .test/config_primers/config.yaml | 2 +- config/alignment_properties.tsv | 1 + config/config.yaml | 8 ++++---- workflow/rules/common.smk | 17 +++++++++-------- 11 files changed, 22 insertions(+), 20 deletions(-) create 
mode 100644 config/alignment_properties.tsv diff --git a/.test/config-chm-eval/config.yaml b/.test/config-chm-eval/config.yaml index 9841d043f..a8bb79ca5 100644 --- a/.test/config-chm-eval/config.yaml +++ b/.test/config-chm-eval/config.yaml @@ -188,4 +188,4 @@ report: custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "" \ No newline at end of file diff --git a/.test/config-giab/config.yaml b/.test/config-giab/config.yaml index 97522927a..6742e6e7b 100644 --- a/.test/config-giab/config.yaml +++ b/.test/config-giab/config.yaml @@ -161,4 +161,4 @@ gene_coverage: custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "" \ No newline at end of file diff --git a/.test/config-no-candidate-filtering/config.yaml b/.test/config-no-candidate-filtering/config.yaml index f9b624af3..c77b75259 100644 --- a/.test/config-no-candidate-filtering/config.yaml +++ b/.test/config-no-candidate-filtering/config.yaml @@ -127,4 +127,4 @@ report: custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "" \ No newline at end of file diff --git a/.test/config-simple/config.yaml b/.test/config-simple/config.yaml index 7f155372c..0e0abc552 100644 --- a/.test/config-simple/config.yaml +++ b/.test/config-simple/config.yaml @@ -131,4 +131,4 @@ tables: custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "" \ No newline at end of file diff --git a/.test/config-sra/config.yaml b/.test/config-sra/config.yaml index 1594fef8f..21df0691d 100644 --- a/.test/config-sra/config.yaml +++ b/.test/config-sra/config.yaml @@ -117,4 +117,4 @@ tables: custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "" 
\ No newline at end of file diff --git a/.test/config-target-regions/config.yaml b/.test/config-target-regions/config.yaml index 120b73b54..eb0290d41 100644 --- a/.test/config-target-regions/config.yaml +++ b/.test/config-target-regions/config.yaml @@ -130,4 +130,4 @@ tables: custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "" \ No newline at end of file diff --git a/.test/config-target-regions/config_multiple_beds.yaml b/.test/config-target-regions/config_multiple_beds.yaml index af690a1a3..ab83cc2ba 100644 --- a/.test/config-target-regions/config_multiple_beds.yaml +++ b/.test/config-target-regions/config_multiple_beds.yaml @@ -128,4 +128,4 @@ tables: custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "" \ No newline at end of file diff --git a/.test/config_primers/config.yaml b/.test/config_primers/config.yaml index bd0747671..15ad0df3d 100644 --- a/.test/config_primers/config.yaml +++ b/.test/config_primers/config.yaml @@ -120,4 +120,4 @@ tables: custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "" \ No newline at end of file diff --git a/config/alignment_properties.tsv b/config/alignment_properties.tsv new file mode 100644 index 000000000..7ef0d7401 --- /dev/null +++ b/config/alignment_properties.tsv @@ -0,0 +1 @@ +name path \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 1878f6e90..163e9bb20 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -284,10 +284,10 @@ params: extra: "" # If activated preprocessed alignment properties can be applied to each sample individually. -# paths to the alignment properties json-files need to be set in a tsv-file containing a property name and path. 
-# alignment properties will be derived from a customizable column in the sample sheet. -# if not property name is set for a sample the alignment properties will be estimated best on the samples mapping +# Paths to the alignment properties json-files need to be set in a tsv-file containing a property name and path. +# Alignment properties will be derived from a customizable column in the sample sheet. +# If not property name is set for a sample the alignment properties will be estimated best on the samples mapping. custom_alignment_properties: activate: false column: "panel" - tsv: "config_alignment_properties.tsv" \ No newline at end of file + tsv: "config/alignment_properties.tsv" \ No newline at end of file diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index f80559849..22b7df280 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -111,6 +111,14 @@ primer_panels = ( else None ) + +def is_activated(xpath): + c = config + for entry in xpath.split("/"): + c = c.get(entry, {}) + return bool(c.get("activate", False)) + + custom_alignment_props = ( ( pd.read_csv( @@ -122,7 +130,7 @@ custom_alignment_props = ( .set_index(["name"], drop=False) .sort_index() ) - if config["custom_alignment_properties"].get("tsv", "") + if is_activated("custom_alignment_properties") else None ) @@ -621,13 +629,6 @@ def get_all_group_observations(wildcards): ) -def is_activated(xpath): - c = config - for entry in xpath.split("/"): - c = c.get(entry, {}) - return bool(c.get("activate", False)) - - def get_star_read_group(wildcards): """Denote sample name and platform in read group.""" platform = extract_unique_sample_column_value(wildcards.sample, "platform") From 765d099f3c685fe4974a45ed53cac5851e73e674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Thu, 12 Dec 2024 10:11:22 +0100 Subject: [PATCH 3/5] remove print cmd --- workflow/rules/common.smk | 1 - 1 file changed, 1 deletion(-) diff --git a/workflow/rules/common.smk 
b/workflow/rules/common.smk index 22b7df280..7c018ea58 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -1616,7 +1616,6 @@ def get_alignment_props(): prop_name = extract_unique_sample_column_value( wildcards.sample, alignment_prop_column ) - print(prop_name) if pd.notna(prop_name): return custom_alignment_props.loc[prop_name, "path"] return "results/alignment-properties/{group}/{sample}.json" From df4d185ea3d53971d7bd87ce577cba6fb9a746d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Tue, 7 Jan 2025 08:10:51 +0100 Subject: [PATCH 4/5] remove inner function --- workflow/rules/calling.smk | 2 +- workflow/rules/common.smk | 21 +++++++++------------ workflow/rules/mapping.smk | 4 +--- workflow/rules/ref.smk | 9 ++++++--- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/workflow/rules/calling.smk b/workflow/rules/calling.smk index b7250e827..1f3e49800 100644 --- a/workflow/rules/calling.smk +++ b/workflow/rules/calling.smk @@ -47,7 +47,7 @@ rule varlociraptor_preprocess: candidates=get_candidate_calls, bam="results/recal/{sample}.bam", bai="results/recal/{sample}.bai", - alignment_props=get_alignment_props(), + alignment_props=get_alignment_props, output: "results/observations/{group}/{sample}.{caller}.{scatteritem}.bcf", params: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 7c018ea58..3848f0066 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -1609,15 +1609,12 @@ def get_delly_excluded_regions(): return [] -def get_alignment_props(): - def inner(wildcards): - if is_activated("custom_alignment_properties"): - alignment_prop_column = config["custom_alignment_properties"]["column"] - prop_name = extract_unique_sample_column_value( - wildcards.sample, alignment_prop_column - ) - if pd.notna(prop_name): - return custom_alignment_props.loc[prop_name, "path"] - return "results/alignment-properties/{group}/{sample}.json" - - return inner +def 
get_alignment_props(wildcards): + if is_activated("custom_alignment_properties"): + alignment_prop_column = config["custom_alignment_properties"]["column"] + prop_name = extract_unique_sample_column_value( + wildcards.sample, alignment_prop_column + ) + if pd.notna(prop_name): + return custom_alignment_props.loc[prop_name, "path"] + return f"results/alignment-properties/{wildcards.group}/{wildcards.sample}.json" diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk index fab5df796..24608dd59 100644 --- a/workflow/rules/mapping.smk +++ b/workflow/rules/mapping.smk @@ -15,8 +15,6 @@ rule map_reads_bwa: "v3.8.0/bio/bwa/mem" -# Create distance and minimizer index before mapping -# Otherwise it will be performed on the first execution leading to race conditions for multiple samples rule map_reads_vg: input: reads=get_map_reads_input, @@ -31,7 +29,7 @@ rule map_reads_vg: extra="", sorting="fgbio", sort_order="queryname", - threads: 8 + threads: 64 wrapper: "v5.3.0/bio/vg/giraffe" diff --git a/workflow/rules/ref.smk b/workflow/rules/ref.smk index 6d7d4725a..5299e150c 100644 --- a/workflow/rules/ref.smk +++ b/workflow/rules/ref.smk @@ -150,11 +150,12 @@ rule get_vep_plugins: rule get_pangenome_haplotypes: output: - temp(f"{pangenome_prefix}.vcf.gz"), + f"{pangenome_prefix}.vcf.gz", params: url=config["ref"]["pangenome"]["vcf"], log: "logs/pangenome/haplotypes.log", + cache: "omit-software" shell: "curl -o {output} {params.url} 2> {log}" @@ -163,11 +164,12 @@ rule rename_haplotype_contigs: input: f"{pangenome_prefix}.vcf.gz", output: - temp("resources/haplotype_contigs_renamed.tsv"), + "resources/haplotype_contigs_renamed.tsv", params: expressions=config["ref"]["pangenome"].get("rename_expressions", []), log: "logs/pangenome/chrom_replacement.log", + cache: "omit-software" conda: "../envs/pysam.yaml" script: @@ -179,9 +181,10 @@ rule rename_haplotype_chroms: vcf="resources/{pangenome}.vcf.gz", tsv="resources/haplotype_contigs_renamed.tsv", output: - 
temp("resources/{pangenome}.renamed.vcf.gz"), + "resources/{pangenome}.renamed.vcf.gz", log: "logs/pangenome/{pangenome}_renamed.log", + cache: "omit-software" conda: "../envs/bcftools.yaml" shell: From 29a15782218d376b8b50455673eb6a8de8815dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Wed, 15 Jan 2025 11:04:17 +0100 Subject: [PATCH 5/5] Update config/config.yaml --- config/config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index e8719c703..3eee881b3 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -286,9 +286,9 @@ params: extra: "" # If activated preprocessed alignment properties can be applied to each sample individually. -# Paths to the alignment properties json-files need to be set in a tsv-file containing a property name and path. -# Alignment properties will be derived from a customizable column in the sample sheet. -# If not property name is set for a sample the alignment properties will be estimated best on the samples mapping. +# Paths to the alignment properties json files need to be set in a tsv file containing a property name and path. +# Alignment property names will be taken from a customizable column in the sample sheet. +# If no property name is set for a sample or custom_alignment_properties is deactivated the alignment properties will be estimated on the sample's read alignments. custom_alignment_properties: activate: false column: "panel"