diff --git a/Snakefile b/Snakefile index ca18c4f..563f01e 100755 --- a/Snakefile +++ b/Snakefile @@ -64,10 +64,14 @@ files = rules.files.params def subtypes_by_subtype_wildcard(wildcards): + # TODO - this function does more than strictly subtype filtering as certain builds filter to + # GenoFLU constellation, and in the future this may be expanded. We should rename the function! # TODO XXX - move to configs (started in https://github.com/nextstrain/avian-flu/pull/104 but # We should make the entire query config-definable) if wildcards.subtype == 'h5n1-d1.1': return "genoflu in 'D1.1'" + elif wildcards.subtype == 'h5n1-cattle-outbreak': + return "genoflu in 'B3.13'" db = { 'h5nx': ['h5n1', 'h5n2', 'h5n3', 'h5n4', 'h5n5', 'h5n6', 'h5n7', 'h5n8', 'h5n9'], @@ -75,7 +79,6 @@ def subtypes_by_subtype_wildcard(wildcards): 'h7n9': ['h7n9'], 'h9n2': ['h9n2'], } - db['h5n1-cattle-outbreak'] = [*db['h5nx']] assert wildcards.subtype in db, (f"Subtype {wildcards.subtype!r} is not defined in the snakemake function " "`subtypes_by_subtype_wildcard` -- is there a typo in the subtype you are targetting?") return(f"subtype in [{', '.join([repr(s) for s in db[wildcards.subtype]])}]") diff --git a/config/h5n1-cattle-outbreak/auspice_config_h5n1-cattle-outbreak.json b/config/h5n1-cattle-outbreak/auspice_config_h5n1-cattle-outbreak.json index 09ac49e..e5b0e44 100755 --- a/config/h5n1-cattle-outbreak/auspice_config_h5n1-cattle-outbreak.json +++ b/config/h5n1-cattle-outbreak/auspice_config_h5n1-cattle-outbreak.json @@ -79,6 +79,11 @@ "title": "Subtype", "type": "categorical" }, + { + "key": "genoflu", + "title": "GenoFLU constellation", + "type": "categorical" + }, { "key": "h5_label_clade", "title": "Provisional LABEL Clade", diff --git a/config/h5n1-cattle-outbreak/dropped_strains_h5n1-cattle-outbreak.txt b/config/h5n1-cattle-outbreak/dropped_strains_h5n1-cattle-outbreak.txt index 647eb03..5da89e0 100755 --- a/config/h5n1-cattle-outbreak/dropped_strains_h5n1-cattle-outbreak.txt +++ 
b/config/h5n1-cattle-outbreak/dropped_strains_h5n1-cattle-outbreak.txt @@ -9,6 +9,10 @@ A/Cattle/USA/24-009027-002-v/2024 # Duplicate of A/cattle/Michigan/24-009027-002 A/PEFA/USA/24-005915-001-original/2024 # Duplicate of A/Peregrinefalcon/California/24-005915-001/2024 A/Skunk/USA/24-006483-001-original/2024 # Duplicate of A/skunk/NewMexico/24-006483-001/2024 +# Many of the following excluded strains were added prior to filtering on GenoFLU +# constellation B3.13 and thus may not be applicable any more. We can clean these up +# in the future if desired. + # Dropping these strains from include due to excess private mutations A/cattle/NorthCarolina/24-010327-002/2024 A/cattle/Texas/24-009495-007/2024 @@ -246,30 +250,3 @@ A/westerngull/California/24-004708-001/2024 A/WesternGull/USA/24-004708-001-original/2024 A/WesternSandpiper/USA/24-004707-001-original/2024 A/woodduck/NorthCarolina/W24-026/2024 - -# D1.1 spillover -A/StripedSkunk/WA/W240530074-2-1/2024 -A/CATTLE/USA/25-002645-006/2025 -A/CATTLE/USA/25-002645-005/2025 -A/CATTLE/USA/25-002645-004/2025 -A/CATTLE/USA/25-002645-003/2025 -A/chicken/AR/24-037983-003-original/2024 -A/chicken/AR/24-037983-001-original/2024 -A/chicken/AR/24-037983-002-original/2024 -A/chicken/MN/24-038159-002-original/2024 -A/Duck/MN/24-038159-001-original/2024 -A/goose/MN/24-038159-004-original/2024 -A/Turkey/MN/24-035355-002-original/2024 -A/Turkey/MN/24-035355-001-original/2024 -A/Turkey/MN/24-035521-001-original/2024 -A/Turkey/MN/24-035524-003-original/2024 -A/Turkey/MN/24-035524-004-original/2024 -A/Turkey/MN/24-035524-001-original/2024 -A/Turkey/MN/24-035524-002-original/2024 -A/Turkey/MN/24-034932-001-original/2024 -A/Turkey/MN/24-034932-003-original/2024 -A/Turkey/MN/24-034932-002-original/2024 -A/Turkey/MN/24-035355-003-original/2024 -A/Turkey/MN/24-036792-002-original/2024 -A/Turkey/MN/24-036792-003-original/2024 -A/Turkey/MN/24-036792-001-original/2024 \ No newline at end of file diff --git a/rules/cattle-flu.smk 
b/rules/cattle-flu.smk index f737b90..5197582 100644 --- a/rules/cattle-flu.smk +++ b/rules/cattle-flu.smk @@ -14,9 +14,6 @@ rule filter_segments_for_genome: exclude = config['dropped_strains'], output: sequences = "results/{subtype}/{segment}/{time}/filtered_{genome_seg}.fasta" - params: - min_date = "2024-01-01", - query = 'region == "North America"' wildcard_constraints: subtype = 'h5n1-cattle-outbreak|h5n1-d1.1', segment = 'genome', @@ -29,8 +26,6 @@ rule filter_segments_for_genome: --metadata {input.metadata} \ --include {input.include} \ --exclude {input.exclude} \ - --min-date {params.min_date} \ - --query {params.query:q} \ --output-log {log} \ --output-sequences {output.sequences} """