Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter cattle-outbreak using GenoFLU B3.13 #140

Merged
merged 1 commit into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,21 @@ files = rules.files.params

def subtypes_by_subtype_wildcard(wildcards):
    """Return the filter query expression for the build named by ``wildcards.subtype``.

    Two builds are selected by GenoFLU constellation rather than by subtype:
    ``h5n1-d1.1`` and ``h5n1-cattle-outbreak``. Every other recognised value
    yields a ``subtype in [...]`` expression listing the subtypes that belong
    to that build.

    Args:
        wildcards: object exposing a ``subtype`` attribute (presumably the
            Snakemake wildcards for the job -- confirm against the caller).

    Returns:
        str: the query expression, e.g. ``"subtype in ['h5n1']"`` or
        ``"genoflu in 'B3.13'"``.

    Raises:
        AssertionError: if ``wildcards.subtype`` is not a known build.
    """
    # TODO - this function does more than strictly subtype filtering, as certain builds
    # filter on GenoFLU constellation, and in the future this may be expanded.
    # We should rename the function!
    # TODO XXX - move to configs (started in https://github.com/nextstrain/avian-flu/pull/104,
    # but we should make the entire query config-definable).
    if wildcards.subtype == 'h5n1-d1.1':
        return "genoflu in 'D1.1'"
    if wildcards.subtype == 'h5n1-cattle-outbreak':
        return "genoflu in 'B3.13'"

    # The GenoFLU-filtered builds returned above, so only subtype-based builds
    # need an entry here.
    db = {
        'h5nx': ['h5n1', 'h5n2', 'h5n3', 'h5n4', 'h5n5', 'h5n6', 'h5n7', 'h5n8', 'h5n9'],
        'h5n1': ['h5n1'],
        'h7n9': ['h7n9'],
        'h9n2': ['h9n2'],
    }

    # Raise explicitly rather than via an `assert` statement so the check (and its
    # helpful message) still fires when Python runs with -O; the exception type is
    # kept as AssertionError for backward compatibility.
    if wildcards.subtype not in db:
        raise AssertionError(
            f"Subtype {wildcards.subtype!r} is not defined in the snakemake function "
            "`subtypes_by_subtype_wildcard` -- is there a typo in the subtype you are targetting?"
        )

    subtype_list = ', '.join(repr(s) for s in db[wildcards.subtype])
    return f"subtype in [{subtype_list}]"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@
"title": "Subtype",
"type": "categorical"
},
{
"key": "genoflu",
"title": "GenoFLU constellation",
"type": "categorical"
},
{
"key": "h5_label_clade",
"title": "Provisional LABEL Clade",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ A/Cattle/USA/24-009027-002-v/2024 # Duplicate of A/cattle/Michigan/24-009027-002
A/PEFA/USA/24-005915-001-original/2024 # Duplicate of A/Peregrinefalcon/California/24-005915-001/2024
A/Skunk/USA/24-006483-001-original/2024 # Duplicate of A/skunk/NewMexico/24-006483-001/2024

# Many of the following excluded strains were added before we began filtering on
# GenoFLU constellation B3.13, so they may no longer be applicable. We can clean
# these up in the future if desired.

# Dropping these strains from include due to excess private mutations
A/cattle/NorthCarolina/24-010327-002/2024
A/cattle/Texas/24-009495-007/2024
Expand Down Expand Up @@ -246,30 +250,3 @@ A/westerngull/California/24-004708-001/2024
A/WesternGull/USA/24-004708-001-original/2024
A/WesternSandpiper/USA/24-004707-001-original/2024
A/woodduck/NorthCarolina/W24-026/2024

# D1.1 spillover
A/StripedSkunk/WA/W240530074-2-1/2024
A/CATTLE/USA/25-002645-006/2025
A/CATTLE/USA/25-002645-005/2025
A/CATTLE/USA/25-002645-004/2025
A/CATTLE/USA/25-002645-003/2025
A/chicken/AR/24-037983-003-original/2024
A/chicken/AR/24-037983-001-original/2024
A/chicken/AR/24-037983-002-original/2024
A/chicken/MN/24-038159-002-original/2024
A/Duck/MN/24-038159-001-original/2024
A/goose/MN/24-038159-004-original/2024
A/Turkey/MN/24-035355-002-original/2024
A/Turkey/MN/24-035355-001-original/2024
A/Turkey/MN/24-035521-001-original/2024
A/Turkey/MN/24-035524-003-original/2024
A/Turkey/MN/24-035524-004-original/2024
A/Turkey/MN/24-035524-001-original/2024
A/Turkey/MN/24-035524-002-original/2024
A/Turkey/MN/24-034932-001-original/2024
A/Turkey/MN/24-034932-003-original/2024
A/Turkey/MN/24-034932-002-original/2024
A/Turkey/MN/24-035355-003-original/2024
A/Turkey/MN/24-036792-002-original/2024
A/Turkey/MN/24-036792-003-original/2024
A/Turkey/MN/24-036792-001-original/2024
5 changes: 0 additions & 5 deletions rules/cattle-flu.smk
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ rule filter_segments_for_genome:
exclude = config['dropped_strains'],
output:
sequences = "results/{subtype}/{segment}/{time}/filtered_{genome_seg}.fasta"
params:
min_date = "2024-01-01",
query = 'region == "North America"'
wildcard_constraints:
subtype = 'h5n1-cattle-outbreak|h5n1-d1.1',
segment = 'genome',
Expand All @@ -29,8 +26,6 @@ rule filter_segments_for_genome:
--metadata {input.metadata} \
--include {input.include} \
--exclude {input.exclude} \
--min-date {params.min_date} \
--query {params.query:q} \
--output-log {log} \
--output-sequences {output.sequences}
"""
Expand Down