-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The previous csvtk approach would drop "Not assigned" records because they didn't have results for all 8 segments. We now report the genome result as "Not assigned (too divergent)" and report the segment results where available.
- Loading branch information
1 parent
956dd12
commit 682df60
Showing
2 changed files
with
32 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
""" | ||
Takes a (modified) GenoFLU results TSV on STDIN and writes a TSV to STDOUT | ||
The input TSV is expected to have three fields: | ||
* strain | ||
* genoflu | ||
* details | ||
The output TSV exports 10 fields: | ||
* strain | ||
* genoflu | ||
* genoflu_<SEGMENT> (8 fields) | ||
""" | ||
|
||
|
||
from augur.io.metadata import read_table_to_dict | ||
from sys import stdin, stdout | ||
from csv import DictWriter | ||
|
||
# Segment names, in the order their columns appear in the output TSV.
SEGMENTS = ["PB2", "PB1", "PA", "HA", "NP", "NA", "MP", "NS"]

# Output columns: strain, the genome-level call, then one column per segment.
HEADER = ['strain', 'genoflu', *[f"genoflu_{s}" for s in SEGMENTS]]

# Single value that every genome-level "Not assigned: ..." call is collapsed to.
NOT_ASSIGNED = "Not assigned (too divergent)"


def parse_details(details):
    """Convert a GenoFLU ``details`` string into per-segment output columns.

    ``details`` is a ", "-separated list of ``SEGMENT:lineage`` entries, for
    example ``"PB2:ea1, PB1:ea1"``.

    Args:
        details: The raw ``details`` value of one record; may be empty.

    Returns:
        A dict mapping ``genoflu_<SEGMENT>`` to the lineage for every entry
        present. Empty when ``details`` is empty.

    Raises:
        ValueError: If a non-blank entry has no ``:`` separator.
    """
    columns = {}
    if not details:
        return columns
    for entry in details.split(", "):
        if not entry.strip():
            # Tolerate a trailing/duplicated separator rather than crashing.
            continue
        # partition() splits on the first ':' only, so a lineage that itself
        # contains ':' is preserved instead of breaking tuple unpacking.
        segment_name, separator, lineage = entry.partition(":")
        if not separator:
            raise ValueError(
                f"Malformed GenoFLU details entry {entry!r}: expected 'SEGMENT:lineage'"
            )
        # Strip stray whitespace: a key such as "genoflu_ PB1" would not match
        # any output column and would be dropped silently by the writer.
        columns[f"genoflu_{segment_name.strip()}"] = lineage.strip()
    return columns


def reshape_record(record):
    """Add per-segment columns to *record* and normalise its genome call.

    The record is modified in place and also returned for convenience.

    Args:
        record: A dict with at least the keys ``genoflu`` and ``details``.

    Returns:
        The same dict, with ``genoflu_<SEGMENT>`` keys added for each segment
        listed in ``details``.
    """
    record.update(parse_details(record['details']))
    # Collapse all "not assigned" calls into a single metadata value
    if record['genoflu'].startswith("Not assigned:"):
        record['genoflu'] = NOT_ASSIGNED
    return record


def main():
    """Read the GenoFLU TSV from STDIN and write the reshaped TSV to STDOUT."""
    # extrasaction='ignore' drops keys that are not in HEADER (e.g. 'details');
    # segments missing from a record are written as empty fields.
    tsv_writer = DictWriter(
        stdout,
        HEADER,
        extrasaction='ignore',
        delimiter='\t',
        lineterminator='\n',
    )
    tsv_writer.writeheader()
    for record in read_table_to_dict(stdin.buffer, ["\t"]):
        tsv_writer.writerow(reshape_record(record))


if __name__ == "__main__":
    main()