From 3631e90e0aeb2847a025f2b4a251ed1a8b7fe075 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 23 Feb 2024 11:01:25 -0800 Subject: [PATCH] Document the notable columns in merging USVI and GenBank data In cases like this where a column name is ambiguous ('accession' and 'genbank_accession'), bring this context out of commit messages/PRs and into the code itself. --- phylogenetic/rules/merge_sequences_usvi.smk | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/phylogenetic/rules/merge_sequences_usvi.smk b/phylogenetic/rules/merge_sequences_usvi.smk index eb849f8..2dd36dd 100644 --- a/phylogenetic/rules/merge_sequences_usvi.smk +++ b/phylogenetic/rules/merge_sequences_usvi.smk @@ -22,7 +22,13 @@ This part of the workflow usually includes the following steps: """ rule append_usvi: - """Appending USVI sequences""" + """Appending USVI sequences + + Notable columns: + - accession: Either the GenBank accession or USVI accession. + - genbank_accession: GenBank accession for Auspice to generate a URL to the NCBI GenBank record. Empty for USVI sequences. + - url: URL used in Auspice, to link either to the USVI GitHub repo (https://github.com/blab/zika-usvi/) or to the NCBI GenBank record ('https://www.ncbi.nlm.nih.gov/nuccore/*') + """ input: sequences = "data/sequences.fasta", metadata = "data/metadata.tsv",