From caf96f236a0841aa84dc28300e5c5269d4e4f9c5 Mon Sep 17 00:00:00 2001 From: Mihai Lefter Date: Tue, 9 Apr 2024 10:28:19 +0200 Subject: [PATCH 1/2] Quick fix egquery down (#20) --- mutalyzer_retriever/sources/ncbi.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mutalyzer_retriever/sources/ncbi.py b/mutalyzer_retriever/sources/ncbi.py index 5ace568..bdda793 100644 --- a/mutalyzer_retriever/sources/ncbi.py +++ b/mutalyzer_retriever/sources/ncbi.py @@ -190,7 +190,13 @@ def fetch(reference_id, reference_type, size_on=True, timeout=1): :returns tuple: raw annotations, type ("gff3" or "genbank") """ - db = _get_database(reference_id) + db = "nuccore" + if ( + reference_id.startswith("NP") + or reference_id.startswith("XP") + or reference_id.startswith("WP") + ): + db = "protein" if reference_type in [None, "gff3"]: return fetch_gff3(reference_id, db, timeout), "gff3" elif reference_type == "fasta": From bf7a0da570013777eccc3ed0219eabc5548ff63c Mon Sep 17 00:00:00 2001 From: Mihai Lefter Date: Thu, 2 May 2024 15:12:10 +0200 Subject: [PATCH 2/2] Update protein prefixes --- mutalyzer_retriever/sources/ncbi.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mutalyzer_retriever/sources/ncbi.py b/mutalyzer_retriever/sources/ncbi.py index bdda793..a8db319 100644 --- a/mutalyzer_retriever/sources/ncbi.py +++ b/mutalyzer_retriever/sources/ncbi.py @@ -191,10 +191,18 @@ def fetch(reference_id, reference_type, size_on=True, timeout=1): :returns tuple: raw annotations, type ("gff3" or "genbank") """ db = "nuccore" + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4702849/ + # https://support.nlm.nih.gov/knowledgebase/article/KA-03437/ + # https://support.nlm.nih.gov/knowledgebase/article/KA-03434/ + # https://support.nlm.nih.gov/knowledgebase/article/KA-03389/ + # https://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/ if ( - reference_id.startswith("NP") - or reference_id.startswith("XP") - or reference_id.startswith("WP") + reference_id.startswith("AP_") + or reference_id.startswith("NP_") + or reference_id.startswith("WP_") + or reference_id.startswith("XP_") + or reference_id.startswith("YP_") + or reference_id.startswith("ZP_") ): db = "protein" if reference_type in [None, "gff3"]: