[utils] lift error handling to load_features

Both callers of `load_features` (in `augur/ancestral.py` and `augur/translate.py`) checked for a return value of `None` (indicating that the reference file didn't exist) and then caused the augur command to exit. It's cleaner to lift this check into `load_features` itself, and doing so makes it easier for that function to raise errors in the future (e.g. on malformed/empty reference files).
nextstrain · jameshadfield · Dec 20, 2023 · Dec 4, 2023 · Dec 5, 2023 · Dec 5, 2023
commit 3b27cdcb9242341520f42a009b605dac5a68f1e1
diff --git a/augur/ancestral.py b/augur/ancestral.py
@@ -323,8 +323,6 @@ def run(args):
         from .utils import load_features
         ## load features; only requested features if genes given
         features = load_features(args.annotation, args.genes)
-        if features is None:
-            raise AugurError("could not read features of reference sequence file")
         print("Read in {} features from reference sequence file".format(len(features)))
         for gene in args.genes:
             print(f"Processing gene: {gene}")

diff --git a/augur/translate.py b/augur/translate.py
@@ -403,9 +403,6 @@ def run(args):
 
     ## load features; only requested features if genes given
     features = load_features(args.reference_sequence, genes)
-    if features is None:
-        print("ERROR: could not read features of reference sequence file")
-        return 1
     print("Read in {} features from reference sequence file".format(len(features)))
 
     ## Read in sequences & for each sequence translate each feature _except for_ the source (nuc) feature

diff --git a/augur/utils.py b/augur/utils.py
@@ -11,6 +11,7 @@
 from augur.io.file import open_file
 
 from augur.types import ValidationMode
+from augur.errors import AugurError
 
 from augur.util_support.color_parser import ColorParser
 from augur.util_support.node_data_reader import NodeDataReader
@@ -156,15 +157,18 @@ def load_features(reference, feature_names=None):
 
     Returns
     -------
-    features : dict or None
+    features : dict
         keys: feature names, values: <class 'Bio.SeqFeature.SeqFeature'> Note
-        that feature names may not equivalent to GenBank feature keys None is
-        returned if the reference is not a valid file path
+        that feature names may not equivalent to GenBank feature keys
+
+    Raises
+    ------
+    AugurError
+        If the reference file doesn't exist
     """
     #checks explicitly for GFF otherwise assumes Genbank
     if not os.path.isfile(reference):
-        print("ERROR: reference sequence not found. looking for", reference)
-        return None
+        raise AugurError(f"reference sequence file {reference!r} not found")
 
     if '.gff' in reference.lower():
         return _read_gff(reference, feature_names)

diff --git a/tests/functional/translate/cram/basic-error-checking.t b/tests/functional/translate/cram/basic-error-checking.t
@@ -0,0 +1,16 @@
+Setup
+
+  $ export AUGUR="${AUGUR:-$TESTDIR/../../../../bin/augur}"
+  $ export SCRIPTS="$TESTDIR/../../../../scripts"
+  $ export ANC_DATA="$TESTDIR/../../ancestral/data/simple-genome"
+  $ export DATA="$TESTDIR/../data/simple-genome"
+
+Missing reference file
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence $DATA/reference.doesnt-exist.gff \
+  >  --output-node-data "aa_muts.json" > /dev/null
+  ERROR: reference sequence file '.+/reference.doesnt-exist.gff' not found (re)
+  [2]