diff --git a/CHANGES.md b/CHANGES.md
index 02ad665f3..c3d2192dc 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,8 +2,18 @@
 
 ## __NEXT__
 
+### Major Changes
+
+* ancestral, translate: GenBank files now require the (GenBank mandatory) source feature to be present. [#1351][] (@jameshadfield)
+* ancestral, translate: For GFF files, we extract the genome/sequence coordinates by inspecting the sequence-region pragma, region type and/or source type. This information is now required. [#1351][] (@jameshadfield)
+
 ### Features
 
+* ancestral, translate: A range of improvements to how we parse GFF and GenBank reference files. [#1351][] (@jameshadfield)
+    * translate will now always export a 'nuc' annotation in the output JSON, allowing it to pass validation
+    * Gene/CDS names of 'nuc' are now forbidden.
+    * If a Gene/CDS in the GFF/GenBank file is unparsed we now print a warning.
+* utils::load_features: This function may now raise `AugurError`. [#1351][] (@jameshadfield)
 * ancestral: For VCF alignments, a VCF output file is now only created when requested via `--output-vcf`. [#1344][] (@jameshadfield)
 * ancestral: Improvements to command line arguments. [#1344][] (@jameshadfield)
      * Incompatible arguments are now checked, especially related to VCF vs FASTA inputs. 
@@ -16,9 +26,13 @@
 * translate: Improvements to command line arguments.  [#1348][] (@jameshadfield)
     * `--tree` and `--ancestral-sequences` are now required arguments.
     * separate VCF-only arguments into their own group
+* translate: Fixes a bug in the parsing behaviour of GFF files whereby the presence of the `--genes` command line argument would change how we read individual GFF lines. Issue [#1349][], PR [#1351][] (@jameshadfield)
+
 
 [#1344]: https://github.com/nextstrain/augur/pull/1344
 [#1348]: https://github.com/nextstrain/augur/pull/1348
+[#1351]: https://github.com/nextstrain/augur/pull/1351
+[#1349]: https://github.com/nextstrain/augur/issues/1349
 
 ## 23.1.1 (7 November 2023)
 
diff --git a/augur/ancestral.py b/augur/ancestral.py
index a15d1ef93..621faeb6a 100644
--- a/augur/ancestral.py
+++ b/augur/ancestral.py
@@ -323,8 +323,12 @@ def run(args):
         from .utils import load_features
         ## load features; only requested features if genes given
         features = load_features(args.annotation, args.genes)
-        if features is None:
-            raise AugurError("could not read features of reference sequence file")
+        # Ensure the already-created nuc annotation coordinates match those parsed from the reference file
+        if (features['nuc'].location.start+1 != anc_seqs['annotations']['nuc']['start'] or
+            features['nuc'].location.end != anc_seqs['annotations']['nuc']['end']):
+            raise AugurError(f"The 'nuc' annotation coordinates parsed from {args.annotation!r} ({features['nuc'].location.start+1}..{features['nuc'].location.end})"
+                f" don't match the provided sequence data coordinates ({anc_seqs['annotations']['nuc']['start']}..{anc_seqs['annotations']['nuc']['end']}).")
+        
         print("Read in {} features from reference sequence file".format(len(features)))
         for gene in args.genes:
             print(f"Processing gene: {gene}")
diff --git a/augur/translate.py b/augur/translate.py
index 8c7253c39..bc4a1bc13 100644
--- a/augur/translate.py
+++ b/augur/translate.py
@@ -403,19 +403,16 @@ def run(args):
 
     ## load features; only requested features if genes given
     features = load_features(args.reference_sequence, genes)
-    if features is None:
-        print("ERROR: could not read features of reference sequence file")
-        return 1
     print("Read in {} features from reference sequence file".format(len(features)))
 
-    ## Read in sequences & for each sequence translate each feature _except for_ the source (nuc) feature
-    ## Note that `load_features` _only_ extracts {'gene', 'source'} for GFF files, {'CDS', 'source'} for GenBank.
+    ## Read in sequences & for each sequence translate each feature _except for_ the 'nuc' feature name
+    ## Note that except for the 'nuc' annotation, `load_features` _only_ looks for 'gene' (GFF files) or 'CDS' (GenBank files)
     translations = {}
     if is_vcf:
         (sequences, ref) = sequences_vcf(args.vcf_reference, args.ancestral_sequences)
         features_without_variation = []
         for fname, feat in features.items():
-            if feat.type=='source':
+            if fname=='nuc':
                 continue
             try:
                 translations[fname] = translate_vcf_feature(sequences, ref, feat, fname)
@@ -425,26 +422,26 @@ def run(args):
             print("{} genes had no mutations and so have been be excluded.".format(len(features_without_variation)))  
     else:
         sequences = sequences_json(args.ancestral_sequences, tree)
-        translations = {fname: translate_feature(sequences, feat) for fname, feat in features.items() if feat.type != 'source'}
+        translations = {fname: translate_feature(sequences, feat) for fname, feat in features.items() if fname!='nuc'}
 
     ## glob the annotations for later auspice export
     #
     # Note that BioPython FeatureLocations use
     # "Pythonic" coordinates: [zero-origin, half-open)
     # Starting with augur v6 we use GFF coordinates: [one-origin, inclusive]
-    annotations = {}
+    annotations = {
+        'nuc': {'start': features['nuc'].location.start+1,
+                'end':   features['nuc'].location.end,
+                'strand': '+',
+                'type':  features['nuc'].type,     # (unused by auspice)
+                'seqid': args.reference_sequence}  # (unused by auspice)
+    }
     for fname, feat in features.items():
         annotations[fname] = {'seqid':args.reference_sequence,
                               'type':feat.type,
                               'start':int(feat.location.start)+1,
                               'end':int(feat.location.end),
                               'strand': {+1:'+', -1:'-', 0:'?', None:None}[feat.location.strand]}
-    if is_vcf: #need to add our own nuc
-        annotations['nuc'] = {'seqid':args.reference_sequence,
-                              'type':feat.type,
-                              'start': 1,
-                              'end': len(ref),
-                              'strand': '+'}
 
     ## determine amino acid mutations for each node
     try:
diff --git a/augur/utils.py b/augur/utils.py
index d687e62a5..2a94cc0d7 100644
--- a/augur/utils.py
+++ b/augur/utils.py
@@ -11,6 +11,7 @@
 from augur.io.file import open_file
 
 from augur.types import ValidationMode
+from augur.errors import AugurError
 
 from augur.util_support.color_parser import ColorParser
 from augur.util_support.node_data_reader import NodeDataReader
@@ -143,66 +144,267 @@ def default(self, obj):
 
 
 def load_features(reference, feature_names=None):
-    #read in appropriately whether GFF or Genbank
+    """
+    Parse a GFF/GenBank reference file. See the docstrings for _read_gff and
+    _read_genbank for details.
+
+    Parameters
+    ----------
+    reference : str
+        File path to GFF or GenBank (.gb) reference
+    feature_names : None or set or list (optional)
+        Restrict the genes we read to those in the set/list
+
+    Returns
+    -------
+    features : dict
+        keys: feature names, values: <class 'Bio.SeqFeature.SeqFeature'> Note
+        that feature names may not be equivalent to GenBank feature keys
+
+    Raises
+    ------
+    AugurError
+        If the reference file doesn't exist, or is malformed / empty
+    """
     #checks explicitly for GFF otherwise assumes Genbank
     if not os.path.isfile(reference):
-        print("ERROR: reference sequence not found. looking for", reference)
-        return None
+        raise AugurError(f"reference sequence file {reference!r} not found")
 
-    features = {}
     if '.gff' in reference.lower():
-        #looks for 'gene' and 'gene' as best for TB
-        from BCBio import GFF
-        limit_info = dict( gff_type = ['gene', 'source'] )
-
-        with open(reference, encoding='utf-8') as in_handle:
-            for rec in GFF.parse(in_handle, limit_info=limit_info):
-                for feat in rec.features:
-                    # Check for gene names stored in qualifiers commonly used by
-                    # virus-specific gene maps first (e.g., 'gene',
-                    # 'gene_name'). Then, check for qualifiers used by non-viral
-                    # pathogens (e.g., 'locus_tag').
-                    if feature_names is not None:
-                        if "gene" in feat.qualifiers and feat.qualifiers["gene"][0] in feature_names:
-                            fname = feat.qualifiers["gene"][0]
-                        elif "gene_name" in feat.qualifiers and feat.qualifiers["gene_name"][0] in feature_names:
-                            fname = feat.qualifiers["gene_name"][0]
-                        elif "locus_tag" in feat.qualifiers and feat.qualifiers["locus_tag"][0] in feature_names:
-                            fname = feat.qualifiers["locus_tag"][0]
-                        else:
-                            fname = None
-                    else:
-                        if "gene" in feat.qualifiers:
-                            fname = feat.qualifiers["gene"][0]
-                        elif "gene_name" in feat.qualifiers:
-                            fname = feat.qualifiers["gene_name"][0]
-                        else:
-                            fname = feat.qualifiers["locus_tag"][0]
-                    if feat.type == "source":
-                        fname = "nuc"
-
-                    if fname:
-                        features[fname] = feat
-
-            if feature_names is not None:
-                for fe in feature_names:
-                    if fe not in features:
-                        print("Couldn't find gene {} in GFF or GenBank file".format(fe))
+        return _read_gff(reference, feature_names)
+    else:
+        return _read_genbank(reference, feature_names)
+
+def _read_nuc_annotation_from_gff(record, reference):
+    """
+    Looks for the ##sequence-region pragma as well as 'region' & 'source' GFF
+    types. Note that 'source' isn't really a GFF feature type, but is used
+    widely in the Nextstrain ecosystem. If there are multiple we check that the
+    coordinates agree, then prefer the pragma, then 'region', then 'source'.
+
+    Parameters
+    ----------
+    record : <class 'Bio.SeqRecord.SeqRecord'>
+    reference: string
+        File path to GFF reference
+
+    Returns
+    -------
+    <class 'Bio.SeqFeature.SeqFeature'>
 
+    Raises
+    ------
+    AugurError
+        If there are multiple ##sequence-region pragmas, if no genome / seqid
+        length information is available, or if the information is contradictory
+    """
+    nuc = {}
+    # Attempt to parse the sequence-region pragma to learn the genome
+    # length (in the absence of region/source we'll use this for 'nuc')
+    sequence_regions = record.annotations.get('sequence-region', [])
+    if len(sequence_regions)>1:
+        raise AugurError(f"Reference {reference!r} contains multiple ##sequence-region pragma lines. Augur can only handle GFF files with a single one.")
+    elif sequence_regions:
+        from Bio.SeqFeature import SeqFeature, FeatureLocation
+        (name, start, stop) = sequence_regions[0]
+        nuc['pragma'] = SeqFeature(
+            # strand is set on the location: SeqFeature's own `strand` argument is deprecated (and removed in recent Biopython)
+            FeatureLocation(start, stop, strand=1),
+            type='##sequence-region pragma',
+            id=name,
+        )
+    for feat in record.features:
+        if feat.type == "region":
+            nuc['region'] = feat
+        elif feat.type == "source":
+            nuc['source'] = feat
+
+    # ensure they all agree on coordinates, if there are multiple
+    if len(nuc.values())>1:
+        coords = [(name, int(feat.location.start), int(feat.location.end)) for name,feat in nuc.items()]
+        if not all(el[1]==coords[0][1] and el[2]==coords[0][2] for el in coords):
+            raise AugurError(f"Reference {reference!r} contained contradictory coordinates for the seqid/genome. We parsed the following coordinates: " +
+                             ', '.join([f"{el[0]}: [{el[1]+1}, {el[2]}]" for el in coords]) # +1 on the first coord to shift to one-based GFF representation
+                             )
+
+    if 'pragma' in nuc: ## the pragma is GFF's preferred way to define nuc coords
+        return nuc['pragma']
+    elif 'region' in nuc:
+        return nuc['region']
+    elif 'source' in nuc:
+        return nuc['source']
     else:
-        from Bio import SeqIO
-        for feat in SeqIO.read(reference, 'genbank').features:
-            if feat.type=='CDS':
-                if "locus_tag" in feat.qualifiers:
-                    fname = feat.qualifiers["locus_tag"][0]
-                    if feature_names is None or fname in feature_names:
-                        features[fname] = feat
-                elif "gene" in feat.qualifiers:
+        raise AugurError(f"Reference {reference!r} didn't define any information we can use to create the 'nuc' annotation. You can use a line with a 'region' or 'source' GFF type or a ##sequence-region pragma.")
+
+
+def _read_gff(reference, feature_names):
+    """
+    Read a GFF file. We only read GFF types 'gene', 'region' or 'source' (the latter is not
+    technically a valid GFF type, but is used widely within the Nextstrain ecosystem).
+    Only a single seqid is supported; a file with multiple seqids raises AugurError.
+    We create a "feature name" via:
+    - for the genome coordinates (##sequence-region pragma, 'region' or 'source') use 'nuc'
+    - for 'gene' types use the 'gene', 'gene_name' or 'locus_tag' attribute (in that order).
+      If none are specified, the row is skipped and counted in a printed warning.
+
+    Parameters
+    ----------
+    reference : string
+        File path to GFF reference
+    feature_names : None or set or list
+        Restrict the genes we read to those in the set/list
+
+    Returns
+    -------
+    features : dict
+        keys: feature names, values: <class 'Bio.SeqFeature.SeqFeature'>
+        Note that feature names may not be equivalent to GenBank feature keys
+
+    Raises
+    ------
+    AugurError
+        If the reference file contains no IDs or multiple different seqids
+        If a gene is found with the name 'nuc'
+    """
+    from BCBio import GFF
+    valid_types = ['gene', 'source', 'region']
+    features = {}
+
+    with open(reference, encoding='utf-8') as in_handle:
+        # Note that `GFF.parse` doesn't always yield GFF records in the order
+        # one may expect, but since we raise AugurError if there are multiple
+        # this doesn't matter.
+        gff_entries = list(GFF.parse(in_handle, limit_info={'gff_type': valid_types}))
+        if len(gff_entries) == 0:
+            raise AugurError(f"Reference {reference!r} contains no valid data rows. Valid GFF types (3rd column) are {', '.join(valid_types)}.")
+        elif len(gff_entries) > 1:
+            raise AugurError(f"Reference {reference!r} contains multiple seqids (first column). Augur can only handle GFF files with a single seqid.")
+        else:
+            rec = gff_entries[0]
+
+        features['nuc'] = _read_nuc_annotation_from_gff(rec, reference)
+        features_skipped = 0
+
+        for feat in rec.features:
+            if feat.type == "gene":
+                # Check for gene names stored in qualifiers commonly used by
+                # virus-specific gene maps first (e.g., 'gene',
+                # 'gene_name'). Then, check for qualifiers used by non-viral
+                # pathogens (e.g., 'locus_tag').
+                if "gene" in feat.qualifiers:
                     fname = feat.qualifiers["gene"][0]
-                    if feature_names is None or fname in feature_names:
-                        features[fname] = feat
-            elif feat.type=='source': #read 'nuc' as well for annotations - need start/end of whole!
-                features['nuc'] = feat
+                elif "gene_name" in feat.qualifiers:
+                    fname = feat.qualifiers["gene_name"][0]
+                elif "locus_tag" in feat.qualifiers:
+                    fname = feat.qualifiers["locus_tag"][0]
+                else:
+                    features_skipped+=1
+                    fname = None
+
+                if fname == 'nuc':
+                    raise AugurError(f"Reference {reference!r} contains a gene with the name 'nuc'. This is not allowed.")
+
+                if feature_names is not None and fname not in feature_names:
+                    # Skip (don't store) this feature
+                    continue
+
+                if fname:
+                    features[fname] = feat
+
+        if feature_names is not None:
+            for fe in feature_names:
+                if fe not in features:
+                    print("Couldn't find gene {} in GFF or GenBank file".format(fe))
+
+        if features_skipped:
+            print(f"WARNING: {features_skipped} GFF rows of type=gene skipped as they didn't have a gene, gene_name or locus_tag attribute.")
+
+    return features
+
+def _read_nuc_annotation_from_genbank(record, reference):
+    """
+    Extracts the mandatory 'source' feature. If the sequence is present we check
+    the length agrees with the source. (The 'ORIGIN' may be left blank,
+    according to <https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html>.)
+    See <https://www.insdc.org/submitting-standards/feature-table/> for more.
+
+    Parameters
+    ----------
+    record : <class 'Bio.SeqRecord.SeqRecord'>
+    reference : string (file path to the GenBank reference, used in error messages)
+
+    Returns
+    -------
+    <class 'Bio.SeqFeature.SeqFeature'>
+
+    Raises
+    ------
+    AugurError
+        If 'source' not defined or if coords contradict.
+    """
+    nuc = None
+    for feat in record.features:
+        if feat.type=='source':
+            nuc = feat
+    if not nuc:
+        raise AugurError(f"Reference {reference!r} did not define the mandatory source feature.")
+    if nuc.location.start!=0: # this is a '1' in the GenBank file
+        raise AugurError(f"Reference {reference!r} source feature did not start at 1.")
+    if record.seq and len(record.seq)!=nuc.location.end:
+        raise AugurError(f"Reference {reference!r} source feature was length {nuc.location.end} but the included sequence was length {len(record.seq)}.")
+    return nuc
+
+def _read_genbank(reference, feature_names):
+    """
+    Read a GenBank file. We only read GenBank feature keys 'CDS' or 'source'.
+    We create a "feature name" via:
+    - for 'source' features use 'nuc'
+    - for 'CDS' features use the locus_tag or the gene. If neither, skip the feature and print a warning.
+
+    Parameters
+    ----------
+    reference : string
+        File path to GenBank reference
+    feature_names : None or set or list
+        Restrict the CDSs we read to those in the set/list
+
+    Returns
+    -------
+    features : dict
+        keys: feature names, values: <class 'Bio.SeqFeature.SeqFeature'>
+        Note that feature names may not be equivalent to GenBank feature keys
+
+    Raises
+    ------
+    AugurError
+        If 'nuc' annotation not parsed
+        If a CDS feature is given the name 'nuc'
+    """
+    from Bio import SeqIO
+    gb = SeqIO.read(reference, 'genbank')
+    features = {
+        'nuc': _read_nuc_annotation_from_genbank(gb, reference)
+    }
+
+    features_skipped = 0
+    for feat in gb.features:
+        if feat.type=='CDS':
+            fname = None
+            if "locus_tag" in feat.qualifiers:
+                fname = feat.qualifiers["locus_tag"][0]
+            elif "gene" in feat.qualifiers:
+                fname = feat.qualifiers["gene"][0]
+            else:
+                features_skipped+=1
+
+            if fname == 'nuc':
+                raise AugurError(f"Reference {reference!r} contains a CDS with the name 'nuc'. This is not allowed.")
+
+            if fname and (feature_names is None or fname in feature_names):
+                features[fname] = feat
+
+    if features_skipped:
+        print(f"WARNING: {features_skipped} CDS features skipped as they didn't have a locus_tag or gene qualifier.")
 
     return features
 
diff --git a/tests/functional/translate/cram/basic-error-checking.t b/tests/functional/translate/cram/basic-error-checking.t
new file mode 100644
index 000000000..789cf89c0
--- /dev/null
+++ b/tests/functional/translate/cram/basic-error-checking.t
@@ -0,0 +1,16 @@
+Setup
+
+  $ export AUGUR="${AUGUR:-$TESTDIR/../../../../bin/augur}"
+  $ export SCRIPTS="$TESTDIR/../../../../scripts"
+  $ export ANC_DATA="$TESTDIR/../../ancestral/data/simple-genome"
+  $ export DATA="$TESTDIR/../data/simple-genome"
+
+Missing reference file
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence $DATA/reference.doesnt-exist.gff \
+  >  --output-node-data "aa_muts.json" > /dev/null
+  ERROR: reference sequence file '.+/reference.doesnt-exist.gff' not found (re)
+  [2]
diff --git a/tests/functional/translate/cram/genbank.t b/tests/functional/translate/cram/genbank.t
new file mode 100644
index 000000000..120e32cd1
--- /dev/null
+++ b/tests/functional/translate/cram/genbank.t
@@ -0,0 +1,33 @@
+Setup
+
+  $ export AUGUR="${AUGUR:-$TESTDIR/../../../../bin/augur}"
+  $ export ANC_DATA="$TESTDIR/../../ancestral/data/simple-genome"
+  $ export DATA="$TESTDIR/../data/simple-genome"
+
+These tests are intended to test variants of GenBank reference file formatting
+
+
+Remove the mandatory source feature from the file
+  $ sed '5,6d' "$DATA/reference.gb" > "reference.no-source.gb"
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence "reference.no-source.gb" \
+  >  --output-node-data "aa_muts.json"
+  ERROR: Reference .+ did not define the mandatory source feature. (re)
+  [2]
+
+Remove a nucleotide from the ORIGIN sequence so the coordinates don't match the source
+
+  $ sed 's/TGACCATAAA/TGACCATAA/' "$DATA/reference.gb" > "reference.short-origin.gb"
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence "reference.short-origin.gb" \
+  >  --output-node-data "aa_muts.json"
+  .+ BiopythonParserWarning: .+ (re)
+  .+ (re)
+  ERROR: Reference .+ (re)
+  [2]
diff --git a/tests/functional/translate/cram/general.t b/tests/functional/translate/cram/general.t
index fa389dbcb..09ef166f7 100644
--- a/tests/functional/translate/cram/general.t
+++ b/tests/functional/translate/cram/general.t
@@ -13,7 +13,7 @@ which validate the output will fail as it's missing a 'nuc' annotation.
   $ ${AUGUR} translate \
   >  --tree "$ANC_DATA/tree.nwk" \
   >  --ancestral-sequences "$ANC_DATA/nt_muts.ref-seq.json" \
-  >  --reference-sequence "$DATA/reference.source.gff" \
+  >  --reference-sequence "$DATA/reference.gff" \
   >  --output-node-data "aa_muts.json" > /dev/null
 
   $ python3 "$SCRIPTS/diff_jsons.py" \
diff --git a/tests/functional/translate/cram/genes.t b/tests/functional/translate/cram/genes.t
index 3a5b866bd..126b6f904 100644
--- a/tests/functional/translate/cram/genes.t
+++ b/tests/functional/translate/cram/genes.t
@@ -12,7 +12,7 @@ as a feature ('nuc' in this case)
   $ ${AUGUR} translate \
   >  --tree "$ANC_DATA/tree.nwk" \
   >  --ancestral-sequences "$ANC_DATA/nt_muts.ref-seq.json" \
-  >  --reference-sequence "$DATA/reference.source.gff" \
+  >  --reference-sequence "$DATA/reference.gff" \
   >  --genes gene2 gene3 \
   >  --output-node-data "aa_muts.genes-args.json"
   Couldn't find gene gene3 in GFF or GenBank file
@@ -33,7 +33,7 @@ Using a text file rather than command line arguments
   $ ${AUGUR} translate \
   >  --tree "$ANC_DATA/tree.nwk" \
   >  --ancestral-sequences "$ANC_DATA/nt_muts.ref-seq.json" \
-  >  --reference-sequence "$DATA/reference.source.gff" \
+  >  --reference-sequence "$DATA/reference.gff" \
   >  --genes "genes.txt" \
   >  --output-node-data "aa_muts.genes-txt.json"
   Read in 2 specified genes to translate.
diff --git a/tests/functional/translate/cram/gff.t b/tests/functional/translate/cram/gff.t
new file mode 100644
index 000000000..aec3eb6fd
--- /dev/null
+++ b/tests/functional/translate/cram/gff.t
@@ -0,0 +1,93 @@
+Setup
+
+  $ export AUGUR="${AUGUR:-$TESTDIR/../../../../bin/augur}"
+  $ export SCRIPTS="$TESTDIR/../../../../scripts"
+  $ export ANC_DATA="$TESTDIR/../../ancestral/data/simple-genome"
+  $ export DATA="$TESTDIR/../data/simple-genome"
+
+These tests are intended to test variants of GFF formatting
+
+
+GFF file with no valid rows
+
+  $ head -n 3  $DATA/reference.gff > "reference.empty.gff"
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence "reference.empty.gff" \
+  >  --output-node-data "aa_muts.json" > /dev/null
+  ERROR: Reference 'reference.empty.gff' contains no valid data rows. .+ (re)
+  [2]
+
+GFF file with an extra record
+
+  $ cp $DATA/reference.gff "reference.double.gff"
+
+  $ echo -e "additional\tRefSeq\tsource\t1\t10\t.\t+\t.\tID=additional" >> "reference.double.gff"
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence "reference.double.gff" \
+  >  --output-node-data "aa_muts.json"
+  ERROR: Reference 'reference.double.gff' contains multiple seqids .+ (re)
+  [2]
+
+
+GFF file with data row GFF type 'region' replaced by 'source' _and_ the
+##sequence-region pragma removed. This essentially mimics the information
+augur 23.1.1 and earlier would use, before augur started parsing region and/or
+the ##sequence-region pragma.
+  $ grep -v '##sequence-region' "$DATA/reference.gff" |
+  >  sed 's/\tregion\t/\tsource\t/' > "reference-only.gff"
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence "reference-only.gff" \
+  >  --output-node-data "aa_muts-only.json" > /dev/null
+
+  $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \
+  >   "$DATA/aa_muts.json" \
+  >   "aa_muts-only.json" \
+  >   --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]"
+  {'values_changed': {"root['annotations']['nuc']['type']": {'new_value': 'source', 'old_value': '##sequence-region pragma'}}}
+
+GFF file with data row added with GFF type 'source' with coordinates which don't match
+  $ sed '5s/^/reference_name\tRefSeq\tsource\t1\t70\t.\t+\t.\tID=reference_name\n/' \
+  >   "$DATA/reference.gff" > "reference-contradicts.gff"
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence "reference-contradicts.gff" \
+  >  --output-node-data "aa_muts.json"
+  ERROR: Reference .+ contained contradictory coordinates .+ (re)
+  [2]
+
+GFF file with 'region' removed, so the only genome information is the ##sequence-region pragma
+  $ egrep -v '\tregion\t' "$DATA/reference.gff" > "reference.pragma-only.gff"
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence "reference.pragma-only.gff" \
+  >  --output-node-data "aa_muts.pragma-only.json" > /dev/null
+
+  $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \
+  >   "$DATA/aa_muts.json" \
+  >   "aa_muts.pragma-only.json" \
+  >   --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]"
+  {}
+
+GFF file with no genome coordinate information
+  $ egrep -v 'region' "$DATA/reference.gff" > "reference.no-nuc-info.gff"
+
+  $ ${AUGUR} translate \
+  >  --tree $ANC_DATA/tree.nwk \
+  >  --ancestral-sequences $ANC_DATA/nt_muts.ref-seq.json \
+  >  --reference-sequence "reference.no-nuc-info.gff" \
+  >  --output-node-data "aa_muts.json"
+  ERROR: Reference .+ didn't define any information we can use to create the 'nuc' annotation. .+ (re)
+  [2]
\ No newline at end of file
diff --git a/tests/functional/translate/cram/translate-with-genbank.t b/tests/functional/translate/cram/translate-with-genbank.t
index b68b134f9..ed2fb54ad 100644
--- a/tests/functional/translate/cram/translate-with-genbank.t
+++ b/tests/functional/translate/cram/translate-with-genbank.t
@@ -12,6 +12,7 @@ Translate amino acids for genes using a GenBank file.
   >   --reference-sequence "$DATA/zika/zika_outgroup.gb" \
   >   --genes CA PRO \
   >   --output-node-data aa_muts.json
+  WARNING: 1 CDS features skipped as they didn't have a locus_tag or gene qualifier.
   Read in 3 features from reference sequence file
   Validating schema of '.+nt_muts.json'... (re)
   amino acid mutations written to .* (re)
diff --git a/tests/functional/translate/cram/translate-with-gff-and-gene-name.t b/tests/functional/translate/cram/translate-with-gff-and-gene-name.t
index 9cb273bc5..1534245aa 100644
--- a/tests/functional/translate/cram/translate-with-gff-and-gene-name.t
+++ b/tests/functional/translate/cram/translate-with-gff-and-gene-name.t
@@ -18,7 +18,7 @@ Translate amino acids for genes using a GFF3 file where the gene names are store
   >   --ancestral-sequences "${DATA}/zika/nt_muts.json" \
   >   --reference-sequence "genemap.gff" \
   >   --output-node-data aa_muts.json
-  Read in 2 features from reference sequence file
+  Read in 3 features from reference sequence file
   Validating schema of '.+/nt_muts.json'... (re)
   amino acid mutations written to .* (re)
 
diff --git a/tests/functional/translate/cram/translate-with-gff-and-gene.t b/tests/functional/translate/cram/translate-with-gff-and-gene.t
index 2c7d1d016..ec0bee6d6 100644
--- a/tests/functional/translate/cram/translate-with-gff-and-gene.t
+++ b/tests/functional/translate/cram/translate-with-gff-and-gene.t
@@ -18,7 +18,7 @@ Translate amino acids for genes using a GFF3 file where the gene names are store
   >   --ancestral-sequences "${DATA}/zika/nt_muts.json" \
   >   --reference-sequence genemap.gff \
   >   --output-node-data aa_muts.json
-  Read in 2 features from reference sequence file
+  Read in 3 features from reference sequence file
   Validating schema of '.+/nt_muts.json'... (re)
   amino acid mutations written to .* (re)
 
diff --git a/tests/functional/translate/cram/translate-with-gff-and-locus-tag.t b/tests/functional/translate/cram/translate-with-gff-and-locus-tag.t
index e58ea8979..ee1c5975a 100644
--- a/tests/functional/translate/cram/translate-with-gff-and-locus-tag.t
+++ b/tests/functional/translate/cram/translate-with-gff-and-locus-tag.t
@@ -17,7 +17,7 @@ Translate amino acids for genes using a GFF3 file where the gene names are store
   >   --vcf-reference-output translations_reference.fasta
   Gene length of 'rrs' is not a multiple of 3. will pad with N
   Read in 187 specified genes to translate.
-  Read in 187 features from reference sequence file
+  Read in 188 features from reference sequence file
   162 genes had no mutations and so have been be excluded.
   amino acid mutations written to .* (re)
 
diff --git a/tests/functional/translate/data/simple-genome/aa_muts.json b/tests/functional/translate/data/simple-genome/aa_muts.json
index aa3c82801..8fe9db91a 100644
--- a/tests/functional/translate/data/simple-genome/aa_muts.json
+++ b/tests/functional/translate/data/simple-genome/aa_muts.json
@@ -2,24 +2,24 @@
   "annotations": {
     "gene1": {
       "end": 24,
-      "seqid": "data/reference.source.gff",
+      "seqid": "data/reference.gff",
       "start": 10,
       "strand": "+",
       "type": "gene"
     },
     "gene2": {
       "end": 47,
-      "seqid": "data/reference.source.gff",
+      "seqid": "data/reference.gff",
       "start": 36,
       "strand": "-",
       "type": "gene"
     },
     "nuc": {
       "end": 50,
-      "seqid": "data/reference.source.gff",
+      "seqid": "data/reference.gff",
       "start": 1,
       "strand": "+",
-      "type": "source"
+      "type": "##sequence-region pragma"
     }
   },
   "generated_by": {
diff --git a/tests/functional/translate/data/simple-genome/reference.source.gff b/tests/functional/translate/data/simple-genome/reference.gff
similarity index 59%
rename from tests/functional/translate/data/simple-genome/reference.source.gff
rename to tests/functional/translate/data/simple-genome/reference.gff
index bbe1084a7..c614d2dd6 100644
--- a/tests/functional/translate/data/simple-genome/reference.source.gff
+++ b/tests/functional/translate/data/simple-genome/reference.gff
@@ -2,6 +2,5 @@
 ##created by james hadfield for testing NextStrain (December 2023)
 ##sequence-region reference_name 1 50
 reference_name	RefSeq	region	1	50	.	+	.	ID=reference_name
-reference_name	RefSeq	source	1	50	.	+	.	ID=reference_name;locus_tag="https://github.com/nextstrain/augur/issues/1349";Note1="Source isn't really a GFF ID, but is required for Nextstrain to function correctly"
 reference_name	RefSeq	gene	10	24	.	+	.	Name=gene1;gene=gene1
 reference_name	RefSeq	gene	36	47	.	-	.	Name=gene2;gene=gene2
diff --git a/tests/functional/translate/data/tb/aa_muts.json b/tests/functional/translate/data/tb/aa_muts.json
index 75a40fd01..cdd97e67b 100644
--- a/tests/functional/translate/data/tb/aa_muts.json
+++ b/tests/functional/translate/data/tb/aa_muts.json
@@ -1055,7 +1055,7 @@
       "seqid": "translate/data/tb/Mtb_H37Rv_NCBI_Annot.gff",
       "start": 1,
       "strand": "+",
-      "type": "gene"
+      "type": "##sequence-region pragma"
     },
     "opcA": {
       "end": 1625365,