From 0e7a3351d4041c5f01694c630b26633121413764 Mon Sep 17 00:00:00 2001
From: James Stevenson <james.stevenson@nationwidechildrens.org>
Date: Wed, 3 Jan 2024 08:59:03 -0500
Subject: [PATCH] feat: remove associated_with as distinct group (merge w/
 xrefs)

---
 docs/source/index.rst                         |   2 +-
 docs/source/normalizing_data/sources.rst      |   9 +-
 src/gene/database/dynamodb.py                 |   3 +-
 src/gene/database/postgresql.py               |  35 ++----
 src/gene/database/postgresql/add_fkeys.sql    |   2 -
 src/gene/database/postgresql/add_indexes.sql  |   2 -
 .../postgresql/create_record_lookup_view.sql  |   6 -
 .../database/postgresql/create_tables.sql     |   6 -
 .../postgresql/delete_normalized_concepts.sql |   1 -
 src/gene/database/postgresql/drop_fkeys.sql   |   1 -
 src/gene/database/postgresql/drop_indexes.sql |   1 -
 src/gene/etl/ensembl.py                       | 118 ++++++++++--------
 src/gene/etl/hgnc.py                          |  31 ++---
 src/gene/etl/merge.py                         |  14 ++-
 src/gene/etl/ncbi.py                          |  52 ++++----
 src/gene/query.py                             |   3 +-
 src/gene/schemas.py                           |  10 +-
 tests/conftest.py                             |   1 -
 tests/unit/test_database_and_etl.py           |   6 -
 tests/unit/test_ensembl_source.py             |  11 +-
 tests/unit/test_hgnc_source.py                |  73 ++++++-----
 tests/unit/test_ncbi_source.py                |  59 ++++-----
 tests/unit/test_query.py                      |  40 +++---
 tests/unit/test_schemas.py                    |   9 --
 24 files changed, 209 insertions(+), 286 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 7c6e5926..d64bbe4d 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -12,7 +12,7 @@ Gene Normalizer |version|
      :alt: citation
      :target: https://zenodo.org/badge/latestdoi/309797998
 
-The Gene Normalizer provides tools for resolving ambiguous human gene references to consistently-structured, normalized terms. For gene concepts extracted from `NCBI Gene <https://www.ncbi.nlm.nih.gov/gene/>`_, `Ensembl <https://useast.ensembl.org/index.html>`_, and `HGNC <https://www.genenames.org/>`_, it designates a `CURIE <https://en.wikipedia.org/wiki/CURIE>`_, and provides additional metadata like current and previously-used symbols, aliases, database cross-references and associations, and coordinates.
+The Gene Normalizer provides tools for resolving ambiguous human gene references to consistently-structured, normalized terms. For gene concepts extracted from `NCBI Gene <https://www.ncbi.nlm.nih.gov/gene/>`_, `Ensembl <https://useast.ensembl.org/index.html>`_, and `HGNC <https://www.genenames.org/>`_, it designates a `CURIE <https://en.wikipedia.org/wiki/CURIE>`_, and provides additional metadata like current and previously-used symbols, aliases, database cross-references, and coordinates.
 
 A `public REST instance of the service <https://normalize.cancervariants.org/gene>`_ is available for programmatic queries:
 
diff --git a/docs/source/normalizing_data/sources.rst b/docs/source/normalizing_data/sources.rst
index 591e582c..0de9cf57 100644
--- a/docs/source/normalizing_data/sources.rst
+++ b/docs/source/normalizing_data/sources.rst
@@ -33,9 +33,7 @@ HGNC
      "previous_symbols": [],
      "xrefs": [
        "ensembl:ENSG00000157764",
-       "ncbigene:673"
-     ],
-     "associated_with": [
+       "ncbigene:673",
        "uniprot:P15056",
        "pubmed:2284096",
        "omim:164757",
@@ -99,7 +97,6 @@ Ensembl
      "xrefs": [
        "hgnc:1097"
      ],
-     "associated_with": [],
      "gene_type": "protein_coding",
      "match_type": 100
    }
@@ -143,9 +140,7 @@ The `NCBI Gene Database <https://www.ncbi.nlm.nih.gov/gene/>`_ is a service prov
       "previous_symbols": [],
       "xrefs": [
         "ensembl:ENSG00000157764",
-        "hgnc:1097"
-      ],
-      "associated_with": [
+        "hgnc:1097",
         "omim:164757"
       ],
       "gene_type": "protein-coding",
diff --git a/src/gene/database/dynamodb.py b/src/gene/database/dynamodb.py
index 6f7b0ee7..629059c8 100644
--- a/src/gene/database/dynamodb.py
+++ b/src/gene/database/dynamodb.py
@@ -434,8 +434,7 @@ def _add_ref_record(
 
         :param str term: referent term
         :param str concept_id: concept ID to refer to
-        :param str ref_type: one of {'alias', 'label', 'xref',
-            'associated_with'}
+        :param str ref_type: one of {'alias', 'label', 'xref'}
         :param src_name: name of source for record
         """
         label_and_type = f"{term.lower()}##{ref_type}"
diff --git a/src/gene/database/postgresql.py b/src/gene/database/postgresql.py
index 6638645c..66a43132 100644
--- a/src/gene/database/postgresql.py
+++ b/src/gene/database/postgresql.py
@@ -97,7 +97,6 @@ def list_tables(self) -> List[str]:
     _drop_db_query = b"""
     DROP MATERIALIZED VIEW IF EXISTS record_lookup_view;
     DROP TABLE IF EXISTS
-        gene_associations,
         gene_symbols,
         gene_previous_symbols,
         gene_aliases,
@@ -324,12 +323,11 @@ def _format_source_record(self, source_row: Tuple) -> Dict:
             "locations": source_row[5],
             "gene_type": source_row[6],
             "aliases": source_row[7],
-            "associated_with": source_row[8],
-            "previous_symbols": source_row[9],
-            "symbol": source_row[10],
-            "xrefs": source_row[11],
-            "src_name": source_row[12],
-            "merge_ref": source_row[13],
+            "previous_symbols": source_row[8],
+            "symbol": source_row[9],
+            "xrefs": source_row[10],
+            "src_name": source_row[11],
+            "merge_ref": source_row[12],
             "item_type": RecordType.IDENTITY.value,
         }
         return {k: v for k, v in gene_record.items() if v}
@@ -373,8 +371,7 @@ def _format_merged_record(self, merged_row: Tuple) -> Dict:
             "hgnc_locus_type": merged_row[11],
             "ncbi_gene_type": merged_row[12],
             "aliases": merged_row[13],
-            "associated_with": merged_row[14],
-            "xrefs": merged_row[15],
+            "xrefs": merged_row[14],
             "item_type": RecordType.MERGER.value,
         }
         return {k: v for k, v in merged_record.items() if v}
@@ -421,7 +418,6 @@ def get_record_by_id(
         RefType.PREVIOUS_SYMBOLS: b"SELECT concept_id FROM gene_previous_symbols WHERE lower(prev_symbol) = %s;",  # noqa: E501
         RefType.ALIASES: b"SELECT concept_id FROM gene_aliases WHERE lower(alias) = %s;",  # noqa: E501
         RefType.XREFS: b"SELECT concept_id FROM gene_xrefs WHERE lower(xref) = %s;",
-        RefType.ASSOCIATED_WITH: b"SELECT concept_id FROM gene_associations WHERE lower(associated_with) = %s;",  # noqa: E501
     }
 
     def get_refs_by_type(self, search_term: str, ref_type: RefType) -> List[str]:
@@ -558,9 +554,6 @@ def add_source_metadata(self, src_name: SourceName, meta: SourceMeta) -> None:
     )
     _ins_alias_query = b"INSERT INTO gene_aliases (alias, concept_id) VALUES (%s, %s);"
     _ins_xref_query = b"INSERT INTO gene_xrefs (xref, concept_id) VALUES (%s, %s);"
-    _ins_assoc_query = (
-        b"INSERT INTO gene_associations (associated_with, concept_id) VALUES (%s, %s);"
-    )
 
     def add_record(self, record: Dict, src_name: SourceName) -> None:
         """Add new record to database.
@@ -591,8 +584,6 @@ def add_record(self, record: Dict, src_name: SourceName) -> None:
                     cur.execute(self._ins_alias_query, [a, concept_id])
                 for x in record.get("xrefs", []):
                     cur.execute(self._ins_xref_query, [x, concept_id])
-                for a in record.get("associated_with", []):
-                    cur.execute(self._ins_assoc_query, [a, concept_id])
                 for p in record.get("previous_symbols", []):
                     cur.execute(self._ins_prev_symbol_query, [p, concept_id])
                 if record.get("symbol"):
@@ -606,10 +597,9 @@ def add_record(self, record: Dict, src_name: SourceName) -> None:
     INSERT INTO gene_merged (
         concept_id, symbol, symbol_status, previous_symbols, label, strand,
         location_annotations, ensembl_locations, hgnc_locations, ncbi_locations,
-        hgnc_locus_type, ensembl_biotype, ncbi_gene_type, aliases, associated_with,
-        xrefs
+        hgnc_locus_type, ensembl_biotype, ncbi_gene_type, aliases, xrefs
     )
-    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
     """
 
     def add_merged_record(self, record: Dict) -> None:
@@ -644,7 +634,6 @@ def add_merged_record(self, record: Dict) -> None:
                     record.get("ensembl_biotype"),
                     record.get("ncbi_gene_type"),
                     record.get("aliases"),
-                    record.get("associated_with"),
                     record.get("xrefs"),
                 ],
             )
@@ -702,13 +691,6 @@ def delete_normalized_concepts(self) -> None:
         WHERE gc.source = %s
     );
     """
-    _drop_associations_query = b"""
-    DELETE FROM gene_associations WHERE id IN (
-        SELECT ga.id FROM gene_associations ga LEFT JOIN gene_concepts gc
-            ON gc.concept_id = ga.concept_id
-        WHERE gc.source = %s
-    );
-    """
     _drop_prev_symbols_query = b"""
     DELETE FROM gene_previous_symbols WHERE id IN (
         SELECT gps.id FROM gene_previous_symbols gps LEFT JOIN gene_concepts gc
@@ -750,7 +732,6 @@ def delete_source(self, src_name: SourceName) -> None:
         """
         with self.conn.cursor() as cur:
             cur.execute(self._drop_aliases_query, [src_name.value])
-            cur.execute(self._drop_associations_query, [src_name.value])
             cur.execute(self._drop_prev_symbols_query, [src_name.value])
             cur.execute(self._drop_symbols_query, [src_name.value])
             cur.execute(self._drop_xrefs_query, [src_name.value])
diff --git a/src/gene/database/postgresql/add_fkeys.sql b/src/gene/database/postgresql/add_fkeys.sql
index f93459b3..28e1a88f 100644
--- a/src/gene/database/postgresql/add_fkeys.sql
+++ b/src/gene/database/postgresql/add_fkeys.sql
@@ -1,7 +1,5 @@
 ALTER TABLE gene_aliases ADD CONSTRAINT gene_aliases_concept_id_fkey
     FOREIGN KEY (concept_id) REFERENCES gene_concepts (concept_id);
-ALTER TABLE gene_associations ADD CONSTRAINT gene_associations_concept_id_fkey
-    FOREIGN KEY (concept_id) REFERENCES gene_concepts (concept_id);
 ALTER TABLE gene_previous_symbols
     ADD CONSTRAINT gene_previous_symbols_concept_id_fkey
     FOREIGN KEY (concept_id) REFERENCES gene_concepts (concept_id);
diff --git a/src/gene/database/postgresql/add_indexes.sql b/src/gene/database/postgresql/add_indexes.sql
index b96df534..805ad71b 100644
--- a/src/gene/database/postgresql/add_indexes.sql
+++ b/src/gene/database/postgresql/add_indexes.sql
@@ -7,7 +7,5 @@ CREATE INDEX idx_gps_symbol_low
     ON gene_previous_symbols (lower(prev_symbol));
 CREATE INDEX idx_ga_alias_low ON gene_aliases (lower(alias));
 CREATE INDEX idx_gx_xref_low ON gene_xrefs (lower(xref));
-CREATE INDEX idx_g_as_association_low
-    ON gene_associations (lower(associated_with));
 CREATE INDEX idx_rlv_concept_id_low
     ON record_lookup_view (lower(concept_id));
diff --git a/src/gene/database/postgresql/create_record_lookup_view.sql b/src/gene/database/postgresql/create_record_lookup_view.sql
index 1e33977f..7474a07a 100644
--- a/src/gene/database/postgresql/create_record_lookup_view.sql
+++ b/src/gene/database/postgresql/create_record_lookup_view.sql
@@ -7,7 +7,6 @@ SELECT gc.concept_id,
        gc.locations,
        gc.gene_type,
        ga.aliases,
-       gas.associated_with,
        gps.previous_symbols,
        gs.symbol,
        gx.xrefs,
@@ -20,11 +19,6 @@ FULL JOIN (
     FROM gene_aliases ga_1
     GROUP BY ga_1.concept_id
 ) ga ON gc.concept_id::text = ga.concept_id::text
-FULL JOIN (
-    SELECT gas_1.concept_id, array_agg(gas_1.associated_with) AS associated_with
-    FROM gene_associations gas_1
-    GROUP BY gas_1.concept_id
-) gas ON gc.concept_id::text = gas.concept_id::text
 FULL JOIN (
     SELECT gps_1.concept_id, array_agg(gps_1.prev_symbol) AS previous_symbols
     FROM gene_previous_symbols gps_1
diff --git a/src/gene/database/postgresql/create_tables.sql b/src/gene/database/postgresql/create_tables.sql
index 83198199..9100e553 100644
--- a/src/gene/database/postgresql/create_tables.sql
+++ b/src/gene/database/postgresql/create_tables.sql
@@ -26,7 +26,6 @@ CREATE TABLE gene_merged (
     hgnc_locus_type TEXT [],
     ncbi_gene_type TEXT [],
     aliases TEXT [],
-    associated_with TEXT [],
     xrefs TEXT []
 );
 CREATE TABLE gene_concepts (
@@ -60,8 +59,3 @@ CREATE TABLE gene_xrefs (
     xref TEXT NOT NULL,
     concept_id VARCHAR(127) NOT NULL REFERENCES gene_concepts (concept_id)
 );
-CREATE TABLE gene_associations (
-    id SERIAL PRIMARY KEY,
-    associated_with TEXT NOT NULL,
-    concept_ID VARCHAR(127) NOT NULL REFERENCES gene_concepts (concept_id)
-);
diff --git a/src/gene/database/postgresql/delete_normalized_concepts.sql b/src/gene/database/postgresql/delete_normalized_concepts.sql
index 5141c841..e5e1bdce 100644
--- a/src/gene/database/postgresql/delete_normalized_concepts.sql
+++ b/src/gene/database/postgresql/delete_normalized_concepts.sql
@@ -19,7 +19,6 @@ CREATE TABLE gene_merged (
     hgnc_locus_type TEXT [],
     ncbi_gene_type TEXT [],
     aliases TEXT [],
-    associated_with TEXT [],
     xrefs TEXT []
 );
 ALTER TABLE gene_concepts ADD CONSTRAINT gene_concepts_merge_ref_fkey
diff --git a/src/gene/database/postgresql/drop_fkeys.sql b/src/gene/database/postgresql/drop_fkeys.sql
index f804ca1e..ba2aeef5 100644
--- a/src/gene/database/postgresql/drop_fkeys.sql
+++ b/src/gene/database/postgresql/drop_fkeys.sql
@@ -1,5 +1,4 @@
 ALTER TABLE gene_aliases DROP CONSTRAINT gene_aliases_concept_id_fkey;
-ALTER TABLE gene_associations DROP CONSTRAINT gene_associations_concept_id_fkey;
 ALTER TABLE gene_previous_symbols
     DROP CONSTRAINT gene_previous_symbols_concept_id_fkey;
 ALTER TABLE gene_symbols DROP CONSTRAINT gene_symbols_concept_id_fkey;
diff --git a/src/gene/database/postgresql/drop_indexes.sql b/src/gene/database/postgresql/drop_indexes.sql
index 7c9743d0..dd9156dc 100644
--- a/src/gene/database/postgresql/drop_indexes.sql
+++ b/src/gene/database/postgresql/drop_indexes.sql
@@ -4,5 +4,4 @@ DROP INDEX IF EXISTS idx_gs_symbol_low;
 DROP INDEX IF EXISTS idx_gps_symbol_low;
 DROP INDEX IF EXISTS idx_gx_xref_low;
 DROP INDEX IF EXISTS idx_ga_alias_low;
-DROP INDEX IF EXISTS idx_g_as_association_low;
 DROP INDEX IF EXISTS idx_rlv_concept_id_low;
diff --git a/src/gene/etl/ensembl.py b/src/gene/etl/ensembl.py
index bb590047..aef9d388 100644
--- a/src/gene/etl/ensembl.py
+++ b/src/gene/etl/ensembl.py
@@ -1,7 +1,7 @@
 """Defines the Ensembl ETL methods."""
 import logging
 import re
-from typing import Dict
+from typing import Dict, Optional
 
 import gffutils
 from gffutils.feature import Feature
@@ -90,73 +90,63 @@ def _add_gene(self, f: Feature, accession_numbers: Dict) -> Dict:
         return gene_params
 
     def _add_attributes(self, f: Feature, gene: Dict) -> None:
-        """Add concept_id, symbol, xrefs, and associated_with to a gene record.
+        """Add concept_id, symbol, label, gene_type, and xrefs to a gene record.
 
         :param f: A gene from the data
         :param gene: A transformed gene record
         """
-        attributes = {
+        attributes_map = {
             "ID": "concept_id",
             "Name": "symbol",
             "description": "xrefs",
             "biotype": "gene_type",
         }
 
-        for attribute in f.attributes.items():
-            key = attribute[0]
-
-            if key in attributes.keys():
-                val = attribute[1]
-
-                if len(val) == 1:
-                    val = val[0]
-                    if key == "ID":
-                        if val.startswith("gene"):
-                            val = (
-                                f"{NamespacePrefix.ENSEMBL.value}:"
-                                f"{val.split(':')[1]}"
-                            )
-
-                if key == "description":
-                    gene["label"] = val.split("[")[0].strip()
-                    if "Source:" in val:
-                        src_name = (
-                            val.split("[")[-1]
-                            .split("Source:")[-1]
-                            .split("Acc")[0]
-                            .split(";")[0]
-                        )
-                        src_id = val.split("Acc:")[-1].split("]")[0]
-                        if ":" in src_id:
-                            src_id = src_id.split(":")[-1]
-                        source = self._get_xref_associated_with(src_name, src_id)
-                        if "xrefs" in source:
-                            gene["xrefs"] = source["xrefs"]
-                        elif "associated_with" in source:
-                            gene["associated_with"] = source["associated_with"]
-                    continue
-
-                gene[attributes[key]] = val
-
-    def _get_xref_associated_with(self, src_name: str, src_id: str) -> Dict:
-        """Get xref or associated_with concept.
+        for key, values in f.attributes.items():
+            if key not in attributes_map or not values:
+                continue
+
+            if key == "description":
+                # e.g. "<label> [Source:HGNC Symbol;Acc:HGNC:1097]"
+                description = values[0]
+                match = re.match(
+                    r"^(.*?)\s*\[Source:([^;\]]*);Acc:([^\]]*)\]$", description
+                )
+                if match:
+                    label, src_name, accession = match.groups()
+                    # accessions may carry their own prefix, e.g. "HGNC:1097"
+                    xref = self._get_xref(src_name, accession.split(":")[-1])
+                    if xref:
+                        gene["xrefs"] = [xref]
+                else:
+                    label = description.split("[")[0].strip()
+                gene["label"] = label
+                continue
+
+            # attribute values arrive as lists; store lone values as scalars
+            value = values[0] if len(values) == 1 else values
+            if key == "ID" and isinstance(value, str) and value.startswith("gene"):
+                value = f"{NamespacePrefix.ENSEMBL.value}:{value.split(':')[1]}"
+            gene[attributes_map[key]] = value
+
+    def _get_xref(self, src_name: str, src_id: str) -> Optional[str]:
+        """Build an xref CURIE for a recognized source name.
 
         :param src_name: Source name
         :param src_id: The source's accession number
-        :return: A dict containing an other identifier or xref
+        :return: xref CURIE, or None if the source name is unrecognized
         """
-        source = dict()
-        if src_name.startswith("HGNC"):
-            source["xrefs"] = [f"{NamespacePrefix.HGNC.value}:{src_id}"]
-        elif src_name.startswith("NCBI"):
-            source["xrefs"] = [f"{NamespacePrefix.NCBI.value}:{src_id}"]
-        elif src_name.startswith("UniProt"):
-            source["associated_with"] = [f"{NamespacePrefix.UNIPROT.value}:{src_id}"]
-        elif src_name.startswith("miRBase"):
-            source["associated_with"] = [f"{NamespacePrefix.MIRBASE.value}:{src_id}"]
-        elif src_name.startswith("RFAM"):
-            source["associated_with"] = [f"{NamespacePrefix.RFAM.value}:{src_id}"]
-        return source
+        for prefix, constrained_prefix in (
+            ("HGNC", NamespacePrefix.HGNC),
+            ("NCBI", NamespacePrefix.NCBI),
+            ("UniProt", NamespacePrefix.UNIPROT),
+            ("miRBase", NamespacePrefix.MIRBASE),
+            ("RFAM", NamespacePrefix.RFAM),
+        ):
+            if src_name.startswith(prefix):
+                return f"{constrained_prefix.value}:{src_id}"
+        _logger.warning("Unrecognized source name: %s:%s", src_name, src_id)
+        return None
 
     def _add_meta(self) -> None:
         """Add Ensembl metadata.
diff --git a/src/gene/etl/hgnc.py b/src/gene/etl/hgnc.py
index 1f060935..805fbe37 100644
--- a/src/gene/etl/hgnc.py
+++ b/src/gene/etl/hgnc.py
@@ -6,7 +6,6 @@
 
 from gene.etl.base import Base, GeneNormalizerEtlError
 from gene.schemas import (
-    PREFIX_LOOKUP,
     Annotation,
     Chromosome,
     DataLicenseAttributes,
@@ -42,9 +41,9 @@ def _transform_data(self) -> None:
                 elif r["status"] == "Entry Withdrawn":
                     gene["symbol_status"] = SymbolStatus.WITHDRAWN.value
 
-            # store alias, xref, associated_with, prev_symbols, location
+            # store alias, xref, prev_symbols, location
             self._get_aliases(r, gene)
-            self._get_xrefs_associated_with(r, gene)
+            self._get_xrefs(r, gene)
             if "prev_symbol" in r:
                 self._get_previous_symbols(r, gene)
             if "location" in r:
@@ -81,14 +80,13 @@ def _get_previous_symbols(self, r: Dict, gene: Dict) -> None:
         if prev_symbols:
             gene["previous_symbols"] = list(set(prev_symbols))
 
-    def _get_xrefs_associated_with(self, record: Dict, gene: Dict) -> None:
-        """Store xrefs and/or associated_with refs in a gene record.
+    def _get_xrefs(self, record: Dict, gene: Dict) -> None:
+        """Store xrefs in a gene record.
 
         :param record: A gene record in the HGNC data file
         :param gene: A transformed gene record
         """
         xrefs = list()
-        associated_with = list()
         sources = [
             "entrez_id",
             "ensembl_gene_id",
@@ -128,37 +126,28 @@ def _get_xrefs_associated_with(self, record: Dict, gene: Dict) -> None:
                     key = src
 
                 if key.upper() in NamespacePrefix.__members__:
-                    if NamespacePrefix[key.upper()].value in PREFIX_LOOKUP.keys():
-                        self._get_xref_associated_with(key, src, record, xrefs)
-                    else:
-                        self._get_xref_associated_with(
-                            key, src, record, associated_with
-                        )
+                    self._get_xref(key, src, record, xrefs)
                 else:
                     _logger.warning(f"{key} not in schemas.py")
 
         if xrefs:
             gene["xrefs"] = xrefs
-        if associated_with:
-            gene["associated_with"] = associated_with
 
-    def _get_xref_associated_with(
-        self, key: str, src: str, r: Dict, src_type: List[str]
-    ) -> None:
-        """Add an xref or associated_with ref to a gene record.
+    def _get_xref(self, key: str, src: str, r: Dict, xrefs: List[str]) -> None:
+        """Append the xref(s) found in one HGNC source field to ``xrefs``.
 
         :param key: The source's name
         :param src: HGNC's source field
         :param r: A gene record in the HGNC data file
-        :param src_type: Either xrefs or associated_with list
+        :param xrefs: List of xrefs being collected; extended in place
         """
         if isinstance(r[src], list):
             for xref in r[src]:
-                src_type.append(f"{NamespacePrefix[key.upper()].value}:{xref}")
+                xrefs.append(f"{NamespacePrefix[key.upper()].value}:{xref}")
         else:
             if isinstance(r[src], str) and ":" in r[src]:
                 r[src] = r[src].split(":")[-1].strip()
-            src_type.append(f"{NamespacePrefix[key.upper()].value}" f":{r[src]}")
+            xrefs.append(f"{NamespacePrefix[key.upper()].value}" f":{r[src]}")
 
     def _get_location(self, r: Dict, gene: Dict) -> None:
         """Store GA4GH VRS ChromosomeLocation in a gene record.
diff --git a/src/gene/etl/merge.py b/src/gene/etl/merge.py
index d065be73..57ca439c 100644
--- a/src/gene/etl/merge.py
+++ b/src/gene/etl/merge.py
@@ -5,7 +5,7 @@
 
 from gene.database import AbstractDatabase
 from gene.database.database import DatabaseWriteError
-from gene.schemas import GeneTypeFieldName, RecordType, SourcePriority
+from gene.schemas import GeneTypeFieldName, NamespacePrefix, RecordType, SourcePriority
 
 _logger = logging.getLogger(__name__)
 
@@ -98,7 +98,14 @@ def _create_record_id_set(
             if not record_xrefs:
                 return observed_id_set | {db_record["concept_id"]}
             else:
-                local_id_set = set(record_xrefs)
+                local_id_set = set()
+                for xref in record_xrefs:
+                    if (
+                        xref.startswith(NamespacePrefix.NCBI.value)
+                        or xref.startswith(NamespacePrefix.ENSEMBL.value)
+                        or xref.startswith(NamespacePrefix.HGNC.value)
+                    ):
+                        local_id_set.add(xref)
             merged_id_set = {record_id} | observed_id_set
             for local_record_id in local_id_set - observed_id_set:
                 merged_id_set |= self._create_record_id_set(
@@ -145,7 +152,6 @@ def record_order(record: Dict) -> Tuple:
         merged_attrs = {
             "concept_id": records[0]["concept_id"],
             "aliases": set(),
-            "associated_with": set(),
             "previous_symbols": set(),
             "hgnc_locus_type": set(),
             "ncbi_gene_type": set(),
@@ -156,7 +162,7 @@ def record_order(record: Dict) -> Tuple:
             merged_attrs["xrefs"] = list({r["concept_id"] for r in records[1:]})
 
         # merge from constituent records
-        set_fields = ["aliases", "associated_with", "previous_symbols", "strand"]
+        set_fields = ["aliases", "previous_symbols", "strand"]
         scalar_fields = ["symbol", "symbol_status", "label", "location_annotations"]
         for record in records:
             for field in set_fields:
diff --git a/src/gene/etl/ncbi.py b/src/gene/etl/ncbi.py
index 427d57e6..a5954bbe 100644
--- a/src/gene/etl/ncbi.py
+++ b/src/gene/etl/ncbi.py
@@ -98,14 +98,13 @@ def _get_prev_symbols(self) -> Dict[str, str]:
         history_file.close()
         return prev_symbols
 
-    def _add_xrefs_associated_with(self, val: List[str], params: Dict) -> None:
-        """Add xrefs and associated_with refs to a transformed gene.
+    def _add_xrefs(self, val: List[str], params: Dict) -> None:
+        """Add xrefs to a transformed gene.
 
         :param val: A list of source ids for a given gene
         :param params: A transformed gene record
         """
         params["xrefs"] = []
-        params["associated_with"] = []
         for src in val:
             src_name = src.split(":")[0].upper()
             src_id = src.split(":")[-1]
@@ -125,16 +124,12 @@ def _add_xrefs_associated_with(self, val: List[str], params: Dict) -> None:
                     prefix = NamespacePrefix.IMGT_GENE_DB.value
                 elif src_name.startswith("MIRBASE"):
                     prefix = NamespacePrefix.MIRBASE.value
-                else:
-                    prefix = None
-                if prefix:
-                    params["associated_with"].append(f"{prefix}:{src_id}")
                 else:
                     _logger.info(f"{src_name} is not in NameSpacePrefix.")
+                    continue
+                params["xrefs"].append(f"{prefix}:{src_id}")
         if not params["xrefs"]:
             del params["xrefs"]
-        if not params["associated_with"]:
-            del params["associated_with"]
 
     def _get_gene_info(self, prev_symbols: Dict[str, str]) -> Dict[str, str]:
         """Store genes from NCBI info file.
@@ -158,10 +153,10 @@ def _get_gene_info(self, prev_symbols: Dict[str, str]) -> Dict[str, str]:
                 params["aliases"] = row[4].split("|")
             else:
                 params["aliases"] = []
-            # get associated_with
+            # get xrefs
             if row[5] != "-":
-                associated_with = row[5].split("|")
-                self._add_xrefs_associated_with(associated_with, params)
+                xrefs = row[5].split("|")
+                self._add_xrefs(xrefs, params)
             # get chromosome location
             vrs_chr_location = self._get_vrs_chr_location(row, params)
             if "exclude" in vrs_chr_location:
@@ -223,7 +218,7 @@ def _add_gff_gene(
         return params
 
     def _add_attributes(self, f: gffutils.feature.Feature, gene: Dict) -> None:
-        """Add concept_id, symbol, and xrefs/associated_with to a gene record.
+        """Add concept_id, symbol, and xrefs to a gene record.
 
         :param gffutils.feature.Feature f: A gene from the data
         :param gene: A transformed gene record
@@ -239,7 +234,7 @@ def _add_attributes(self, f: gffutils.feature.Feature, gene: Dict) -> None:
                     val = val[0]
 
                 if key == "Dbxref":
-                    self._add_xrefs_associated_with(val, gene)
+                    self._add_xrefs(val, gene)
                 elif key == "Name":
                     gene["symbol"] = val
 
@@ -258,25 +253,24 @@ def _get_vrs_sq_location(
         params["strand"] = gene.strand
         return self._build_sequence_location(gene.seqid, gene, params["concept_id"])
 
-    def _get_xref_associated_with(self, src_name: str, src_id: str) -> Dict:
-        """Get xref or associated_with ref.
+    def _get_xref(self, src_name: str, src_id: str) -> Dict:
+        """Get xref.
 
         :param src_name: Source name
         :param src_id: The source's accession number
-        :return: A dict containing an xref or associated_with ref
+        :return: A dict with one "xrefs" entry, or an empty dict if unrecognized
         """
-        source = dict()
-        if src_name.startswith("HGNC"):
-            source["xrefs"] = [f"{NamespacePrefix.HGNC.value}:{src_id}"]
-        elif src_name.startswith("NCBI"):
-            source["xrefs"] = [f"{NamespacePrefix.NCBI.value}:{src_id}"]
-        elif src_name.startswith("UniProt"):
-            source["associated_with"] = [f"{NamespacePrefix.UNIPROT.value}:{src_id}"]
-        elif src_name.startswith("miRBase"):
-            source["associated_with"] = [f"{NamespacePrefix.MIRBASE.value}:{src_id}"]
-        elif src_name.startswith("RFAM"):
-            source["associated_with"] = [f"{NamespacePrefix.RFAM.value}:{src_id}"]
-        return source
+        for prefix, constrained_prefix in (
+            ("HGNC", NamespacePrefix.HGNC),
+            ("NCBI", NamespacePrefix.NCBI),  # ?
+            ("UniProt", NamespacePrefix.UNIPROT),
+            ("miRBase", NamespacePrefix.MIRBASE),
+            ("RFAM", NamespacePrefix.RFAM),
+        ):
+            if src_name.startswith(prefix):
+                return {"xrefs": [f"{constrained_prefix.value}:{src_id}"]}
+        _logger.warning("Unrecognized source name: %s:%s", src_name, src_id)
+        return {}
 
     def _get_vrs_chr_location(self, row: List[str], params: Dict) -> List:
         """Store GA4GH VRS ChromosomeLocation in a gene record.
diff --git a/src/gene/query.py b/src/gene/query.py
index 8c100446..0a57be43 100644
--- a/src/gene/query.py
+++ b/src/gene/query.py
@@ -375,9 +375,8 @@ def _add_gene(
         )
 
         # mappings
-        source_ids = record.get("xrefs", []) + record.get("associated_with", [])
         mappings = []
-        for source_id in source_ids:
+        for source_id in record.get("xrefs", []):
             system, code = source_id.split(":")
             mappings.append(
                 core_models.Mapping(
diff --git a/src/gene/schemas.py b/src/gene/schemas.py
index d2c17d84..96b6fc4b 100644
--- a/src/gene/schemas.py
+++ b/src/gene/schemas.py
@@ -58,7 +58,6 @@ class MatchType(IntEnum):
     PREV_SYMBOL = 80
     ALIAS = 60
     XREF = 60
-    ASSOCIATED_WITH = 60
     FUZZY_MATCH = 20
     NO_MATCH = 0
 
@@ -102,7 +101,6 @@ class BaseGene(BaseModel):
     aliases: List[StrictStr] = []
     previous_symbols: List[StrictStr] = []
     xrefs: List[CURIE] = []
-    associated_with: List[CURIE] = []
     gene_type: Optional[StrictStr] = None
 
 
@@ -242,7 +240,6 @@ class RefType(str, Enum):
     PREVIOUS_SYMBOLS = "prev_symbol"
     ALIASES = "alias"
     XREFS = "xref"
-    ASSOCIATED_WITH = "associated_with"
 
 
 # collective name to singular name, e.g. {"previous_symbols": "prev_symbol"}
@@ -561,8 +558,9 @@ class UnmergedNormalizationService(BaseNormalizationService):
                                 ],
                                 "aliases": ["3.1.1.7"],
                                 "previous_symbols": ["YT"],
-                                "xrefs": ["ncbigene:43", "ensembl:ENSG00000087085"],
-                                "associated_with": [
+                                "xrefs": [
+                                    "ncbigene:43",
+                                    "ensembl:ENSG00000087085",
                                     "ucsc:uc003uxi.4",
                                     "vega:OTTHUMG00000157033",
                                     "merops:S09.979",
@@ -671,8 +669,6 @@ class UnmergedNormalizationService(BaseNormalizationService):
                                 "xrefs": [
                                     "hgnc:108",
                                     "ensembl:ENSG00000087085",
-                                ],
-                                "associated_with": [
                                     "omim:100740",
                                 ],
                                 "gene_type": "protein-coding",
diff --git a/tests/conftest.py b/tests/conftest.py
index ad1a14a2..923d71ac 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -41,7 +41,6 @@ def _compare_records(normalized_gene, test_gene, match_type):
     assert set(normalized_gene.xrefs) == set(test_gene.xrefs)
     assert normalized_gene.symbol_status == test_gene.symbol_status
     assert set(normalized_gene.previous_symbols) == set(test_gene.previous_symbols)
-    assert set(normalized_gene.associated_with) == set(test_gene.associated_with)
     assert normalized_gene.symbol == test_gene.symbol
     assert len(normalized_gene.locations) == len(test_gene.locations)
     for loc in normalized_gene.locations:
diff --git a/tests/unit/test_database_and_etl.py b/tests/unit/test_database_and_etl.py
index 092cc6c3..62a4154e 100644
--- a/tests/unit/test_database_and_etl.py
+++ b/tests/unit/test_database_and_etl.py
@@ -76,7 +76,6 @@ def test_tables_created(db_fixture):
     existing_tables = db_fixture.db.list_tables()
     if db_fixture.db_name == "PostgresDatabase":
         assert set(existing_tables) == {
-            "gene_associations",
             "gene_symbols",
             "gene_previous_symbols",
             "gene_aliases",
@@ -150,11 +149,6 @@ def test_item_type(db_fixture):
     assert "item_type" in item
     assert item["item_type"] == "alias"
 
-    filter_exp = Key("label_and_type").eq("omim:606689##associated_with")
-    item = db_fixture.db.genes.query(KeyConditionExpression=filter_exp)["Items"][0]
-    assert "item_type" in item
-    assert item["item_type"] == "associated_with"
-
     filter_exp = Key("label_and_type").eq("ensembl:ensg00000268895##xref")
     item = db_fixture.db.genes.query(KeyConditionExpression=filter_exp)["Items"][0]
     assert "item_type" in item
diff --git a/tests/unit/test_ensembl_source.py b/tests/unit/test_ensembl_source.py
index 1ab55430..e9980579 100644
--- a/tests/unit/test_ensembl_source.py
+++ b/tests/unit/test_ensembl_source.py
@@ -47,7 +47,6 @@ def ddx11l1():
             }
         ],
         "strand": "+",
-        "associated_with": [],
         "gene_type": "transcribed_unprocessed_pseudogene",
     }
     return Gene(**params)
@@ -79,7 +78,6 @@ def tp53():
             }
         ],
         "strand": "-",
-        "associated_with": [],
         "gene_type": "protein_coding",
     }
     return Gene(**params)
@@ -111,7 +109,6 @@ def ATP6AP1_DT():  # noqa: N802
             }
         ],
         "strand": "-",
-        "associated_with": [],
         "gene_type": "lncRNA",
     }
     return Gene(**params)
@@ -127,7 +124,6 @@ def hsa_mir_1253():
         "label": "hsa-mir-1253",
         "previous_symbols": [],
         "aliases": [],
-        "xrefs": [],
         "symbol_status": None,
         "location_annotations": [],
         "locations": [
@@ -143,7 +139,7 @@ def hsa_mir_1253():
             }
         ],
         "strand": "+",
-        "associated_with": ["mirbase:MI0006387"],
+        "xrefs": ["mirbase:MI0006387"],
         "gene_type": "lncRNA",
     }
     return Gene(**params)
@@ -175,7 +171,6 @@ def spry3():
             }
         ],
         "strand": "+",
-        "associated_with": [],
         "gene_type": "protein_coding",
     }
     return Gene(**params)
@@ -254,9 +249,9 @@ def test_hsa_mir_1253(check_resp_single_record, ensembl, hsa_mir_1253):
     resp = ensembl.search("hsa-mir-1253")
     check_resp_single_record(resp, hsa_mir_1253, MatchType.SYMBOL)
 
-    # associated_with
+    # xref
     resp = ensembl.search("mirbase:MI0006387")
-    check_resp_single_record(resp, hsa_mir_1253, MatchType.ASSOCIATED_WITH)
+    check_resp_single_record(resp, hsa_mir_1253, MatchType.XREF)
 
 
 def test_spry3(check_resp_single_record, ensembl, spry3):
diff --git a/tests/unit/test_hgnc_source.py b/tests/unit/test_hgnc_source.py
index 185809fe..2d35c028 100644
--- a/tests/unit/test_hgnc_source.py
+++ b/tests/unit/test_hgnc_source.py
@@ -49,7 +49,9 @@ def a1bg_as1():
         "previous_symbols": ["NCRNA00181", "A1BGAS", "A1BG-AS"],
         "aliases": ["FLJ23569"],
         "symbol_status": "approved",
-        "associated_with": [
+        "xrefs": [
+            "ensembl:ENSG00000268895",
+            "ncbigene:503538",
             "vega:OTTHUMG00000183508",
             "ucsc:uc002qse.3",
             "refseq:NR_015380",
@@ -57,7 +59,6 @@ def a1bg_as1():
             "refseq:NR_015380",
             "ena.embl:BC040926",
         ],
-        "xrefs": ["ensembl:ENSG00000268895", "ncbigene:503538"],
         "gene_type": "RNA, long non-coding",
     }
     return Gene(**params)
@@ -86,7 +87,7 @@ def tp53():
         "previous_symbols": [],
         "aliases": ["p53", "LFS1"],
         "symbol_status": "approved",
-        "associated_with": [
+        "xrefs": [
             "vega:OTTHUMG00000162125",
             "refseq:NM_000546",
             "cosmic:TP53",
@@ -110,8 +111,9 @@ def tp53():
             "pubmed:6396087",
             "pubmed:3456488",
             "pubmed:2047879",
+            "ensembl:ENSG00000141510",
+            "ncbigene:7157",
         ],
-        "xrefs": ["ensembl:ENSG00000141510", "ncbigene:7157"],
         "gene_type": "gene with protein product",
     }
     return Gene(**params)
@@ -140,8 +142,9 @@ def a3galt2():
         "previous_symbols": ["A3GALT2P"],
         "aliases": ["IGBS3S", "IGB3S"],
         "symbol_status": "approved",
-        "xrefs": ["ensembl:ENSG00000184389", "ncbigene:127550"],
-        "associated_with": [
+        "xrefs": [
+            "ensembl:ENSG00000184389",
+            "ncbigene:127550",
             "vega:OTTHUMG00000004125",
             "vega:OTTHUMG00000004125",
             "ucsc:uc031plq.1",
@@ -180,8 +183,9 @@ def wdhd1():
         "previous_symbols": [],
         "aliases": ["AND-1", "CTF4", "CHTF4"],
         "symbol_status": "approved",
-        "xrefs": ["ensembl:ENSG00000198554", "ncbigene:11169"],
-        "associated_with": [
+        "xrefs": [
+            "ensembl:ENSG00000198554",
+            "ncbigene:11169",
             "vega:OTTHUMG00000140304",
             "refseq:NM_007086",
             "omim:608126",
@@ -212,8 +216,12 @@ def g6pr():
         "previous_symbols": [],
         "aliases": ["GSD1aSP"],
         "symbol_status": "approved",
-        "xrefs": ["ncbigene:2541"],
-        "associated_with": ["pubmed:2172641", "pubmed:7814621", "pubmed:2996501"],
+        "xrefs": [
+            "ncbigene:2541",
+            "pubmed:2172641",
+            "pubmed:7814621",
+            "pubmed:2996501",
+        ],
         "gene_type": "unknown",
     }
     return Gene(**params)
@@ -233,8 +241,7 @@ def pirc24():
         "previous_symbols": [],
         "aliases": [],
         "symbol_status": "approved",
-        "xrefs": ["ncbigene:100313810"],
-        "associated_with": ["pubmed:17881367"],
+        "xrefs": ["ncbigene:100313810", "pubmed:17881367"],
         "gene_type": "RNA, cluster",
     }
     return Gene(**params)
@@ -263,8 +270,8 @@ def gage4():
         "previous_symbols": [],
         "aliases": ["CT4.4"],
         "symbol_status": "approved",
-        "xrefs": ["ncbigene:2576"],
-        "associated_with": [
+        "xrefs": [
+            "ncbigene:2576",
             "refseq:NM_001474",
             "omim:300597",
             "uniprot:P0DSO3",
@@ -290,8 +297,9 @@ def mafip():
         "previous_symbols": [],
         "aliases": ["FLJ35473", "FLJ00219", "FLJ39633", "MIP", "pp5644", "TEKT4P4"],
         "symbol_status": "approved",
-        "xrefs": ["ensembl:ENSG00000274847", "ncbigene:727764"],
-        "associated_with": [
+        "xrefs": [
+            "ensembl:ENSG00000274847",
+            "ncbigene:727764",
             "vega:OTTHUMG00000188065",
             "refseq:NR_046439",
             "uniprot:Q8WZ33",
@@ -319,8 +327,7 @@ def mt_7sdna():
         "previous_symbols": ["MT7SDNA"],
         "aliases": [],
         "symbol_status": "approved",
-        "xrefs": [],
-        "associated_with": ["pubmed:24709344", "pubmed:273237"],
+        "xrefs": ["pubmed:24709344", "pubmed:273237"],
         "gene_type": "region",
     }
     return Gene(**params)
@@ -350,7 +357,6 @@ def cecr():
         "aliases": [],
         "symbol_status": "approved",
         "xrefs": ["ncbigene:1055"],
-        "associated_with": [],
         "gene_type": "region",
     }
     return Gene(**params)
@@ -387,8 +393,9 @@ def csf2ra():
         "previous_symbols": ["CSF2R"],
         "aliases": ["CD116", "alphaGMR"],
         "symbol_status": "approved",
-        "xrefs": ["ensembl:ENSG00000198223", "ncbigene:1438"],
-        "associated_with": [
+        "xrefs": [
+            "ensembl:ENSG00000198223",
+            "ncbigene:1438",
             "vega:OTTHUMG00000012533",
             "refseq:NM_001161529",
             "orphanet:209477",
@@ -435,8 +442,7 @@ def rps24p5():
         "previous_symbols": [],
         "aliases": [],
         "symbol_status": "approved",
-        "xrefs": ["ncbigene:100271094"],
-        "associated_with": ["refseq:NG_011274", "pubmed:19123937"],
+        "xrefs": ["ncbigene:100271094", "refseq:NG_011274", "pubmed:19123937"],
         "gene_type": "pseudogene",
     }
     return Gene(**params)
@@ -465,8 +471,7 @@ def trl_cag2_1():
         "previous_symbols": ["TRNAL13"],
         "aliases": ["tRNA-Leu-CAG-2-1"],
         "symbol_status": "approved",
-        "xrefs": ["ncbigene:100189130"],
-        "associated_with": ["ena.embl:HG983896"],
+        "xrefs": ["ncbigene:100189130", "ena.embl:HG983896"],
         "gene_type": "RNA, transfer",
     }
     return Gene(**params)
@@ -495,8 +500,9 @@ def myo5b():
         "previous_symbols": [],
         "aliases": ["KIAA1119"],
         "symbol_status": "approved",
-        "xrefs": ["ensembl:ENSG00000167306", "ncbigene:4645"],
-        "associated_with": [
+        "xrefs": [
+            "ensembl:ENSG00000167306",
+            "ncbigene:4645",
             "vega:OTTHUMG00000179843",
             "refseq:NM_001080467",
             "omim:606540",
@@ -539,7 +545,7 @@ def gstt1():
         "previous_symbols": [],
         "aliases": ["2.5.1.18"],
         "symbol_status": "approved",
-        "associated_with": [
+        "xrefs": [
             "refseq:NM_000853",
             "omim:600436",
             "ucsc:uc002zze.4",
@@ -547,8 +553,9 @@ def gstt1():
             "orphanet:470418",
             "ena.embl:KI270879",
             "pubmed:8617495",
+            "ensembl:ENSG00000277656",
+            "ncbigene:2952",
         ],
-        "xrefs": ["ensembl:ENSG00000277656", "ncbigene:2952"],
         "gene_type": "gene with protein product",
     }
     return Gene(**params)
@@ -772,9 +779,9 @@ def test_myo5b(check_resp_single_record, myo5b, hgnc):
     resp = hgnc.search("MYO5B")
     check_resp_single_record(resp, myo5b, MatchType.SYMBOL)
 
-    # associated_with
+    # xref
     resp = hgnc.search("refseq:NM_001080467")
-    check_resp_single_record(resp, myo5b, MatchType.ASSOCIATED_WITH)
+    check_resp_single_record(resp, myo5b, MatchType.XREF)
 
 
 def test_gstt1(check_resp_single_record, gstt1, hgnc):
@@ -787,9 +794,9 @@ def test_gstt1(check_resp_single_record, gstt1, hgnc):
     resp = hgnc.search("GSTT1")
     check_resp_single_record(resp, gstt1, MatchType.SYMBOL)
 
-    # associated_with
+    # xref
     resp = hgnc.search("omim:600436")
-    check_resp_single_record(resp, gstt1, MatchType.ASSOCIATED_WITH)
+    check_resp_single_record(resp, gstt1, MatchType.XREF)
 
 
 def test_no_match(hgnc):
diff --git a/tests/unit/test_ncbi_source.py b/tests/unit/test_ncbi_source.py
index f7b7508c..95504401 100644
--- a/tests/unit/test_ncbi_source.py
+++ b/tests/unit/test_ncbi_source.py
@@ -22,7 +22,6 @@ def check_ncbi_discontinued_gene(normalizer_response, concept_id, symbol, match_
     assert resp.aliases == []
     assert resp.previous_symbols == []
     assert resp.xrefs == []
-    assert resp.associated_with == []
 
 
 @pytest.fixture(scope="module")
@@ -50,9 +49,8 @@ def dpf1():
         "concept_id": "ncbigene:8193",
         "symbol": "DPF1",
         "aliases": ["BAF45b", "NEUD4", "neuro-d4", "SMARCG1"],
-        "xrefs": ["hgnc:20225", "ensembl:ENSG00000011332"],
         "previous_symbols": [],
-        "associated_with": ["omim:601670"],
+        "xrefs": ["hgnc:20225", "ensembl:ENSG00000011332", "omim:601670"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": "-",
@@ -90,9 +88,8 @@ def pdp1_symbol():
         "concept_id": "ncbigene:54704",
         "symbol": "PDP1",
         "aliases": ["PDH", "PDP", "PDPC", "PPM2A", "PPM2C"],
-        "xrefs": ["hgnc:9279", "ensembl:ENSG00000164951"],
+        "xrefs": ["hgnc:9279", "ensembl:ENSG00000164951", "omim:605993"],
         "previous_symbols": ["LOC157663", "PPM2C"],
-        "associated_with": ["omim:605993"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": "+",
@@ -130,9 +127,8 @@ def pdp1_alias():
         "concept_id": "ncbigene:403313",
         "symbol": "PLPP6",
         "aliases": ["PDP1", "PSDP", "PPAPDC2", "bA6J24.6", "LPRP-B", "PA-PSP"],
-        "xrefs": ["hgnc:23682", "ensembl:ENSG00000205808"],
+        "xrefs": ["hgnc:23682", "ensembl:ENSG00000205808", "omim:611666"],
         "previous_symbols": [],
-        "associated_with": ["omim:611666"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": "+",
@@ -171,9 +167,8 @@ def spry3():
         "concept_id": "ncbigene:10251",
         "symbol": "SPRY3",
         "aliases": ["spry-3"],
-        "xrefs": ["hgnc:11271", "ensembl:ENSG00000168939"],
+        "xrefs": ["hgnc:11271", "ensembl:ENSG00000168939", "omim:300531"],
         "previous_symbols": ["LOC170187", "LOC253479"],
-        "associated_with": ["omim:300531"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": "+",
@@ -232,7 +227,6 @@ def adcp1():
         "aliases": [],
         "xrefs": ["hgnc:229"],
         "previous_symbols": [],
-        "associated_with": [],
         "symbol_status": None,
         "strand": None,
         "location_annotations": ["6"],
@@ -252,9 +246,8 @@ def afa():
         "concept_id": "ncbigene:170",
         "symbol": "AFA",
         "aliases": [],
-        "xrefs": [],
         "previous_symbols": [],
-        "associated_with": ["omim:106250"],
+        "xrefs": ["omim:106250"],
         "symbol_status": None,
         "strand": None,
         "location_annotations": [],
@@ -274,9 +267,8 @@ def znf84():
         "concept_id": "ncbigene:7637",
         "symbol": "ZNF84",
         "aliases": ["HPF2"],
-        "xrefs": ["hgnc:13159", "ensembl:ENSG00000198040"],
+        "xrefs": ["hgnc:13159", "ensembl:ENSG00000198040", "omim:618554"],
         "previous_symbols": ["LOC100287429"],
-        "associated_with": ["omim:618554"],
         "symbol_status": None,
         "location_annotations": ["map from Rosati ref via FISH [AFS]"],
         "strand": "+",
@@ -315,9 +307,14 @@ def slc25a6():
         "concept_id": "ncbigene:293",
         "symbol": "SLC25A6",
         "aliases": ["AAC3", "ANT", "ANT 2", "ANT 3", "ANT3", "ANT3Y"],
-        "xrefs": ["hgnc:10992", "ensembl:ENSG00000169100", "ensembl:ENSG00000292334"],
+        "xrefs": [
+            "hgnc:10992",
+            "ensembl:ENSG00000169100",
+            "ensembl:ENSG00000292334",
+            "omim:300151",
+            "omim:403000",
+        ],
         "previous_symbols": ["ANT3Y"],
-        "associated_with": ["omim:300151", "omim:403000"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": "-",
@@ -376,7 +373,6 @@ def loc106783576():
         "aliases": [],
         "xrefs": [],
         "previous_symbols": [],
-        "associated_with": [],
         "symbol_status": None,
         "location_annotations": [],
         "strand": None,
@@ -405,9 +401,8 @@ def glc1b():
         "concept_id": "ncbigene:2722",
         "symbol": "GLC1B",
         "aliases": [],
-        "xrefs": [],
         "previous_symbols": [],
-        "associated_with": ["omim:606689"],
+        "xrefs": ["omim:606689"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": None,
@@ -436,9 +431,8 @@ def hdpa():
         "concept_id": "ncbigene:50829",
         "symbol": "HDPA",
         "aliases": [],
-        "xrefs": [],
         "previous_symbols": [],
-        "associated_with": ["omim:300221"],
+        "xrefs": ["omim:300221"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": None,
@@ -470,7 +464,6 @@ def prkrap1():
         "aliases": [],
         "xrefs": ["hgnc:33447"],
         "previous_symbols": ["LOC100289695"],
-        "associated_with": [],
         "symbol_status": None,
         "location_annotations": ["alternate reference locus"],
         "strand": "+",
@@ -519,9 +512,8 @@ def mhb():
         "concept_id": "ncbigene:619511",
         "symbol": "MHB",
         "aliases": [],
-        "xrefs": [],
         "previous_symbols": [],
-        "associated_with": ["omim:255160"],
+        "xrefs": ["omim:255160"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": None,
@@ -550,9 +542,8 @@ def spg37():
         "concept_id": "ncbigene:100049159",
         "symbol": "SPG37",
         "aliases": [],
-        "xrefs": [],
         "previous_symbols": [],
-        "associated_with": ["omim:611945"],
+        "xrefs": ["omim:611945"],
         "symbol_status": None,
         "location_annotations": [],
         "strand": None,
@@ -607,9 +598,9 @@ def test_dpf1(check_resp_single_record, ncbi, dpf1):
     resp = ncbi.search("neuro-d4")
     check_resp_single_record(resp, dpf1, MatchType.ALIAS)
 
-    # associated_with
+    # xref
     resp = ncbi.search("omim:601670")
-    check_resp_single_record(resp, dpf1, MatchType.ASSOCIATED_WITH)
+    check_resp_single_record(resp, dpf1, MatchType.XREF)
 
     # No Match
     resp = ncbi.search("DPF 1")
@@ -751,9 +742,9 @@ def test_glc1b(check_resp_single_record, ncbi, glc1b):
     resp = ncbi.search("GLC1B")
     check_resp_single_record(resp, glc1b, MatchType.SYMBOL)
 
-    # associated_with
+    # xref
     resp = ncbi.search("omim:606689")
-    check_resp_single_record(resp, glc1b, MatchType.ASSOCIATED_WITH)
+    check_resp_single_record(resp, glc1b, MatchType.XREF)
 
 
 def test_hdpa(check_resp_single_record, ncbi, hdpa):
@@ -792,9 +783,9 @@ def test_mhb(check_resp_single_record, ncbi, mhb):
     resp = ncbi.search("MHB")
     check_resp_single_record(resp, mhb, MatchType.SYMBOL)
 
-    # associated_with
+    # xref
     resp = ncbi.search("OMIM:255160")
-    check_resp_single_record(resp, mhb, MatchType.ASSOCIATED_WITH)
+    check_resp_single_record(resp, mhb, MatchType.XREF)
 
 
 def test_spg37(check_resp_single_record, ncbi, spg37):
@@ -807,9 +798,9 @@ def test_spg37(check_resp_single_record, ncbi, spg37):
     resp = ncbi.search("SPG37")
     check_resp_single_record(resp, spg37, MatchType.SYMBOL)
 
-    # associated_with
+    # xref
     resp = ncbi.search("omim:611945")
-    check_resp_single_record(resp, spg37, MatchType.ASSOCIATED_WITH)
+    check_resp_single_record(resp, spg37, MatchType.XREF)
 
 
 def test_discontinued_genes(ncbi):
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
index bfb11460..f9b08927 100644
--- a/tests/unit/test_query.py
+++ b/tests/unit/test_query.py
@@ -702,7 +702,6 @@ def normalize_unmerged_loc_653303():
                         "aliases": [],
                         "previous_symbols": ["LOC196266", "LOC731196", "LOC654080"],
                         "xrefs": [],
-                        "associated_with": [],
                         "gene_type": "pseudo",
                     }
                 ]
@@ -745,8 +744,9 @@ def normalize_unmerged_chaf1a():
                             "CAF-1",
                         ],
                         "previous_symbols": [],
-                        "xrefs": ["ensembl:ENSG00000167670", "ncbigene:10036"],
-                        "associated_with": [
+                        "xrefs": [
+                            "ensembl:ENSG00000167670",
+                            "ncbigene:10036",
                             "vega:OTTHUMG00000181922",
                             "ccds:CCDS32875",
                             "ucsc:uc002mal.4",
@@ -784,7 +784,6 @@ def normalize_unmerged_chaf1a():
                         "aliases": [],
                         "previous_symbols": [],
                         "xrefs": ["hgnc:1910"],
-                        "associated_with": [],
                         "gene_type": "protein_coding",
                     }
                 ],
@@ -820,8 +819,11 @@ def normalize_unmerged_chaf1a():
                         ],
                         "aliases": ["CAF1P150", "P150", "CAF1", "CAF1B", "CAF-1"],
                         "previous_symbols": ["LOC107985297"],
-                        "xrefs": ["ensembl:ENSG00000167670", "hgnc:1910"],
-                        "associated_with": ["omim:601246"],
+                        "xrefs": [
+                            "ensembl:ENSG00000167670",
+                            "hgnc:1910",
+                            "omim:601246",
+                        ],
                         "gene_type": "protein-coding",
                     }
                 ]
@@ -867,8 +869,7 @@ def normalize_unmerged_ache():
                         ],
                         "aliases": ["YT", "ARACHE", "ACEE", "N-ACHE"],
                         "previous_symbols": ["ACEE"],
-                        "xrefs": ["hgnc:108", "ensembl:ENSG00000087085"],
-                        "associated_with": ["omim:100740"],
+                        "xrefs": ["hgnc:108", "ensembl:ENSG00000087085", "omim:100740"],
                         "gene_type": "protein-coding",
                     }
                 ],
@@ -897,7 +898,6 @@ def normalize_unmerged_ache():
                         "aliases": [],
                         "previous_symbols": [],
                         "xrefs": ["hgnc:108"],
-                        "associated_with": [],
                         "gene_type": "protein_coding",
                     }
                 ]
@@ -923,8 +923,9 @@ def normalize_unmerged_ache():
                         ],
                         "aliases": ["3.1.1.7"],
                         "previous_symbols": ["YT"],
-                        "xrefs": ["ncbigene:43", "ensembl:ENSG00000087085"],
-                        "associated_with": [
+                        "xrefs": [
+                            "ncbigene:43",
+                            "ensembl:ENSG00000087085",
                             "ucsc:uc003uxi.4",
                             "vega:OTTHUMG00000157033",
                             "merops:S09.979",
@@ -1050,7 +1051,6 @@ def compare_unmerged_record(gene, test_gene):
     assert set(gene.xrefs) == set(test_gene.xrefs)
     assert gene.symbol_status == test_gene.symbol_status
     assert set(gene.previous_symbols) == set(test_gene.previous_symbols)
-    assert set(gene.associated_with) == set(test_gene.associated_with)
     assert gene.symbol == test_gene.symbol
     assert len(gene.locations) == len(test_gene.locations)
     for loc in gene.locations:
@@ -1259,7 +1259,7 @@ def test_ache_query(query_handler, num_sources, normalized_ache, source_meta):
     compare_normalize_resp(
         resp,
         q,
-        MatchType.ASSOCIATED_WITH,
+        MatchType.XREF,
         normalized_ache,
         expected_source_meta=source_meta,
     )
@@ -1337,7 +1337,7 @@ def test_braf_query(query_handler, num_sources, normalized_braf, source_meta):
     compare_normalize_resp(
         resp,
         q,
-        MatchType.ASSOCIATED_WITH,
+        MatchType.XREF,
         normalized_braf,
         expected_source_meta=source_meta,
     )
@@ -1439,7 +1439,7 @@ def test_abl1_query(query_handler, num_sources, normalized_abl1, source_meta):
     compare_normalize_resp(
         resp,
         q,
-        MatchType.ASSOCIATED_WITH,
+        MatchType.XREF,
         normalized_abl1,
         expected_source_meta=source_meta,
     )
@@ -1572,18 +1572,14 @@ def test_normalize_unmerged(
     resp = query_handler.normalize_unmerged(q)
     compare_unmerged_response(resp, q, [], MatchType.ALIAS, normalize_unmerged_chaf1a)
 
-    # assoc with
+    # xref
     q = "omim:100740"
     resp = query_handler.normalize_unmerged(q)
-    compare_unmerged_response(
-        resp, q, [], MatchType.ASSOCIATED_WITH, normalize_unmerged_ache
-    )
+    compare_unmerged_response(resp, q, [], MatchType.XREF, normalize_unmerged_ache)
 
     q = "uniprot:Q13111"
     resp = query_handler.normalize_unmerged(q)
-    compare_unmerged_response(
-        resp, q, [], MatchType.ASSOCIATED_WITH, normalize_unmerged_chaf1a
-    )
+    compare_unmerged_response(resp, q, [], MatchType.XREF, normalize_unmerged_chaf1a)
 
 
 def test_invalid_queries(query_handler):
diff --git a/tests/unit/test_schemas.py b/tests/unit/test_schemas.py
index 3d5fceed..afe56b84 100644
--- a/tests/unit/test_schemas.py
+++ b/tests/unit/test_schemas.py
@@ -78,15 +78,6 @@ def test_gene(gene, sequence_location):
             xrefs=["hgnc", "hgnc:1"],
         )
 
-    # associated_with not a valid curie
-    with pytest.raises(pydantic.ValidationError):
-        Gene(
-            match_type=100,
-            concept_id="hgnc:1096",
-            symbol="BRAF",
-            associated_with=["hgnc", "hgnc:1"],
-        )
-
     # symbol status invalid
     with pytest.raises(pydantic.ValidationError):
         Gene(