Feat/algorithm configuration schema (#31)

## Description In models.py add new classes for storing data related to the available algorithms to run. ```mermaid erDiagram Algorithm { int id bool is_default "a check should be added to guarantee that only 1 row in the table is marked as the default" string label "should be unique" string description } AlgorithmPass { int id int algorithm_id int[] blockingkeys "a list of values from the BlockingKey table" string[] evaluators "a list of matching functions and values to use" string rule "the evaluation rule function" float cluster_ratio json kwargs "extra parameters to pass to the evaluator functions" } Algorithm ||--o{ AlgorithmPass: "has" ``` ## Related Issues closes #13 ## Additional Notes [Add any additional context or notes that reviewers should know about.] ## Checklist Please review and complete the following checklist before submitting your pull request: - [x] I have ensured that the pull request is of a manageable size, allowing it to be reviewed within a single session. - [x] I have reviewed my changes to ensure they are clear, concise, and well-documented. - [x] I have updated the documentation, if applicable. - [x] I have added or updated test cases to cover my changes, if applicable. - [x] I have minimized the number of reviewers to include only those essential for the review. - [x] I have notified teammates in the review thread to build awareness. ## Checklist for Reviewers Please review and complete the following checklist during the review process: - [ ] The code follows best practices and conventions. - [ ] The changes implement the desired functionality or fix the reported issue. - [ ] The tests cover the new changes and pass successfully. - [ ] Any potential edge cases or error scenarios have been considered.
CDCgov · Sep 20, 2024 · 6cc3772 · 6cc3772
1 parent 30da916
commit 6cc3772
Show file tree

Hide file tree

Showing 4 changed files with 179 additions and 5 deletions.
diff --git a/.env b/.env
@@ -1 +1 @@
-DB_URI="postgresql+psycopg2://postgres:pw@localhost:5432/postgres"
+DB_URI="postgresql+psycopg2://postgres:pw@localhost:5432/postgres"
diff --git a/alembic/versions/0c90faa0378f_create_algorithm_tables.py b/alembic/versions/0c90faa0378f_create_algorithm_tables.py
@@ -0,0 +1,53 @@
+"""create algorithm tables
+
+Revision ID: 0c90faa0378f
+Revises: 6052c193a26a
+Create Date: 2024-09-20 11:41:13.377954
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '0c90faa0378f'
+down_revision: Union[str, None] = '6052c193a26a'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the ``algorithm`` and ``algorithm_pass`` tables plus their indexes."""
+    # Parent table; ``label`` carries a unique constraint.
+    op.create_table(
+        "algorithm",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("is_default", sa.Boolean(), nullable=False),
+        sa.Column("label", sa.String(length=255), nullable=False),
+        sa.Column("description", sa.Text(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("label"),
+    )
+    op.create_index(
+        op.f("ix_algorithm_is_default"),
+        "algorithm",
+        ["is_default"],
+        unique=False,
+    )
+
+    # Child table; each row references its owning algorithm.
+    op.create_table(
+        "algorithm_pass",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("algorithm_id", sa.Integer(), nullable=False),
+        sa.Column("blocking_keys", sa.JSON(), nullable=False),
+        sa.Column("evaluators", sa.JSON(), nullable=False),
+        sa.Column("rule", sa.String(length=255), nullable=False),
+        sa.Column("cluster_ratio", sa.Float(), nullable=False),
+        sa.Column("kwargs", sa.JSON(), nullable=False),
+        sa.ForeignKeyConstraint(["algorithm_id"], ["algorithm.id"]),
+        sa.PrimaryKeyConstraint("id"),
+    )
+
+    # Index on a table that this revision does not create.
+    op.create_index(
+        op.f("ix_mpi_blocking_key_key"),
+        "mpi_blocking_key",
+        ["key"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    """Undo ``upgrade`` by dropping its indexes and tables in reverse order."""
+    op.drop_index(
+        op.f("ix_mpi_blocking_key_key"),
+        table_name="mpi_blocking_key",
+    )
+    # The child table goes first because it holds the foreign key to ``algorithm``.
+    op.drop_table("algorithm_pass")
+    op.drop_index(
+        op.f("ix_algorithm_is_default"),
+        table_name="algorithm",
+    )
+    op.drop_table("algorithm")
diff --git a/src/recordlinker/linkage/models.py b/src/recordlinker/linkage/models.py
@@ -1,22 +1,22 @@
 import uuid
 
+from sqlalchemy import event
 from sqlalchemy import ForeignKey
 from sqlalchemy import JSON
 from sqlalchemy import orm
 from sqlalchemy import String
+from sqlalchemy import Text
 
 
 class Base(orm.DeclarativeBase):
     pass
 
-
 class Person(Base):
     __tablename__ = "mpi_person"
 
     id: orm.Mapped[int] = orm.mapped_column(primary_key=True)
     internal_id: orm.Mapped[uuid.UUID] = orm.mapped_column(default=uuid.uuid4)
 
-
 class ExternalPerson(Base):
     __tablename__ = "mpi_external_person"
 
@@ -25,15 +25,13 @@ class ExternalPerson(Base):
     external_id: orm.Mapped[str] = orm.mapped_column(String(255))
     source: orm.Mapped[str] = orm.mapped_column(String(255))
 
-
 class Patient(Base):
     __tablename__ = "mpi_patient"
 
     id: orm.Mapped[int] = orm.mapped_column(primary_key=True)
     person_id: orm.Mapped[int] = orm.mapped_column(ForeignKey("mpi_person.id"))
     data: orm.Mapped[dict] = orm.mapped_column(JSON)
 
-
 class BlockingKey(Base):
     __tablename__ = "mpi_blocking_key"
 
@@ -47,3 +45,49 @@ class BlockingValue(Base):
     patient_id: orm.Mapped[int] = orm.mapped_column(ForeignKey("mpi_patient.id"))
     blockingkey_id: orm.Mapped[int] = orm.mapped_column(ForeignKey("mpi_blocking_key.id"))
     value: orm.Mapped[str] = orm.mapped_column(String(50), index=True)
+
+class Algorithm(Base):
+    """
+    ORM model for the ``algorithm`` table.
+
+    Only one row is meant to be flagged as the default; that rule is enforced
+    by the ``check_only_one_default`` listeners registered in this module.
+    """
+    __tablename__ = "algorithm"
+
+    id: orm.Mapped[int] = orm.mapped_column(primary_key=True)
+    # Indexed flag; False unless set explicitly.
+    is_default: orm.Mapped[bool] = orm.mapped_column(default=False, index=True)
+    # Unique name, up to 255 characters.
+    label: orm.Mapped[str] = orm.mapped_column(String(255), unique=True)
+    # Free-form text.
+    description: orm.Mapped[str] = orm.mapped_column(Text())
+
+def check_only_one_default(mapping, connection, target):
+    """
+    Check if there is already a default algorithm before inserting or updating.
+    If another default algorithm exists, an exception is raised to prevent the operation.
+
+    Registered below as the ``before_insert`` and ``before_update`` mapper
+    event listener for Algorithm.
+
+    Parameters:
+    mapping: The mapper passed by the event system (unused).
+    connection: The database connection being used for the operation.
+    target: The instance of the Algorithm class being inserted or updated.
+
+    Raises:
+    ValueError: If another algorithm is already marked as default.
+    """
+    # Local import keeps this function independent of the module import list.
+    from sqlalchemy import select
+
+    # Rows that are not claiming the default slot can never conflict.
+    if not target.is_default:
+        return
+
+    # Mapper flush events should emit SQL on the connection they are given
+    # rather than going back through the Session that is mid-flush.
+    # ``.is_(True)`` builds the boolean test without tripping ruff E712.
+    stmt = select(Algorithm.id).where(Algorithm.is_default.is_(True))
+
+    # A row being inserted has no id yet; a row being updated must not
+    # count itself as the conflicting default.
+    if target.id is not None:
+        stmt = stmt.where(Algorithm.id != target.id)
+
+    if connection.execute(stmt.limit(1)).first() is not None:
+        raise ValueError("There can only be one default algorithm")
+
+event.listen(Algorithm, 'before_insert', check_only_one_default)
+event.listen(Algorithm, 'before_update', check_only_one_default)
+
+class AlgorithmPass(Base):
+    """
+    ORM model for the ``algorithm_pass`` table; each row belongs to one Algorithm.
+    """
+    __tablename__ = "algorithm_pass"
+
+    id: orm.Mapped[int] = orm.mapped_column(primary_key=True)
+    # Owning algorithm.
+    algorithm_id: orm.Mapped[int] = orm.mapped_column(ForeignKey("algorithm.id"))
+    # Per the PR description: a list of values from the BlockingKey table.
+    blocking_keys: orm.Mapped[list[int]] = orm.mapped_column(JSON)
+    # Per the PR description: a list of matching functions and values to use.
+    evaluators: orm.Mapped[list[str]] = orm.mapped_column(JSON)
+    # Per the PR description: the evaluation rule function.
+    rule: orm.Mapped[str] = orm.mapped_column(String(255))
+    cluster_ratio: orm.Mapped[float]
+    # Per the PR description: extra parameters to pass to the evaluator functions.
+    kwargs: orm.Mapped[dict] = orm.mapped_column(JSON)
diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py
@@ -0,0 +1,77 @@
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.orm import scoped_session
+
+from recordlinker.linkage.models import Base, Algorithm, check_only_one_default
+
+MOCK_SETTINGS = {"db_uri": "sqlite:///:memory:"}
+
+# Create an in-memory SQLite database for testing
+@pytest.fixture(scope="function")
+def setup_database():
+    """
+    Yield a scoped session factory bound to a fresh in-memory SQLite database.
+
+    All tables in ``Base.metadata`` are created before the test and dropped
+    again afterwards.
+    """
+    engine = create_engine(MOCK_SETTINGS["db_uri"])  # In-memory database
+    Session = scoped_session(sessionmaker(bind=engine))
+    Base.metadata.create_all(engine)  # Create tables
+
+    yield Session  # Provide the session object to tests
+
+    # Cleanup after tests. Release the session first so no open transaction
+    # or checked-out connection is left while the tables are dropped, then
+    # dispose of the engine so its pooled connections are closed.
+    Session.remove()
+    Base.metadata.drop_all(engine)
+    engine.dispose()
+
+def test_single_default_algorithm(setup_database):
+    """
+    Tests that only one algorithm can be default in the Algorithm table
+    """
+
+    session = setup_database()
+
+    # first algorithm is_default set to True
+    algo1 = Algorithm(label="Algorithm 1", is_default=True, description="First algorithm")
+    session.add(algo1)
+    session.commit()
+
+    # create another algorithm and try to set is_default as True
+    algo2 = Algorithm(label="Algorithm 2", is_default=True, description="Second algorithm")
+    session.add(algo2)
+
+    with pytest.raises(ValueError, match="There can only be one default algorithm"):
+        session.commit()
+
+    # a failed flush leaves the session unusable until it is rolled back
+    session.rollback()
+
+    # the rejected row must not have been persisted
+    defaults = session.query(Algorithm).filter(Algorithm.is_default.is_(True)).all()
+    assert [algo.label for algo in defaults] == ["Algorithm 1"]
+    assert session.query(Algorithm).count() == 1
+
+def test_set_default_when_none_exists(setup_database):
+    """
+    Tests that you can update an algorithm to be the default if no other default exists
+    """
+
+    session = setup_database()
+
+    # is_default set to false
+    algo1 = Algorithm(label="Algorithm 1", is_default=False, description="First algorithm")
+    session.add(algo1)
+    session.commit()
+
+    # try setting it as the default
+    algo1.is_default = True
+    session.add(algo1)
+
+    # should not raise any value errors
+    session.commit()
+
+    # the change must actually be stored, not merely accepted without error
+    assert algo1.is_default is True
+    assert session.query(Algorithm).filter(Algorithm.is_default.is_(True)).count() == 1
+
+def test_update_existing_default(setup_database):
+    """
+    Tests that updating the default algorithm does not raise a ValueError
+    """
+
+    session = setup_database()
+
+    # algorithm is_default set to True
+    algo1 = Algorithm(label="Algorithm 1", is_default=True, description="First algorithm")
+    session.add(algo1)
+    session.commit()
+
+    # update the same algorithm
+    algo1.description = "Updated algorithm"
+    session.add(algo1)
+
+    # should not raise any value errors
+    session.commit()
+
+    # the update is stored and the row is still the default
+    assert algo1.description == "Updated algorithm"
+    assert algo1.is_default is True