Skip to content

Commit

Permalink
Feat/algorithm configuration schema (#31)
Browse files Browse the repository at this point in the history
## Description
In models.py add new classes for storing data related to the available
algorithms to run.

```mermaid
erDiagram
    Algorithm {
        int id
        bool is_default "a check should be added to guarantee that only 1 row in the table is marked as the default"
        string label "should be unique"
        string description
    }

    AlgorithmPass {
        int id
        int algorithm_id
        int[] blocking_keys "a list of values from the BlockingKey table"
        string[] evaluators "a list of matching functions and values to use"
        string rule "the evaluation rule function"
        float cluster_ratio
        json kwargs "extra parameters to pass to the evaluator functions"
    }

    Algorithm ||--o{ AlgorithmPass: "has"
```

## Related Issues
closes #13

## Additional Notes
[Add any additional context or notes that reviewers should know about.]

## Checklist
Please review and complete the following checklist before submitting
your pull request:

- [x] I have ensured that the pull request is of a manageable size,
allowing it to be reviewed within a single session.
- [x] I have reviewed my changes to ensure they are clear, concise, and
well-documented.
- [x] I have updated the documentation, if applicable.
- [x] I have added or updated test cases to cover my changes, if
applicable.
- [x] I have minimized the number of reviewers to include only those
essential for the review.
- [x] I have notified teammates in the review thread to build awareness.

## Checklist for Reviewers
Please review and complete the following checklist during the review
process:

- [ ] The code follows best practices and conventions.
- [ ] The changes implement the desired functionality or fix the
reported issue.
- [ ] The tests cover the new changes and pass successfully.
- [ ] Any potential edge cases or error scenarios have been considered.
  • Loading branch information
cbrinson-rise8 committed Sep 20, 2024
1 parent 30da916 commit 6cc3772
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
@@ -1 +1 @@
DB_URI="postgresql+psycopg2://postgres:pw@localhost:5432/postgres"
DB_URI="postgresql+psycopg2://postgres:pw@localhost:5432/postgres"
53 changes: 53 additions & 0 deletions alembic/versions/0c90faa0378f_create_algorithm_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""create algorithm tables
Revision ID: 0c90faa0378f
Revises: 6052c193a26a
Create Date: 2024-09-20 11:41:13.377954
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '0c90faa0378f'
down_revision: Union[str, None] = '6052c193a26a'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the ``algorithm`` and ``algorithm_pass`` tables plus related indexes.

    The operations below were auto-generated by Alembic and are applied in
    order: parent table, its index, child table, then the index on
    ``mpi_blocking_key.key``.
    """
    # Parent table; ``algorithm_pass.algorithm_id`` references its primary key.
    op.create_table(
        "algorithm",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("is_default", sa.Boolean(), nullable=False),
        sa.Column("label", sa.String(length=255), nullable=False),
        sa.Column("description", sa.Text(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("label"),
    )
    op.create_index(
        op.f("ix_algorithm_is_default"),
        "algorithm",
        ["is_default"],
        unique=False,
    )

    # Child table; the list/dict style fields are stored as JSON columns.
    op.create_table(
        "algorithm_pass",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("algorithm_id", sa.Integer(), nullable=False),
        sa.Column("blocking_keys", sa.JSON(), nullable=False),
        sa.Column("evaluators", sa.JSON(), nullable=False),
        sa.Column("rule", sa.String(length=255), nullable=False),
        sa.Column("cluster_ratio", sa.Float(), nullable=False),
        sa.Column("kwargs", sa.JSON(), nullable=False),
        sa.ForeignKeyConstraint(["algorithm_id"], ["algorithm.id"]),
        sa.PrimaryKeyConstraint("id"),
    )

    # Non-unique index on an existing table, emitted by the same autogenerate run.
    op.create_index(
        op.f("ix_mpi_blocking_key_key"),
        "mpi_blocking_key",
        ["key"],
        unique=False,
    )


def downgrade() -> None:
    """Undo ``upgrade`` by dropping its indexes and tables in reverse order.

    The operations below were auto-generated by Alembic.
    """
    op.drop_index(
        op.f("ix_mpi_blocking_key_key"),
        table_name="mpi_blocking_key",
    )
    # The child table goes first because it holds the foreign key to ``algorithm``.
    op.drop_table("algorithm_pass")
    op.drop_index(
        op.f("ix_algorithm_is_default"),
        table_name="algorithm",
    )
    op.drop_table("algorithm")
52 changes: 48 additions & 4 deletions src/recordlinker/linkage/models.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
import uuid

from sqlalchemy import event
from sqlalchemy import ForeignKey
from sqlalchemy import JSON
from sqlalchemy import orm
from sqlalchemy import String
from sqlalchemy import Text


class Base(orm.DeclarativeBase):
    """Declarative base class that every ORM model in this module inherits from."""


class Person(Base):
    """ORM model mapped to the ``mpi_person`` table."""

    __tablename__ = "mpi_person"

    # Integer primary key.
    id: orm.Mapped[int] = orm.mapped_column(
        primary_key=True,
    )
    # UUID column; ``uuid.uuid4`` supplies a value when none is given.
    internal_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
        default=uuid.uuid4,
    )


class ExternalPerson(Base):
__tablename__ = "mpi_external_person"

Expand All @@ -25,15 +25,13 @@ class ExternalPerson(Base):
external_id: orm.Mapped[str] = orm.mapped_column(String(255))
source: orm.Mapped[str] = orm.mapped_column(String(255))


class Patient(Base):
    """ORM model mapped to the ``mpi_patient`` table."""

    __tablename__ = "mpi_patient"

    # Integer primary key.
    id: orm.Mapped[int] = orm.mapped_column(
        primary_key=True,
    )
    # Foreign key to ``mpi_person.id``.
    person_id: orm.Mapped[int] = orm.mapped_column(
        ForeignKey("mpi_person.id"),
    )
    # Dictionary payload stored in a JSON column.
    data: orm.Mapped[dict] = orm.mapped_column(
        JSON,
    )


class BlockingKey(Base):
__tablename__ = "mpi_blocking_key"

Expand All @@ -47,3 +45,49 @@ class BlockingValue(Base):
patient_id: orm.Mapped[int] = orm.mapped_column(ForeignKey("mpi_patient.id"))
blockingkey_id: orm.Mapped[int] = orm.mapped_column(ForeignKey("mpi_blocking_key.id"))
value: orm.Mapped[str] = orm.mapped_column(String(50), index=True)

class Algorithm(Base):
    """ORM model mapped to the ``algorithm`` table."""

    __tablename__ = "algorithm"

    # Integer primary key.
    id: orm.Mapped[int] = orm.mapped_column(
        primary_key=True,
    )
    # Indexed flag; new rows are non-default unless stated otherwise.
    is_default: orm.Mapped[bool] = orm.mapped_column(
        default=False,
        index=True,
    )
    # Name of the algorithm, limited to 255 characters and unique across rows.
    label: orm.Mapped[str] = orm.mapped_column(
        String(255),
        unique=True,
    )
    # Free-text description.
    description: orm.Mapped[str] = orm.mapped_column(
        Text(),
    )

def check_only_one_default(mapping, connection, target):
    """
    Reject an insert or update that would produce a second default algorithm.

    Intended for use as a mapper-level ``before_insert`` / ``before_update``
    listener on ``Algorithm``. The lookup is issued on the ``connection``
    handed to the listener rather than through the ORM session, because the
    session is in the middle of a flush when these events fire.

    Parameters:
        mapping: The mapper that is the target of the event (unused).
        connection: The database connection being used for the operation.
        target: The instance of the Algorithm class being inserted or updated.

    Raises:
        ValueError: If another algorithm is already marked as default.
    """
    # A row that is not flagged as default can never violate the rule.
    if not target.is_default:
        return

    # Imported locally so this function does not rely on a module-level import.
    from sqlalchemy import select

    # Look for any row already flagged as default. ``is_(True)`` renders an
    # SQL ``IS`` comparison and avoids the ``== True`` lint suppression.
    stmt = select(Algorithm.id).where(Algorithm.is_default.is_(True))

    # On update, the row being saved may itself be the current default; leave
    # it out of the search. On insert the id is not assigned yet, so every
    # existing default row counts as a conflict.
    if target.id is not None:
        stmt = stmt.where(Algorithm.id != target.id)

    if connection.execute(stmt.limit(1)).first() is not None:
        raise ValueError("There can only be one default algorithm")

# Run the single-default check before every Algorithm INSERT and UPDATE.
for _event_name in ("before_insert", "before_update"):
    event.listen(Algorithm, _event_name, check_only_one_default)

class AlgorithmPass(Base):
    """ORM model mapped to the ``algorithm_pass`` table."""

    __tablename__ = "algorithm_pass"

    # Integer primary key.
    id: orm.Mapped[int] = orm.mapped_column(
        primary_key=True,
    )
    # Foreign key to ``algorithm.id``.
    algorithm_id: orm.Mapped[int] = orm.mapped_column(
        ForeignKey("algorithm.id"),
    )
    # List of integers stored in a JSON column.
    blocking_keys: orm.Mapped[list[int]] = orm.mapped_column(
        JSON,
    )
    # List of strings stored in a JSON column.
    evaluators: orm.Mapped[list[str]] = orm.mapped_column(
        JSON,
    )
    # String of at most 255 characters.
    rule: orm.Mapped[str] = orm.mapped_column(
        String(255),
    )
    # Float column; the column type is derived from the annotation alone.
    cluster_ratio: orm.Mapped[float]
    # Dictionary stored in a JSON column.
    kwargs: orm.Mapped[dict] = orm.mapped_column(
        JSON,
    )
77 changes: 77 additions & 0 deletions tests/unit/test_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import scoped_session

from recordlinker.linkage.models import Base, Algorithm, check_only_one_default

MOCK_SETTINGS = {"db_uri": "sqlite:///:memory:"}

# Create an in-memory SQLite database for testing
@pytest.fixture(scope="function")
def setup_database():
    """
    Provide a scoped session factory bound to a fresh in-memory SQLite database.

    All tables known to ``Base.metadata`` are created before the test runs.
    Teardown releases the scoped session first, so no open transaction is
    still holding the tables, then drops the tables and disposes the engine.

    Yields:
        The ``scoped_session`` factory; call it to obtain a session.
    """
    engine = create_engine(MOCK_SETTINGS["db_uri"])  # In-memory database
    Session = scoped_session(sessionmaker(bind=engine))
    Base.metadata.create_all(engine)  # Create tables

    yield Session  # Provide the session factory to tests

    # Cleanup after tests: close the session before touching the schema, then
    # release the engine's pooled connections.
    Session.remove()
    Base.metadata.drop_all(engine)
    engine.dispose()

def test_single_default_algorithm(setup_database):
    """
    Committing a second default algorithm must raise a ValueError.
    """
    db = setup_database()

    # Persist one algorithm that is flagged as the default.
    first_default = Algorithm(
        label="Algorithm 1",
        is_default=True,
        description="First algorithm",
    )
    db.add(first_default)
    db.commit()

    # Stage a second algorithm that also claims to be the default.
    second_default = Algorithm(
        label="Algorithm 2",
        is_default=True,
        description="Second algorithm",
    )
    db.add(second_default)

    # The commit is where the conflict is expected to surface.
    with pytest.raises(ValueError, match="There can only be one default algorithm"):
        db.commit()

def test_set_default_when_none_exists(setup_database):
    """
    Tests that you can update an algorithm to be the default if no other default exists
    """

    session = setup_database()

    # is_default set to false
    algo1 = Algorithm(label="Algorithm 1", is_default=False, description="First algorithm")
    session.add(algo1)
    session.commit()

    # try setting it as the default
    algo1.is_default = True
    session.add(algo1)

    # should not raise any value errors
    session.commit()

    # Reading the attribute after the commit confirms the change was kept,
    # rather than only checking that no exception was raised.
    assert algo1.is_default is True

def test_update_existing_default(setup_database):
    """
    Tests that updating the default algorithm does not raise a ValueError
    """

    session = setup_database()

    # algorithm is_default set to True
    algo1 = Algorithm(label="Algorithm 1", is_default=True, description="First algorithm")
    session.add(algo1)
    session.commit()

    # update the same algorithm
    algo1.description = "Updated algorithm"
    session.add(algo1)

    # should not raise any value errors
    session.commit()

    # Confirm the update was kept and the row is still the default,
    # rather than only checking that no exception was raised.
    assert algo1.description == "Updated algorithm"
    assert algo1.is_default is True

0 comments on commit 6cc3772

Please sign in to comment.