Skip to content

Commit

Permalink
new schema mpi functions (#30)
Browse files Browse the repository at this point in the history
## Description
Implementing the MPI functions, `get_block_data` and
`insert_matched_patient`, using the new schema.

## Related Issues
closes #10 

## Additional Notes

- A couple of large changes to the DB schema
- The BlockingKey table has been replaced with an enum. With the
direction that we're going, we'll want to lock these in place. Using a
DB to store them is fine, but I think that opens up the possibility that
a user could add another one (say directly through some SQL query and
skip the app), that would throw off all our existing BlockingValues. I
think it's safer to store this directly in code, to protect our users
from potentially making this mistake. Additionally, each one of these
values needs a way to transform a collection of Patient PII into a list
of Blocking Values specific to a key, so some code was going to be
necessary no matter what.
- The ExternalPerson table was deleted. With the current design some
fidelity is being lost here that doesn't seem ideal. Yes, we are storing
all the external person identifiers received by the documents, but once
a Person cluster grows beyond 2 Patients, we have no way to trace an
external_person_id back to the Patient it was originally attached to. By
moving this field over to the Patient table, we retain the ability to
track document external_person_id with patient_id.
- Patient.external_patient_id has been added. In the current schema,
Patient.id is either internal or external, just depends on if a Patient
resource id is present in the FHIR bundle. If we want to continue to track
that external patient resource id that is sometimes present, we need a
column in the patient table to store that info.
- The python-dotenv dependency has been removed, `settings.db_uri` now
has the information that we used dotenv to load, so we don't need this
anymore
  • Loading branch information
ericbuckley committed Sep 24, 2024
1 parent 6cc3772 commit 0431771
Show file tree
Hide file tree
Showing 16 changed files with 751 additions and 92 deletions.
3 changes: 2 additions & 1 deletion .env
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
DB_URI="postgresql+psycopg2://postgres:pw@localhost:5432/postgres"
DB_URI="postgresql+psycopg2://postgres:pw@localhost:5432/postgres"
TEST_DB_URI="sqlite:///:memory:"
2 changes: 2 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
## Additional Notes
[Add any additional context or notes that reviewers should know about.]

<--------------------- REMOVE THE LINES BELOW BEFORE MERGING --------------------->

## Checklist
Please review and complete the following checklist before submitting your pull request:

Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ temp/
*.tmp

# macOS
.DS_Store
# ignore all files ending in .DS_Store
**/.DS_Store

# Python-Virtual Environments
.venv
Expand Down
5 changes: 2 additions & 3 deletions alembic/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

from alembic import context

import dotenv
ENV = dotenv.dotenv_values()

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
Expand All @@ -16,7 +14,8 @@
# This line sets up loggers basically.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
config.set_main_option("sqlalchemy.url", ENV["DB_URI"])
from recordlinker.config import settings
# Point Alembic at the MPI database configured in the application settings.
# The Settings field is declared in lowercase (`db_uri`); attribute access on
# the settings object is case-sensitive, so `settings.DB_URI` would raise
# AttributeError even though the environment variable itself is DB_URI.
config.set_main_option("sqlalchemy.url", settings.db_uri)

# add your model's MetaData object here
# for 'autogenerate' support
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""external person id should be nullable
Revision ID: 64ed9566f189
Revises: bfbd015ca466
Create Date: 2024-09-18 20:22:07.510203
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '64ed9566f189'
down_revision: Union[str, None] = 'bfbd015ca466'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Make the external person columns on ``mpi_patient`` nullable.

    Also drops the ``ix_mpi_blocking_value_value`` index from
    ``mpi_blocking_value``.
    """
    op.drop_index('ix_mpi_blocking_value_value', table_name='mpi_blocking_value')

    # (column name, VARCHAR length) pairs, relaxed in this order.
    relaxed_columns = (
        ('external_person_id', 255),
        ('external_person_source', 100),
    )
    for column_name, width in relaxed_columns:
        op.alter_column(
            'mpi_patient',
            column_name,
            existing_type=sa.VARCHAR(length=width),
            nullable=True,
        )


def downgrade() -> None:
    """Revert the external person columns on ``mpi_patient`` to NOT NULL.

    Also recreates the non-unique ``ix_mpi_blocking_value_value`` index on
    ``mpi_blocking_value.value``.
    """
    # (column name, VARCHAR length) pairs, tightened in the reverse order of
    # the upgrade.
    # NOTE(review): presumably this fails if any row holds NULL in these
    # columns -- confirm before downgrading a populated database.
    tightened_columns = (
        ('external_person_source', 100),
        ('external_person_id', 255),
    )
    for column_name, width in tightened_columns:
        op.alter_column(
            'mpi_patient',
            column_name,
            existing_type=sa.VARCHAR(length=width),
            nullable=False,
        )

    op.create_index(
        'ix_mpi_blocking_value_value',
        'mpi_blocking_value',
        ['value'],
        unique=False,
    )
57 changes: 57 additions & 0 deletions alembic/versions/ad18f1d41fad_convert_blockingkey_into_enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""convert BlockingKey into enum
Revision ID: ad18f1d41fad
Revises: 6052c193a26a
Create Date: 2024-09-17 21:15:37.714595
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = 'ad18f1d41fad'
down_revision: Union[str, None] = '6052c193a26a'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Replace the table-backed blocking keys with an integer ``blockingkey`` column.

    Drops ``mpi_blocking_value`` and ``mpi_blocking_key``, then recreates
    ``mpi_blocking_value`` with a plain integer ``blockingkey`` column (no
    foreign key to a key table) and a composite index over
    ``(patient_id, blockingkey, value)``. Rows in the dropped tables are not
    carried over.
    """
    # The old mpi_blocking_value table holds a foreign key to
    # mpi_blocking_key (see downgrade), so the referencing table has to be
    # dropped first; dropping mpi_blocking_key first is rejected by the
    # database while the constraint still exists.
    op.drop_table('mpi_blocking_value')
    op.drop_table('mpi_blocking_key')

    op.create_table(
        'mpi_blocking_value',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('patient_id', sa.Integer(), nullable=False),
        sa.Column('blockingkey', sa.Integer(), nullable=False),
        sa.Column('value', sa.String(length=50), nullable=False),
        sa.ForeignKeyConstraint(['patient_id'], ['mpi_patient.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_index(
        'idx_blocking_value_patient_key_value',
        'mpi_blocking_value',
        ['patient_id', 'blockingkey', 'value'],
        unique=False,
    )


def downgrade() -> None:
    """Restore the table-backed blocking key schema.

    Drops the current ``mpi_blocking_value`` table, recreates
    ``mpi_blocking_key``, then recreates ``mpi_blocking_value`` with a
    ``blockingkey_id`` foreign key and a non-unique index on ``value``.
    Rows in the dropped table are not carried over.
    """
    op.drop_table('mpi_blocking_value')

    # The key table must exist before the value table that references it.
    key_columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('key', sa.String(length=50), nullable=False),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table('mpi_blocking_key', *key_columns)

    value_columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('patient_id', sa.Integer(), nullable=False),
        sa.Column('blockingkey_id', sa.Integer(), nullable=False),
        sa.Column('value', sa.String(length=50), nullable=False),
        sa.ForeignKeyConstraint(['blockingkey_id'], ['mpi_blocking_key.id'], ),
        sa.ForeignKeyConstraint(['patient_id'], ['mpi_patient.id'], ),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table('mpi_blocking_value', *value_columns)

    index_name = op.f('ix_mpi_blocking_value_value')
    op.create_index(index_name, 'mpi_blocking_value', ['value'], unique=False)
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""moving ExternalPerson to Patient table
Revision ID: bfbd015ca466
Revises: ad18f1d41fad
Create Date: 2024-09-18 20:10:30.193941
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = 'bfbd015ca466'
down_revision: Union[str, None] = 'ad18f1d41fad'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Move external person tracking from its own table onto ``mpi_patient``.

    Drops ``mpi_external_person``, creates a non-unique index on
    ``mpi_blocking_value.value``, and adds the ``external_person_id`` and
    ``external_person_source`` columns to ``mpi_patient``. Rows in
    ``mpi_external_person`` are not migrated.
    """
    op.drop_table('mpi_external_person')
    op.create_index(
        op.f('ix_mpi_blocking_value_value'),
        'mpi_blocking_value',
        ['value'],
        unique=False,
    )

    # The columns are added as nullable: adding a NOT NULL column without a
    # server default to a table that already contains rows is rejected by
    # the database, because existing rows would have no value to store.
    # Revision 64ed9566f189 declares these columns nullable anyway, so the
    # schema after the full upgrade chain is unchanged.
    op.add_column(
        'mpi_patient',
        sa.Column('external_person_id', sa.String(length=255), nullable=True),
    )
    op.add_column(
        'mpi_patient',
        sa.Column('external_person_source', sa.String(length=100), nullable=True),
    )


def downgrade() -> None:
    """Move external person tracking back into ``mpi_external_person``.

    Removes the two external person columns from ``mpi_patient``, drops the
    index on ``mpi_blocking_value.value``, and recreates the
    ``mpi_external_person`` table. The values held in the dropped columns
    are not copied into the recreated table.
    """
    for column_name in ('external_person_source', 'external_person_id'):
        op.drop_column('mpi_patient', column_name)

    index_name = op.f('ix_mpi_blocking_value_value')
    op.drop_index(index_name, table_name='mpi_blocking_value')

    external_person_columns = [
        sa.Column('id', sa.INTEGER(), nullable=False),
        sa.Column('person_id', sa.INTEGER(), nullable=False),
        sa.Column('external_id', sa.VARCHAR(length=255), nullable=False),
        sa.Column('source', sa.VARCHAR(length=255), nullable=False),
        sa.ForeignKeyConstraint(['person_id'], ['mpi_person.id'], ),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table('mpi_external_person', *external_person_columns)
Binary file removed assets/.DS_Store
Binary file not shown.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ dependencies = [
"fastapi",
"pydantic",
"pydantic-settings",
"python-dateutil==2.9.0",
"sqlalchemy",
"fhirpathpy",
"rapidfuzz",
Expand All @@ -37,7 +38,6 @@ dev = [
"pyarrow",
"httpx",
"alembic",
"python-dotenv"
]
prod = [
# List any additional production-only dependencies here
Expand Down Expand Up @@ -65,6 +65,7 @@ select = ["E4", "E7", "E9", "F", "I", "D102", "D103", "D104", "D105", "D106"] #

[tool.ruff.lint.per-file-ignores]
"**/__init__.py" = ["D"]
"tests/*.py" = ["D102", "D103"] # Ignore the public docstring rules in test files

[tool.ruff.lint.isort]
# The following settings reduce the number of changes from reorder-python-imports
Expand Down
Binary file removed src/.DS_Store
Binary file not shown.
Binary file removed src/recordlinker/.DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions src/recordlinker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ class Settings(pydantic_settings.BaseSettings):
)

db_uri: str = pydantic.Field(description="The URI for the MPI database")
# FIXME: A separate URI for testing is temporary; this is necessary right now because
# the old schema only works with postgresql. Once the old schema is removed we can
# consolidate db_uri and test_db_uri into a single setting.
test_db_uri: str = pydantic.Field(description="The URI for the MPI database to run tests against")
connection_pool_size: typing.Optional[int] = pydantic.Field(
description="The number of MPI database connections in the connection pool",
default=5,
Expand Down
Loading

0 comments on commit 0431771

Please sign in to comment.