diff --git a/.fides/db_dataset.yml b/.fides/db_dataset.yml
index 43ea3a0e05..68ec99159d 100644
--- a/.fides/db_dataset.yml
+++ b/.fides/db_dataset.yml
@@ -2340,3 +2340,17 @@ dataset:
             data_categories: [system.operations]
           - name: updated_at
             data_categories: [system.operations]
+      - name: dbcache
+        fields:
+          - name: id
+            data_categories: [system.operations]
+          - name: namespace
+            data_categories: [system.operations]
+          - name: cache_key
+            data_categories: [system.operations]
+          - name: cache_value
+            data_categories: [system.operations]
+          - name: created_at
+            data_categories: [system.operations]
+          - name: updated_at
+            data_categories: [system.operations]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d1e8deb8f8..10aee4d193 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ The types of changes are:
 - Added event based communication example to the Cookie House sample app [#5597](https://github.com/ethyca/fides/pull/5597)
 - Added new erasure tests for BigQuery Enterprise [#5554](https://github.com/ethyca/fides/pull/5554)
 - Added new `has_next` parameter for the `link` pagination strategy [#5596](https://github.com/ethyca/fides/pull/5596)
+- Added a `DBCache` model for database-backed caching [#5613](https://github.com/ethyca/fides/pull/5613)
 
 ### Changed
 - Adjusted Ant's Select component colors and icon [#5594](https://github.com/ethyca/fides/pull/5594)
diff --git a/src/fides/api/alembic/migrations/versions/e5ec30dfcd87_add_dbcache_table.py b/src/fides/api/alembic/migrations/versions/e5ec30dfcd87_add_dbcache_table.py
new file mode 100644
index 0000000000..2decf67000
--- /dev/null
+++ b/src/fides/api/alembic/migrations/versions/e5ec30dfcd87_add_dbcache_table.py
@@ -0,0 +1,63 @@
+"""Add DBCache table
+
+Revision ID: e5ec30dfcd87
+Revises: c90d46f6d3f2
+Create Date: 2024-12-17 16:48:04.006190
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "e5ec30dfcd87"
+down_revision = "c90d46f6d3f2"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "dbcache",
+        sa.Column("id", sa.String(length=255), nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=True,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=True,
+        ),
+        sa.Column("namespace", sa.String(), nullable=False),
+        sa.Column("cache_key", sa.String(), nullable=False),
+        sa.Column("cache_value", postgresql.BYTEA(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(op.f("ix_dbcache_id"), "dbcache", ["id"], unique=False)
+    op.create_index(
+        op.f("ix_dbcache_namespace"), "dbcache", ["namespace"], unique=False
+    )
+    # Index over (namespace, cache_key), with unique set to True to ensure
+    # unique cache keys within a namespace
+    op.create_index(
+        op.f("ix_dbcache_namespace_cache_key"),
+        "dbcache",
+        ["namespace", "cache_key"],
+        unique=True,
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f("ix_dbcache_namespace"), table_name="dbcache")
+    op.drop_index(op.f("ix_dbcache_id"), table_name="dbcache")
+    op.drop_index(op.f("ix_dbcache_namespace_cache_key"), table_name="dbcache")
+    op.drop_table("dbcache")
+    # ### end Alembic commands ###
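A note on the migration above: `ix_dbcache_namespace_cache_key` is created with `unique=True`, so the database itself allows at most one row per `(namespace, cache_key)` pair. The sketch below is illustrative only and not part of this change; it assumes a SQLAlchemy `Session` named `db`, and the key and value literals are made up.

```python
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

from fides.api.models.db_cache import DBCache, DBCacheNamespace


def show_unique_constraint(db: Session) -> None:
    """Illustrative sketch: a second row with the same (namespace, cache_key)
    is rejected by the unique ix_dbcache_namespace_cache_key index."""
    db.add(
        DBCache(
            namespace=DBCacheNamespace.LIST_PRIVACY_EXPERIENCE.value,
            cache_key="duplicate-key",
            cache_value=b"first",
        )
    )
    db.commit()

    db.add(
        DBCache(
            namespace=DBCacheNamespace.LIST_PRIVACY_EXPERIENCE.value,
            cache_key="duplicate-key",
            cache_value=b"second",
        )
    )
    try:
        db.commit()  # expected to fail against the unique (namespace, cache_key) index
    except IntegrityError:
        db.rollback()  # the model's upsert helper avoids this by updating in place
```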
diff --git a/src/fides/api/db/base.py b/src/fides/api/db/base.py
index 1ebc8a90fd..9743c8dc29 100644
--- a/src/fides/api/db/base.py
+++ b/src/fides/api/db/base.py
@@ -12,6 +12,7 @@
 from fides.api.models.custom_connector_template import CustomConnectorTemplate
 from fides.api.models.custom_report import CustomReport
 from fides.api.models.datasetconfig import DatasetConfig
+from fides.api.models.db_cache import DBCache
 from fides.api.models.detection_discovery import MonitorConfig, StagedResource
 from fides.api.models.experience_notices import ExperienceNotices
 from fides.api.models.fides_cloud import FidesCloud
diff --git a/src/fides/api/models/db_cache.py b/src/fides/api/models/db_cache.py
new file mode 100644
index 0000000000..4102a0d942
--- /dev/null
+++ b/src/fides/api/models/db_cache.py
@@ -0,0 +1,89 @@
+from enum import Enum
+from typing import ByteString, Optional
+
+from sqlalchemy import Column, Index, String
+from sqlalchemy.dialects.postgresql import BYTEA
+from sqlalchemy.orm import Session
+
+from fides.api.db.base_class import Base
+
+
+class DBCacheNamespace(Enum):
+    """Namespaces for the DBCache"""
+
+    LIST_PRIVACY_EXPERIENCE = "list-privacy-experience"
+
+
+class DBCache(Base):
+    """
+    Cache table for storing arbitrary data, useful when in-memory caches aren't enough.
+    For example if cache contents need to be persisted across server restarts,
+    or if the cache needs to be shared across different Fides instances.
+
+    Warning: Cache contents are NOT encrypted, this shouldn't be used for storing
+    any sort of personal data or sensitive information.
+    """
+
+    namespace = Column(
+        String, nullable=False, index=True
+    )  # Add a namespace since the same cache key could technically be used for different contexts
+    cache_key = Column(String, nullable=False)
+    cache_value = Column(BYTEA, nullable=False)
+
+    __table_args__ = (
+        Index("ix_dbcache_namespace_cache_key", "namespace", "cache_key"),
+    )
+
+    @classmethod
+    def get_cache_entry(
+        cls,
+        db: Session,
+        namespace: DBCacheNamespace,
+        cache_key: str,
+    ) -> Optional["DBCache"]:
+        """
+        Retrieves the cache entry for the given cache_key
+        """
+        return (
+            db.query(cls)
+            .filter(cls.namespace == namespace.value, cls.cache_key == cache_key)
+            .first()
+        )
+
+    @classmethod
+    def get_cache_value(
+        cls,
+        db: Session,
+        namespace: DBCacheNamespace,
+        cache_key: str,
+    ) -> Optional[ByteString]:
+        """
+        Retrieves the cache value for the given cache_key
+        """
+        cache_entry = cls.get_cache_entry(db, namespace, cache_key)
+
+        return cache_entry.cache_value if cache_entry else None
+
+    @classmethod
+    def set_cache_value(
+        cls,
+        db: Session,
+        namespace: DBCacheNamespace,
+        cache_key: str,
+        cache_value: ByteString,
+    ) -> "DBCache":
+        """
+        Upserts the cache value for the given cache_key
+        """
+        db_cache_entry = cls.get_cache_entry(db, namespace, cache_key)
+        if db_cache_entry:
+            db_cache_entry.cache_value = cache_value
+        else:
+            db_cache_entry = cls(
+                namespace=namespace.value, cache_key=cache_key, cache_value=cache_value
+            )
+
+        db.add(db_cache_entry)
+        db.commit()
+        db.refresh(db_cache_entry)
+        return db_cache_entry
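For reviewers, a minimal usage sketch of the model above. It assumes a SQLAlchemy `Session` named `db`; the helper names, the JSON round-tripping, and the key strings are illustrative and not part of this change (cache values are raw bytes, so callers handle their own serialization):

```python
import json
from typing import Optional

from sqlalchemy.orm import Session

from fides.api.models.db_cache import DBCache, DBCacheNamespace


def cache_json(db: Session, key: str, payload: dict) -> None:
    """Serialize a dict to bytes and upsert it under the given key."""
    DBCache.set_cache_value(
        db,
        DBCacheNamespace.LIST_PRIVACY_EXPERIENCE,
        cache_key=key,
        cache_value=json.dumps(payload).encode(),
    )


def load_json(db: Session, key: str) -> Optional[dict]:
    """Return the cached dict for the given key, or None on a cache miss."""
    cached = DBCache.get_cache_value(db, DBCacheNamespace.LIST_PRIVACY_EXPERIENCE, key)
    return json.loads(cached.decode()) if cached is not None else None
```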
+ """ + + namespace = Column( + String, nullable=False, index=True + ) # Add a namespace since the same cache key could technically be used for different contexts + cache_key = Column(String, nullable=False) + cache_value = Column(BYTEA, nullable=False) + + __table_args__ = ( + Index("ix_dbcache_namespace_cache_key", "namespace", "cache_key"), + ) + + @classmethod + def get_cache_entry( + cls, + db: Session, + namespace: DBCacheNamespace, + cache_key: str, + ) -> Optional["DBCache"]: + """ + Retrieves the cache entry for the given cache_key + """ + return ( + db.query(cls) + .filter(cls.namespace == namespace.value, cls.cache_key == cache_key) + .first() + ) + + @classmethod + def get_cache_value( + cls, + db: Session, + namespace: DBCacheNamespace, + cache_key: str, + ) -> Optional[ByteString]: + """ + Retrieves the cache value for the given cache_key + """ + cache_entry = cls.get_cache_entry(db, namespace, cache_key) + + return cache_entry.cache_value if cache_entry else None + + @classmethod + def set_cache_value( + cls, + db: Session, + namespace: DBCacheNamespace, + cache_key: str, + cache_value: ByteString, + ) -> "DBCache": + """ + Upserts the cache value for the given cache_key + """ + db_cache_entry = cls.get_cache_entry(db, namespace, cache_key) + if db_cache_entry: + db_cache_entry.cache_value = cache_value + else: + db_cache_entry = cls( + namespace=namespace.value, cache_key=cache_key, cache_value=cache_value + ) + + db.add(db_cache_entry) + db.commit() + db.refresh(db_cache_entry) + return db_cache_entry diff --git a/tests/ops/models/test_dbcache.py b/tests/ops/models/test_dbcache.py new file mode 100644 index 0000000000..a1c48aec42 --- /dev/null +++ b/tests/ops/models/test_dbcache.py @@ -0,0 +1,53 @@ +from fides.api.models.db_cache import DBCache, DBCacheNamespace + + +class TestDBCacheModel: + def test_get_nonexisting_entry(self, db): + cache_value = DBCache.get_cache_value( + db, DBCacheNamespace.LIST_PRIVACY_EXPERIENCE, "nonexisting" + ) + assert cache_value is None + + def test_set_and_get_cache_value(self, db): + cache_value = DBCache.set_cache_value( + db, + DBCacheNamespace.LIST_PRIVACY_EXPERIENCE, + "some-key", + '{"some": "value", "another": "value"}'.encode(), + ) + assert ( + cache_value.cache_value.decode() == '{"some": "value", "another": "value"}' + ) + + cache_value = DBCache.get_cache_value( + db, DBCacheNamespace.LIST_PRIVACY_EXPERIENCE, "some-key" + ) + assert cache_value.decode() == '{"some": "value", "another": "value"}' + + def test_update_cache_value(self, db): + # First we set a value + cache_value = DBCache.set_cache_value( + db, + DBCacheNamespace.LIST_PRIVACY_EXPERIENCE, + "some-key", + "value 1".encode(), + ) + assert cache_value.cache_value.decode() == "value 1" + assert ( + DBCache.get_cache_value( + db, DBCacheNamespace.LIST_PRIVACY_EXPERIENCE, "some-key" + ).decode() + == "value 1" + ) + + # Update the cache value + cache_value = DBCache.set_cache_value( + db, DBCacheNamespace.LIST_PRIVACY_EXPERIENCE, "some-key", "value 2".encode() + ) + assert cache_value.cache_value.decode() == "value 2" + + # Check the value was actually updated + updated_value = DBCache.get_cache_value( + db, DBCacheNamespace.LIST_PRIVACY_EXPERIENCE, "some-key" + ) + assert updated_value.decode() == "value 2"