Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow masking without primary keys #5575

Merged
merged 30 commits into from
Jan 10, 2025
Merged
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8bd2f74
Allow masking without primary keys
galvana Dec 9, 2024
cadcdb7
Updating tests
galvana Dec 9, 2024
de3ce24
Separating overlapping keys in update value map
galvana Dec 9, 2024
93974b8
Fixing data type
galvana Dec 9, 2024
f274ae0
Sorting update map keys
galvana Dec 9, 2024
357b6ec
Removing primary keys from sample and test datasets
galvana Dec 9, 2024
fc1aacc
Simplifying generate_update_stmt and fixing tests
galvana Dec 9, 2024
0e11551
More cleanup
galvana Dec 9, 2024
6a10d87
Misc fixes
galvana Dec 9, 2024
001b8ec
Renaming identity_or_reference_fields_paths to incoming_field_paths
galvana Dec 9, 2024
f31fd3a
Merge branch 'main' into LA-95-masking-without-primary-keys
galvana Dec 9, 2024
8816be7
Re-adding continue on error
galvana Dec 9, 2024
cae8493
Adding individual timeouts to tests
galvana Dec 10, 2024
bb7714a
Fixing datasets
galvana Dec 10, 2024
0d43401
Fixing some tests
galvana Dec 10, 2024
b13632b
Fixing MongoDB dataset
galvana Dec 10, 2024
b0ef57d
Re-adding primary key to mongo_test.customer_details
galvana Dec 10, 2024
77a5770
Splitting out query configs and tests
galvana Dec 10, 2024
5d26b2f
Splitting out tests
galvana Dec 10, 2024
fb7e566
Merge branch 'split-query-config-files' into LA-95-masking-without-pr…
galvana Dec 10, 2024
647586f
Reverting most of the removal of primary keys + misc files
galvana Dec 10, 2024
7600ab4
Removing primary key requirement for BigQuery erasures
galvana Dec 10, 2024
dd8a3ad
Setting requires_primary_keys for select connectors + updating tests
galvana Dec 11, 2024
3c3c63c
Revert setting requires_primary_keys to False for SaaS connectors
galvana Dec 11, 2024
74f6b22
Merge branch 'split-query-config-files' into LA-95-masking-without-pr…
galvana Dec 19, 2024
191910d
Reverting SaaS dataset changes
galvana Jan 6, 2025
a78cbec
Reverting SaaS dataset changes
galvana Jan 6, 2025
bf03464
Fixing tests
galvana Jan 6, 2025
8d552b0
Adding tests
galvana Jan 7, 2025
dec9b21
Removing continue-on-error
galvana Jan 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Splitting out query configs and tests
galvana committed Dec 10, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit 77a5770a897c5d647308c2449bb46c64bbd5e855
Original file line number Diff line number Diff line change
@@ -72,7 +72,7 @@ def generate_update_stmt(
where_clauses: Dict[str, Any] = filter_nonempty_values(
{
field_path.string_path: field.cast(row[field_path.string_path])
for field_path, field in self.incoming_field_paths.items()
for field_path, field in self.primary_key_field_paths.items()
}
)

17 changes: 13 additions & 4 deletions src/fides/api/service/connectors/query_configs/query_config.py
Original file line number Diff line number Diff line change
@@ -101,7 +101,7 @@ def primary_key_field_paths(self) -> Dict[FieldPath, Field]:
}

@property
def incoming_field_paths(self) -> Dict[FieldPath, Field]:
def reference_field_paths(self) -> Dict[FieldPath, Field]:
"""Mapping of FieldPaths to Fields that have incoming identity or dataset references"""
return {
field_path: field
@@ -447,10 +447,19 @@ def generate_update_stmt(
) -> Optional[T]:
"""Returns an update statement in generic SQL-ish dialect."""
update_value_map: Dict[str, Any] = self.update_value_map(row, policy, request)

non_empty_primary_key_fields: Dict[str, Field] = filter_nonempty_values(
{
fpath.string_path: fld.cast(row[fpath.string_path])
for fpath, fld in self.primary_key_field_paths.items()
if fpath.string_path in row
}
)

non_empty_reference_fields: Dict[str, Field] = filter_nonempty_values(
{
fpath.string_path: fld.cast(row[fpath.string_path])
for fpath, fld in self.incoming_field_paths.items()
for fpath, fld in self.reference_field_paths.items()
if fpath.string_path in row
}
)
@@ -463,10 +472,10 @@ def generate_update_stmt(

update_clauses = self.get_update_clauses(
{k: f"masked_{k}" for k in update_value_map},
non_empty_reference_fields,
non_empty_primary_key_fields or non_empty_reference_fields,
)
where_clauses = self.format_key_map_for_update_stmt(
{k: k for k in non_empty_reference_fields}
{k: k for k in non_empty_primary_key_fields or non_empty_reference_fields}
)

valid = len(where_clauses) > 0 and len(update_clauses) > 0
129 changes: 129 additions & 0 deletions tests/ops/service/connectors/test_dynamodb_query_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from datetime import datetime, timezone

import pytest
from boto3.dynamodb.types import TypeDeserializer
from fideslang.models import Dataset

from fides.api.graph.config import CollectionAddress
from fides.api.graph.graph import DatasetGraph
from fides.api.graph.traversal import Traversal
from fides.api.models.datasetconfig import convert_dataset_to_graph
from fides.api.models.privacy_request import PrivacyRequest
from fides.api.service.connectors.query_configs.dynamodb_query_config import (
DynamoDBQueryConfig,
)

# Shared PrivacyRequest with a fixed id; passed to generate_update_stmt in the erasure test below.
privacy_request = PrivacyRequest(id="234544")


class TestDynamoDBQueryConfig:
    """Tests for DynamoDBQueryConfig query and update-statement generation.

    The fixtures build a traversal over an example dataset and expose mock
    execution nodes plus sample rows in DynamoDB's typed attribute format
    (``{"S": ...}``, ``{"L": ...}``, ``{"M": ...}``).
    """

    @staticmethod
    def _deserialize_row(row):
        """Return ``row`` with every typed attribute value run through
        boto3's ``TypeDeserializer``; keys are left unchanged."""
        deserializer = TypeDeserializer()
        return {key: deserializer.deserialize(value) for key, value in row.items()}

    @pytest.fixture
    def identity(self):
        """Identity data used to seed the traversal."""
        return {"email": "[email protected]"}

    @pytest.fixture
    def dataset_graph(self, integration_dynamodb_config, example_datasets):
        """DatasetGraph built from a single example dataset bound to the
        DynamoDB integration config key."""
        # NOTE(review): index 11 is presumably the DynamoDB example dataset —
        # confirm against the ordering of the example_datasets fixture.
        dataset = Dataset(**example_datasets[11])
        graph_dataset = convert_dataset_to_graph(
            dataset, integration_dynamodb_config.key
        )
        return DatasetGraph(graph_dataset)

    @pytest.fixture
    def traversal(self, identity, dataset_graph):
        """Traversal over the dataset graph, seeded with the identity fixture."""
        return Traversal(dataset_graph, identity)

    @pytest.fixture
    def customer_node(self, traversal):
        """Mock execution node for the ``customer`` collection."""
        address = CollectionAddress("dynamodb_example_test_dataset", "customer")
        return traversal.traversal_node_dict[address].to_mock_execution_node()

    @pytest.fixture
    def customer_identifier_node(self, traversal):
        """Mock execution node for the ``customer_identifier`` collection."""
        address = CollectionAddress(
            "dynamodb_example_test_dataset", "customer_identifier"
        )
        return traversal.traversal_node_dict[address].to_mock_execution_node()

    @pytest.fixture
    def customer_row(self):
        """Sample ``customer`` row in DynamoDB typed-attribute format."""
        return {
            "customer_email": {"S": "[email protected]"},
            "name": {"S": "John Customer"},
            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
            "id": {"S": "1"},
        }

    @pytest.fixture
    def deserialized_customer_row(self, customer_row):
        """The ``customer_row`` fixture with its values deserialized."""
        return self._deserialize_row(customer_row)

    @pytest.fixture
    def customer_identifier_row(self):
        """Sample ``customer_identifier`` row in DynamoDB typed-attribute format.

        ``created`` is the current UTC time, so it differs on every invocation.
        """
        return {
            "customer_id": {"S": "[email protected]"},
            "email": {"S": "[email protected]"},
            "name": {"S": "Customer 1"},
            "created": {"S": datetime.now(timezone.utc).isoformat()},
        }

    @pytest.fixture
    def deserialized_customer_identifier_row(self, customer_identifier_row):
        """The ``customer_identifier_row`` fixture with its values deserialized."""
        return self._deserialize_row(customer_identifier_row)

    def test_get_query_param_formatting_single_key(
        self,
        resources_dict,
        customer_node,
    ) -> None:
        """generate_query emits a key condition on the single ``email`` input."""
        input_data = {
            "fidesops_grouped_inputs": [],
            "email": ["[email protected]"],
        }
        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
        item = query_config.generate_query(
            input_data=input_data, policy=resources_dict["policy"]
        )
        assert item["ExpressionAttributeValues"] == {
            ":value": {"S": "[email protected]"}
        }
        assert item["KeyConditionExpression"] == "email = :value"

    def test_put_query_param_formatting_single_key(
        self,
        erasure_policy,
        customer_node,
        deserialized_customer_row,
    ) -> None:
        """generate_update_stmt returns the full typed row with only ``name``
        replaced by a NULL attribute."""
        # The update statement is derived from the row itself, so no separate
        # input_data dict is needed here.
        attribute_definitions = [{"AttributeName": "email", "AttributeType": "S"}]
        query_config = DynamoDBQueryConfig(customer_node, attribute_definitions)
        update_item = query_config.generate_update_stmt(
            deserialized_customer_row, erasure_policy, privacy_request
        )

        assert update_item == {
            "customer_email": {"S": "[email protected]"},
            "name": {"NULL": True},
            "address_id": {"L": [{"S": "1"}, {"S": "2"}]},
            "personal_info": {"M": {"gender": {"S": "male"}, "age": {"S": "99"}}},
            "id": {"S": "1"},
        }
Loading

Unchanged files with check annotations Beta

expect(cmpApi.getGppString()).toEqual("DBABLA~BAAAAAAAAWA.QA");
});
it.skip("can set some to provided", () => {

Check warning on line 173 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, privacy-center)

Skipped test

Check warning on line 173 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, fides-js)

Skipped test

Check warning on line 173 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, admin-ui)

Skipped test
const cmpApi = new CmpApi(1, 1);
const notices = [
mockPrivacyNotice({
expect(cmpApi.getGppString()).toEqual("DBABLA~BVAAAAAAAWA.QA");
});
it.skip("can set all to provided", () => {

Check warning on line 222 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, privacy-center)

Skipped test

Check warning on line 222 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, fides-js)

Skipped test

Check warning on line 222 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, admin-ui)

Skipped test
const cmpApi = new CmpApi(1, 1);
const notices = [
mockPrivacyNotice({
expect(cmpApi.getGppString()).toEqual("DBABLA~BAAAAAAAAWA.QA");
});
it.skip("can set fields when there is a partial consent object in cookie", () => {

Check warning on line 417 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, privacy-center)

Skipped test

Check warning on line 417 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, fides-js)

Skipped test

Check warning on line 417 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, admin-ui)

Skipped test
const cmpApi = new CmpApi(1, 1);
const cookie = mockFidesCookie({
consent: { data_sales_and_sharing: true },
expect(cmpApi.getGppString()).toEqual("DBABLA~BAAoAAAAAWA.QA");
});
it.skip("can set all fields to not opted out for consent object in cookie", () => {

Check warning on line 456 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, privacy-center)

Skipped test

Check warning on line 456 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, fides-js)

Skipped test

Check warning on line 456 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, admin-ui)

Skipped test
const cmpApi = new CmpApi(1, 1);
const cookie = mockFidesCookie({
consent: {
expect(cmpApi.getGppString()).toEqual("DBABLA~BAAqqqqqqWA.QA");
});
it.skip("can set all fields to opted out for consent object in cookie", () => {

Check warning on line 507 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, privacy-center)

Skipped test

Check warning on line 507 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, fides-js)

Skipped test

Check warning on line 507 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, admin-ui)

Skipped test
const cmpApi = new CmpApi(1, 1);
const cookie = mockFidesCookie({
consent: {
expect(cmpApi.getGppString()).toEqual("DBABLA~BAAVVVVVVWA.QA");
});
it.skip("can use US gpp fields when gpp is set to national", () => {

Check warning on line 558 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, privacy-center)

Skipped test

Check warning on line 558 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, fides-js)

Skipped test

Check warning on line 558 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, admin-ui)

Skipped test
const cmpApi = new CmpApi(1, 1);
const cookie = mockFidesCookie({
consent: {
expect(cmpApi.getSection("usny")).toBe(null);
});
it.skip("can use US gpp fields when gpp is set to all", () => {

Check warning on line 685 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, privacy-center)

Skipped test

Check warning on line 685 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, fides-js)

Skipped test

Check warning on line 685 in clients/fides-js/__tests__/lib/gpp/us-notices.ts

GitHub Actions / Clients-Unit (20.x, admin-ui)

Skipped test
const cmpApi = new CmpApi(1, 1);
const cookie = mockFidesCookie({
consent: {