diff --git a/CHANGELOG.md b/CHANGELOG.md index 705562dd..a379255b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,25 @@ The types of changes are: ## [Unreleased](https://github.com/ethyca/fideslang/compare/1.1.0...main) +### Added + +* The `DataFlow` resource model defines a resource with which a `System` resource may communicate [#85](https://github.com/ethyca/fideslang/pull/85) +* `PrivacyDeclaration`s may define `egress` and `ingress`, to contextualize communications with other resources [#85](https://github.com/ethyca/fideslang/pull/85) + +### Deprecated + +* The `dataset_references` field of `PrivacyDeclaration` resources [#85](https://github.com/ethyca/fideslang/pull/85) +* The `system_dependencies` field of `System` resources [#85](https://github.com/ethyca/fideslang/pull/85) + +### Developer Experience + +* The `DataFlow` resource model is exposed when importing `fideslang` [#85](https://github.com/ethyca/fideslang/pull/85) + ### Fixed * Fixed broken links in docs [#74](https://github.com/ethyca/fideslang/pull/74) * Pydantic 1.10.0 was causing issues so specified the pydantic version needs to be less than 1.10.0 [#79](https://github.com/ethyca/fideslang/pull/79) +* Resolved a circular import in `default_taxonomy.py` [#85](https://github.com/ethyca/fideslang/pull/85) ## [1.2.0](https://github.com/ethyca/fideslang/compare/1.1.0...1.2.0) diff --git a/demo_resources/demo_system.yml b/demo_resources/demo_system.yml index d4ea7df8..72448167 100644 --- a/demo_resources/demo_system.yml +++ b/demo_resources/demo_system.yml @@ -12,6 +12,9 @@ system: is_required: True progress: Complete link: https://example.org/analytics_system_data_protection_impact_assessment + ingress: + - fides_key: demo_users_dataset + type: dataset privacy_declarations: - name: Analyze customer behaviour for improvements. data_categories: @@ -21,7 +24,7 @@ system: data_subjects: - customer data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - dataset_references: + ingress: - demo_users_dataset - fides_key: demo_marketing_system diff --git a/mkdocs/docs/resources/system.md b/mkdocs/docs/resources/system.md index 90e6c1b6..5c0f28ee 100644 --- a/mkdocs/docs/resources/system.md +++ b/mkdocs/docs/resources/system.md @@ -54,6 +54,14 @@ The array of properties that declare the requirement for and information surroun Information will be exported as part of the data map or Record of Processing Activites (RoPA) +**egress**     [array]      + +The resources to which the System sends data. + +**ingress**     [array]      + +The resources from which the System receives data. + **privacy_declarations**     [array]      The array of declarations describing the types of data in your system. This is a list of the privcy attributes (`data_category`, `data_use`, `data_subject`, and `data_qualifier`) for each of your systems. @@ -88,6 +96,16 @@ system: is_required: True progress: Complete link: https://example.org/analytics_system_data_protection_impact_assessment + egress: + - fides_key: another_demo_system + type: system + data_categories: + - user.contact + ingress: + - fides_key: yet_another_demo_system + type: system + data_categories: + - user.device.cookie_id privacy_declarations: - name: Analyze customer behaviour for improvements. data_categories: @@ -97,8 +115,10 @@ system: data_subjects: - customer data_qualifier: identified_data - dataset_references: - - demo_users_dataset + egress: + - another_demo_system + ingress: + - yet_another_demo_system ``` **Demo manifest file:** `/fides/fidesctl/demo_resources/demo_system.yml` @@ -121,6 +141,20 @@ system: "email": "controller@acmeinc.com", "phone": "+1 555 555 5555" }, + "egress": [ + { + "fides_key": "another_demo_system", + "type": "system", + "data_categories": ["user.contact"] + } + ], + "ingress": [ + { + "fides_key": "yet_another_demo_system", + "type": "system", + "data_categories": ["user.device.cookie_id"] + } + ], "privacy_declarations": [ { "name": "Analyze customer behaviour for improvements.", @@ -133,9 +167,8 @@ system: "customer" ], "data_qualifier": "identified_data", - "dataset_references": [ - "demo_users_dataset" - ] + "egress": ["another_demo_system"], + "ingress": ["yet_another_demo_system"] } ] } diff --git a/src/fideslang/__init__.py b/src/fideslang/__init__.py index 55e04bea..441b8822 100644 --- a/src/fideslang/__init__.py +++ b/src/fideslang/__init__.py @@ -4,9 +4,13 @@ from typing import Dict, Type, Union +from .default_fixtures import COUNTRY_CODES +from .default_taxonomy import DEFAULT_TAXONOMY + # Export the Models from .models import ( DataCategory, + DataFlow, DataQualifier, Dataset, DatasetField, @@ -25,9 +29,6 @@ Taxonomy, ) -from .default_fixtures import COUNTRY_CODES -from .default_taxonomy import DEFAULT_TAXONOMY - FidesModelType = Union[Type[FidesModel], Type[Evaluation]] model_map: Dict[str, FidesModelType] = { "data_category": DataCategory, diff --git a/src/fideslang/default_taxonomy.py b/src/fideslang/default_taxonomy.py index 3e1795fa..9cb09358 100644 --- a/src/fideslang/default_taxonomy.py +++ b/src/fideslang/default_taxonomy.py @@ -1,6 +1,6 @@ """This module contains the the default resources that Fideslang ships with.""" -from fideslang import ( +from .models import ( DataCategory, DataQualifier, DataSubject, diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 46a061c0..ea96ec12 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -5,6 +5,7 @@ from enum import Enum from typing import Dict, List, Optional +from warnings import warn from pydantic import AnyUrl, BaseModel, Field, HttpUrl, root_validator, validator @@ -645,6 +646,27 @@ class PrivacyDeclaration(BaseModel): dataset_references: Optional[List[FidesKey]] = Field( description="Referenced Dataset fides keys used by the system.", ) + egress: Optional[List[FidesKey]] = Field( + description="The resources to which data is sent. Any `fides_key`s included in this list reference `DataFlow` entries in the `egress` array of any `System` resources to which this `PrivacyDeclaration` is applied." + ) + ingress: Optional[List[FidesKey]] = Field( + description="The resources from which data is received. Any `fides_key`s included in this list reference `DataFlow` entries in the `ingress` array of any `System` resources to which this `PrivacyDeclaration` is applied." + ) + + @validator("dataset_references") + @classmethod + def deprecate_dataset_references(cls, value: List[FidesKey]) -> List[FidesKey]: + """ + Warn that the `dataset_references` field is deprecated, if set. + """ + + if value is not None: + warn( + "The dataset_references field is deprecated, and will be removed in a future version of fideslang. Use the 'egress' and 'ingress` fields instead.", + DeprecationWarning, + ) + + return value class SystemMetadata(BaseModel): @@ -665,6 +687,51 @@ class SystemMetadata(BaseModel): ) +class FlowableResources(str, Enum): + """ + The resource types with which DataFlows can be created. + """ + + DATASET = "dataset" + SYSTEM = "system" + USER = "user" + + +class DataFlow(BaseModel): + """ + The DataFlow resource model. + + Describes a resource model with which a given System resource communicates. + """ + + fides_key: FidesKey = Field( + ..., + description="Identifies the System or Dataset resource with which the communication occurs. May also be 'user', to represent communication with the user(s) of a System.", + ) + type: FlowableResources = Field( + ..., + description="Specifies the resource model class for which the `fides_key` applies. May be any of 'dataset', 'system', or 'user'.", + ) + data_categories: Optional[List[FidesKey]] = Field( + description="An array of data categories describing the data in transit.", + ) + + @root_validator() + @classmethod + def user_special_case(cls, values: Dict) -> Dict: + """ + If either the `fides_key` or the `type` are set to "user", + then the other must also be set to "user". + """ + + if values["fides_key"] == "user" or values["type"] == "user": + assert ( + values["fides_key"] == "user" and values["type"] == "user" + ), "The 'user' fides_key is required for, and requires, the type 'user'" + + return values + + class System(FidesModel): """ The System resource model. @@ -688,6 +755,12 @@ class System(FidesModel): default=DataResponsibilityTitle.CONTROLLER, description=DataResponsibilityTitle.__doc__, ) + egress: Optional[List[DataFlow]] = Field( + description="The resources to which the System sends data." + ) + ingress: Optional[List[DataFlow]] = Field( + description="The resources from which the System receives data." + ) privacy_declarations: List[PrivacyDeclaration] = Field( description=PrivacyDeclaration.__doc__, ) @@ -719,6 +792,49 @@ class System(FidesModel): _check_valid_country_code: classmethod = country_code_validator + @validator("privacy_declarations", each_item=True) + @classmethod + def privacy_declarations_reference_data_flows( + cls, + value: PrivacyDeclaration, + values: Dict, + ) -> PrivacyDeclaration: + """ + Any `PrivacyDeclaration`s which include `egress` and/or `ingress` fields must + only reference the `fides_key`s of defined `DataFlow`s in said field(s). + """ + + for direction in ["egress", "ingress"]: + fides_keys = getattr(value, direction, None) + if fides_keys is not None: + data_flows = values[direction] + system = values["fides_key"] + assert ( + data_flows is not None and len(data_flows) > 0 + ), f"PrivacyDeclaration '{value.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." + + for fides_key in fides_keys: + assert fides_key in [ + data_flow.fides_key for data_flow in data_flows + ], f"PrivacyDeclaration '{value.name}' defines {direction} with '{fides_key}' and is applied to the System '{system}', which does not itself define {direction} with that resource." + + return value + + @validator("system_dependencies") + @classmethod + def deprecate_system_dependencies(cls, value: List[FidesKey]) -> List[FidesKey]: + """ + Warn that the `system_dependencies` field is deprecated, if set. + """ + + if value is not None: + warn( + "The system_dependencies field is deprecated, and will be removed in a future version of fideslang. Use the 'egress' and 'ingress` fields instead.", + DeprecationWarning, + ) + + return value + class Config: "Class for the System config" use_enum_values = True diff --git a/tests/conftest.py b/tests/conftest.py index 00aa3396..35b98972 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -118,10 +118,8 @@ def resources_dict(): data_use="provide", data_subjects=[], data_qualifier="aggregated_data", - dataset_references=[], ) ], - system_dependencies=[], ), } yield resources_dict diff --git a/tests/data/failing_dataset_collection_taxonomy.yml b/tests/data/failing_dataset_collection_taxonomy.yml index 62e23284..6b0ae18e 100644 --- a/tests/data/failing_dataset_collection_taxonomy.yml +++ b/tests/data/failing_dataset_collection_taxonomy.yml @@ -20,6 +20,9 @@ system: name: Customer Data Sharing System description: Share data about our users with third-parties for advertising system_type: Service + ingress: + - fides_key: test_db_dataset_failing_dataset + type: dataset privacy_declarations: - name: Share Political Opinions data_categories: @@ -28,9 +31,8 @@ system: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer - dataset_references: + ingress: - test_db_dataset_failing_dataset - system_dependencies: [] policy: - fides_key: primary_privacy_policy diff --git a/tests/data/failing_dataset_field_taxonomy.yml b/tests/data/failing_dataset_field_taxonomy.yml index fa97818e..9891efcc 100644 --- a/tests/data/failing_dataset_field_taxonomy.yml +++ b/tests/data/failing_dataset_field_taxonomy.yml @@ -21,6 +21,9 @@ system: name: Customer Data Sharing System description: Share data about our users with third-parties for advertising system_type: Service + ingress: + - fides_key: test_db_dataset_failing_dataset + type: dataset privacy_declarations: - name: Share Political Opinions data_categories: @@ -29,9 +32,8 @@ system: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer - dataset_references: + ingress: - test_db_dataset_failing_dataset - system_dependencies: [] policy: - fides_key: primary_privacy_policy diff --git a/tests/data/failing_dataset_taxonomy.yml b/tests/data/failing_dataset_taxonomy.yml index 284d88a3..61e2fb92 100644 --- a/tests/data/failing_dataset_taxonomy.yml +++ b/tests/data/failing_dataset_taxonomy.yml @@ -20,6 +20,9 @@ system: name: Customer Data Sharing System description: Share data about our users with third-parties for advertising system_type: Service + ingress: + - fides_key: test_db_dataset_failing_dataset + type: dataset privacy_declarations: - name: Share Political Opinions data_categories: @@ -28,9 +31,8 @@ system: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer - dataset_references: + ingress: - test_db_dataset_failing_dataset - system_dependencies: [] policy: - fides_key: primary_privacy_policy diff --git a/tests/data/failing_declaration_taxonomy.yml b/tests/data/failing_declaration_taxonomy.yml index 8ef56afa..53b1a42c 100644 --- a/tests/data/failing_declaration_taxonomy.yml +++ b/tests/data/failing_declaration_taxonomy.yml @@ -11,7 +11,6 @@ system: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer - system_dependencies: [] policy: - fides_key: primary_privacy_policy diff --git a/tests/data/failing_nested_dataset.yml b/tests/data/failing_nested_dataset.yml index 7f3f992c..a0e005c2 100644 --- a/tests/data/failing_nested_dataset.yml +++ b/tests/data/failing_nested_dataset.yml @@ -24,6 +24,9 @@ system: name: Client Usage Analytics description: Use aggregated and anonymous data to measure usage system_type: Service + ingress: + - fides_key: test_failing_nested_dataset_field + type: dataset privacy_declarations: - name: Mesaure usage of users data_categories: @@ -32,7 +35,7 @@ system: data_subjects: - customer data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - dataset_references: + ingress: - test_failing_nested_dataset_field policy: diff --git a/tests/data/passing_declaration_taxonomy.yml b/tests/data/passing_declaration_taxonomy.yml index 8ecb63ec..d2ede112 100644 --- a/tests/data/passing_declaration_taxonomy.yml +++ b/tests/data/passing_declaration_taxonomy.yml @@ -11,7 +11,6 @@ system: data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified data_subjects: - customer - system_dependencies: [] policy: - fides_key: primary_privacy_policy diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index 7e4a0131..6e627019 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -1,32 +1,238 @@ -import pytest +from pytest import deprecated_call, mark, raises -import fideslang as models +from fideslang import DataFlow, PrivacyDeclaration, System + +pytestmark = mark.unit + + +class TestDataFlow: + def test_dataflow_valid(self) -> None: + assert DataFlow( + fides_key="test_system_1", + type="system", + data_categories=[], + ) + + def test_dataflow_user_fides_key_no_user_type(self) -> None: + with raises(ValueError): + assert DataFlow(fides_key="user", type="system") + + def test_dataflow_user_type_no_user_fides_key(self) -> None: + with raises(ValueError): + assert DataFlow(fides_key="test_system_1", type="user") + + +class TestPrivacyDeclaration: + def test_privacydeclaration_valid(self) -> None: + assert PrivacyDeclaration( + data_categories=[], + data_qualifier="aggregated_data", + data_subjects=[], + data_use="provide", + egress=[], + ingress=[], + name="declaration-name", + ) + + def test_dataset_references_deprecation(self) -> None: + with deprecated_call(match="dataset_references"): + assert PrivacyDeclaration( + data_categories=[], + data_qualifier="aggregated_data", + data_subjects=[], + data_use="provide", + dataset_references=[], + egress=["test_system_2"], + ingress=["test_system_3"], + name="declaration-name", + ) -@pytest.mark.unit class TestSystem: def test_system_valid(self) -> None: - system = ( - models.System( + assert System( + description="Test Policy", + egress=[ + DataFlow( + fides_key="test_system_2", + type="system", + data_categories=[], + ) + ], + fides_key="test_system", + ingress=[ + DataFlow( + fides_key="test_system_3", + type="system", + data_categories=[], + ) + ], + meta={"some": "meta stuff"}, + name="Test System", + organization_fides_key=1, + privacy_declarations=[ + PrivacyDeclaration( + data_categories=[], + data_qualifier="aggregated_data", + data_subjects=[], + data_use="provide", + egress=["test_system_2"], + ingress=["test_system_3"], + name="declaration-name", + ) + ], + registry_id=1, + system_type="SYSTEM", + tags=["some", "tags"], + ) + + def test_system_dependencies_deprecation(self) -> None: + with deprecated_call(match="system_dependencies"): + assert System( + description="Test Policy", + egress=[ + DataFlow( + fides_key="test_system_2", + type="system", + data_categories=[], + ) + ], + fides_key="test_system", + ingress=[ + DataFlow( + fides_key="test_system_3", + type="system", + data_categories=[], + ) + ], + meta={"some": "meta stuff"}, + name="Test System", organization_fides_key=1, + privacy_declarations=[ + PrivacyDeclaration( + data_categories=[], + data_qualifier="aggregated_data", + data_subjects=[], + data_use="provide", + egress=["test_system_2"], + ingress=["test_system_3"], + name="declaration-name", + ) + ], registry_id=1, - meta={"some": "meta stuff"}, - fides_key="test_system", + system_dependencies=[], system_type="SYSTEM", - name="Test System", tags=["some", "tags"], + ) + + def test_system_valid_no_egress_or_ingress(self) -> None: + assert System( + description="Test Policy", + fides_key="test_system", + meta={"some": "meta stuff"}, + name="Test System", + organization_fides_key=1, + privacy_declarations=[ + PrivacyDeclaration( + data_categories=[], + data_qualifier="aggregated_data", + data_subjects=[], + data_use="provide", + name="declaration-name", + ) + ], + registry_id=1, + system_type="SYSTEM", + tags=["some", "tags"], + ) + + def test_system_no_egress(self) -> None: + with raises(ValueError): + assert System( description="Test Policy", + fides_key="test_system", + ingress=[ + DataFlow( + fides_key="test_system_3", + type="system", + data_categories=[], + ) + ], + meta={"some": "meta stuff"}, + name="Test System", + organization_fides_key=1, privacy_declarations=[ - models.PrivacyDeclaration( - name="declaration-name", + PrivacyDeclaration( data_categories=[], - data_use="provide", + data_qualifier="aggregated_data", data_subjects=[], + data_use="provide", + egress=["test_system_2"], + ingress=["test_system_3"], + name="declaration-name", + ) + ], + registry_id=1, + system_type="SYSTEM", + tags=["some", "tags"], + ) + + def test_system_no_ingress(self) -> None: + with raises(ValueError): + assert System( + description="Test Policy", + egress=[ + DataFlow( + fides_key="test_system_2", + type="system", + data_categories=[], + ) + ], + fides_key="test_system", + meta={"some": "meta stuff"}, + name="Test System", + organization_fides_key=1, + privacy_declarations=[ + PrivacyDeclaration( + data_categories=[], data_qualifier="aggregated_data", - dataset_references=[], + data_subjects=[], + data_use="provide", + egress=["test_system_2"], + ingress=["test_system_3"], + name="declaration-name", ) ], - system_dependencies=[], - ), + registry_id=1, + system_type="SYSTEM", + tags=["some", "tags"], + ) + + def test_system_user_ingress_valid(self) -> None: + assert System( + description="Test Policy", + fides_key="test_system", + ingress=[ + DataFlow( + fides_key="user", + type="user", + data_categories=[], + ) + ], + meta={"some": "meta stuff"}, + name="Test System", + organization_fides_key=1, + privacy_declarations=[ + PrivacyDeclaration( + data_categories=[], + data_qualifier="aggregated_data", + data_subjects=[], + data_use="provide", + ingress=["user"], + name="declaration-name", + ) + ], + registry_id=1, + system_type="SYSTEM", + tags=["some", "tags"], ) - assert system