From 9fa0ce539e38b5983ab319e8280bcd664f8b0fa8 Mon Sep 17 00:00:00 2001 From: Steve Murphy Date: Mon, 23 May 2022 16:32:18 -0400 Subject: [PATCH] Export data map from server resources instead of manifest dir (#662) * All resources for an organization in data map Previously only fides keys found in a specified manifest directory were included in a data map. This not only led to a bug when an organization is not defined in a manifest directory but also is impractical when working inside a large organization. This change will have a data map exported for a specific organization, which is more aligned with our documentation around organizations and expectations of users. * filter resources by organization_fides_key * change organization key from argument to option * changelog * replace manifest_dir arg with option The positional argument felt a bit out of place. As of today, we can expect most users to have a default_organization and to use the .fides/ directory. These options will allow for deviations from that plan without requiring any specific positional arguments to take place. 
* move output-dir option to options.py --- CHANGELOG.md | 1 + docs/fides/docs/guides/generating_datamap.md | 4 +- src/fidesctl/cli/commands/export.py | 30 +++++++++---- src/fidesctl/cli/options.py | 22 +++++++++ src/fidesctl/core/export.py | 47 ++++++++++++-------- src/fidesctl/core/export_helpers.py | 26 +---------- tests/cli/test_cli.py | 6 --- 7 files changed, 76 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index abfaa37165..8e45b8ea3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ The types of changes are: * Comparing server and CLI versions ignores `.dirty` only differences, and is quiet on success when running general CLI commands * Migrate all endpoints to be prefixed by `/api/v1` [#623](https://github.com/ethyca/fides/issues/623) * Allow credentials to be passed to the generate systems from aws functionality via the API [#645](https://github.com/ethyca/fides/pull/645) +* Update the export of a datamap to load resources from the server instead of a manifest directory [#662](https://github.com/ethyca/fides/pull/662) ### Developer Experience diff --git a/docs/fides/docs/guides/generating_datamap.md b/docs/fides/docs/guides/generating_datamap.md index 1fbcc0b75f..5872e97a0a 100644 --- a/docs/fides/docs/guides/generating_datamap.md +++ b/docs/fides/docs/guides/generating_datamap.md @@ -9,7 +9,7 @@ To apply and export the provided `demo_resources`, run the following commands: ```sh title="Apply and Export Defaults" $ fidesctl apply demo_resources/ -$ fidesctl export datamap demo_resources/ +$ fidesctl export datamap --output-dir demo_resources/ ``` This will export a data map to the `demo_resources/` directory. 
@@ -170,7 +170,7 @@ Now that you have added the additional information around privacy notices and da ```sh title="Apply and Export Defaults" $ fidesctl apply demo_resources/ -$ fidesctl export datamap demo_resources/ +$ fidesctl export datamap --output-dir demo_resources/ ``` ### Populated Fields diff --git a/src/fidesctl/cli/commands/export.py b/src/fidesctl/cli/commands/export.py index 9f57e99c98..dffd56f780 100644 --- a/src/fidesctl/cli/commands/export.py +++ b/src/fidesctl/cli/commands/export.py @@ -1,7 +1,12 @@ """Contains the export group of CLI commands for Fidesctl.""" import click -from fidesctl.cli.options import dry_flag, manifests_dir_argument +from fidesctl.cli.options import ( + dry_flag, + manifests_dir_argument, + organization_fides_key_option, + output_directory_option, +) from fidesctl.cli.utils import with_analytics from fidesctl.core import export as _export from fidesctl.core import parse as _parse @@ -89,7 +94,8 @@ def export_organization( @export.command(name="datamap") @click.pass_context -@manifests_dir_argument +@output_directory_option +@organization_fides_key_option @dry_flag @click.option( "--csv", @@ -99,22 +105,30 @@ def export_organization( @with_analytics def export_datamap( ctx: click.Context, - manifests_dir: str, + output_dir: str, + org_key: str, dry: bool, csv: bool, ) -> None: """ - Export a formatted data map to excel using template + Export a formatted data map to excel using the fides template. + + The data map is comprised of an Organization, Systems, and Datasets. - The csv flag can be used to output data as csv instead + The default organization is used, however a custom one can be + passed if required. + + A custom manifest directory can be provided for the output location. + + The csv flag can be used to output data as csv, while the dry + flag can be used to return data to the console instead. 
""" config = ctx.obj["CONFIG"] - taxonomy = _parse.parse(manifests_dir) _export.export_datamap( url=config.cli.server_url, - taxonomy=taxonomy, headers=config.user.request_headers, - manifests_dir=manifests_dir, + organization_fides_key=org_key, + output_directory=output_dir, dry=dry, to_csv=csv, ) diff --git a/src/fidesctl/cli/options.py b/src/fidesctl/cli/options.py index b6ce6fb709..3322a13772 100644 --- a/src/fidesctl/cli/options.py +++ b/src/fidesctl/cli/options.py @@ -93,3 +93,25 @@ def include_null_flag(command: Callable) -> Callable: help="Includes attributes that would otherwise be null.", )(command) return command + + +def organization_fides_key_option(command: Callable) -> Callable: + "Add the organization_fides_key option." + command = click.option( + "--org-key", + "-k", + default="default_organization", + help="The organization_fides_key you wish to export resources for.", + )(command) + return command + + +def output_directory_option(command: Callable) -> Callable: + "Add the output directory option" + command = click.option( + "--output-dir", + "-d", + default=".fides/", + help="The output directory for the data map to be exported to.", + )(command) + return command diff --git a/src/fidesctl/core/export.py b/src/fidesctl/core/export.py index 1179ce8d47..c0950fcb3f 100644 --- a/src/fidesctl/core/export.py +++ b/src/fidesctl/core/export.py @@ -5,15 +5,18 @@ from typing import Dict, List, Tuple import pandas as pd -from fideslang.models import ContactDetails, Taxonomy +from fideslang.models import ContactDetails -from fidesctl.core.api_helpers import get_server_resources +from fidesctl.core.api_helpers import ( + get_server_resource, + get_server_resources, + list_server_resources, +) from fidesctl.core.export_helpers import ( convert_tuple_to_string, export_datamap_to_excel, export_to_csv, generate_data_category_rows, - get_datamap_fides_keys, get_formatted_data_protection_impact_assessment, get_formatted_data_subjects, get_formatted_data_use, @@ 
-386,9 +389,9 @@ def build_joined_dataframe( def export_datamap( url: str, - taxonomy: Taxonomy, headers: Dict[str, str], - manifests_dir: str, + organization_fides_key: str, + output_directory: str, dry: bool, to_csv: bool, ) -> None: @@ -399,26 +402,32 @@ def export_datamap( flattened as needed for exporting. """ - # load resources from server - - fides_keys_dict = get_datamap_fides_keys(taxonomy) - - server_resource_dict = {} - for resource_type in ["organization", "system", "dataset"]: - - server_resource_dict[resource_type] = get_server_resources( + # load resources from server, filtered by organization + server_resource_dict = { + "organization": [ + get_server_resource(url, "organization", organization_fides_key, headers) + ] + } + for resource_type in ["system", "dataset"]: + server_resources = list_server_resources( url, - resource_type, - fides_keys_dict[resource_type], headers, + resource_type, + exclude_keys=[], ) - + filtered_server_resources = [ + resource + for resource in server_resources + if resource.organization_fides_key == organization_fides_key + ] + server_resource_dict[resource_type] = filtered_server_resources + + # transform the resources to join a system and referenced datasets joined_system_dataset_df = build_joined_dataframe( server_resource_dict, url, headers ) if not dry and not to_csv: - # build an organization dataframe if exporting to excel organization_df = pd.DataFrame.from_records( generate_contact_records(server_resource_dict["organization"]) @@ -427,7 +436,7 @@ def export_datamap( organization_df = organization_df[1:] exported_filename = export_datamap_to_excel( - organization_df, joined_system_dataset_df, manifests_dir + organization_df, joined_system_dataset_df, output_directory ) echo_green(exported_filename + " successfully exported.") else: @@ -439,5 +448,5 @@ def export_datamap( for record in output_list: print(record) else: - exported_filename = export_to_csv(output_list, "datamap", manifests_dir) + exported_filename = 
export_to_csv(output_list, "datamap", output_directory) echo_green(exported_filename + " successfully exported.") diff --git a/src/fidesctl/core/export_helpers.py b/src/fidesctl/core/export_helpers.py index feaa5223e5..d7e98ab039 100644 --- a/src/fidesctl/core/export_helpers.py +++ b/src/fidesctl/core/export_helpers.py @@ -5,7 +5,7 @@ from typing import Dict, List, Set, Tuple import pandas as pd -from fideslang.models import DataSubjectRightsEnum, Taxonomy +from fideslang.models import DataSubjectRightsEnum from fidesctl.core.api_helpers import get_server_resource, get_server_resources from fidesctl.core.utils import echo_red @@ -295,30 +295,6 @@ def calculate_data_subject_rights(rights: Dict) -> str: return data_subject_rights -def get_datamap_fides_keys(taxonomy: Taxonomy) -> Dict: - """ - Gathers all fides keys for an organization, systems, - and datasets based on the resources found in the - provided taxonomy built from manifests. - - If no Organization is found, ensure the 'default_organization' - is applied. This is a temporary measure put in place until a - more thorough change is made to pull all resources from the - server. 
- """ - taxonomy_keys_dict = {} - taxonomy_keys_dict["organization"] = [ - resource.fides_key for resource in taxonomy.organization - ] - if not taxonomy_keys_dict["organization"]: - taxonomy_keys_dict["organization"] = ["default_organization"] - taxonomy_keys_dict["system"] = [resource.fides_key for resource in taxonomy.system] - taxonomy_keys_dict["dataset"] = [ - resource.fides_key for resource in taxonomy.dataset - ] - return taxonomy_keys_dict - - def remove_duplicates_from_comma_separated_column(comma_separated_string: str) -> str: "transform the row using a set to remove duplcation" return ", ".join(set(comma_separated_string.split(", "))) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 026cf2b2a7..8f416e83c1 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -263,15 +263,10 @@ def test_evaluate_with_dataset_collection_failed( @pytest.mark.parametrize( "export_resource", ["system", "dataset", "organization", "datamap"] ) -@pytest.mark.parametrize( - "manifest_dir", - ["demo_resources/", ".fides/"], -) def test_export_resources( test_config_path: str, test_cli_runner: CliRunner, export_resource: str, - manifest_dir: str, ) -> None: """ Tests that each resource is successfully exported @@ -284,7 +279,6 @@ def test_export_resources( test_config_path, "export", export_resource, - manifest_dir, "--dry", ], )