Skip to content

Commit

Permalink
Export data map from server resources instead of manifest dir (ethyca…
Browse files Browse the repository at this point in the history
…#662)

* All resources for an organization in data map

Previously only fides keys found in a specified manifest directory were included in a data map. This not only led to a bug when an organization is not defined in a manifest directory but also is impractical when working inside a large organization.

With this change, a data map is exported for a specific organization, which is more aligned with our documentation around organizations and the expectations of users.

* filter resources by organization_fides_key

* change organization key from argument to option

* changelog

* replace manifest_dir arg with option

The positional argument felt a bit out of place. As of today, we can expect most users to have a default_organization and to use the .fides/ directory. These options allow for deviations from that default without requiring any positional arguments.

* move output-dir option to options.py
  • Loading branch information
SteveDMurphy authored May 23, 2022
1 parent 8677c6f commit 9fa0ce5
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 60 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The types of changes are:
* Comparing server and CLI versions ignores `.dirty` only differences, and is quiet on success when running general CLI commands
* Migrate all endpoints to be prefixed by `/api/v1` [#623](https://github.com/ethyca/fides/issues/623)
* Allow credentials to be passed to the generate systems from aws functionality via the API [#645](https://github.com/ethyca/fides/pull/645)
* Update the export of a datamap to load resources from the server instead of a manifest directory [#662](https://github.com/ethyca/fides/pull/662)

### Developer Experience

Expand Down
4 changes: 2 additions & 2 deletions docs/fides/docs/guides/generating_datamap.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ To apply and export the provided `demo_resources`, run the following commands:

```sh title="Apply and Export Defaults"
$ fidesctl apply demo_resources/
$ fidesctl export datamap demo_resources/
$ fidesctl export datamap --output-dir demo_resources/
```

This will export a data map to the `demo_resources/` directory.
Expand Down Expand Up @@ -170,7 +170,7 @@ Now that you have added the additional information around privacy notices and da

```sh title="Apply and Export Defaults"
$ fidesctl apply demo_resources/
$ fidesctl export datamap demo_resources/
$ fidesctl export datamap --output-dir demo_resources/
```

### Populated Fields
Expand Down
30 changes: 22 additions & 8 deletions src/fidesctl/cli/commands/export.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""Contains the export group of CLI commands for Fidesctl."""
import click

from fidesctl.cli.options import dry_flag, manifests_dir_argument
from fidesctl.cli.options import (
dry_flag,
manifests_dir_argument,
organization_fides_key_option,
output_directory_option,
)
from fidesctl.cli.utils import with_analytics
from fidesctl.core import export as _export
from fidesctl.core import parse as _parse
Expand Down Expand Up @@ -89,7 +94,8 @@ def export_organization(

@export.command(name="datamap")
@click.pass_context
@manifests_dir_argument
@output_directory_option
@organization_fides_key_option
@dry_flag
@click.option(
"--csv",
Expand All @@ -99,22 +105,30 @@ def export_organization(
@with_analytics
def export_datamap(
ctx: click.Context,
manifests_dir: str,
output_dir: str,
org_key: str,
dry: bool,
csv: bool,
) -> None:
"""
Export a formatted data map to excel using template
Export a formatted data map to excel using the fides template.
The data map is comprised of an Organization, Systems, and Datasets.
The csv flag can be used to output data as csv instead
The default organization is used, however a custom one can be
passed if required.
A custom manifest directory can be provided for the output location.
The csv flag can be used to output data as csv, while the dry
flag can be used to return data to the console instead.
"""
config = ctx.obj["CONFIG"]
taxonomy = _parse.parse(manifests_dir)
_export.export_datamap(
url=config.cli.server_url,
taxonomy=taxonomy,
headers=config.user.request_headers,
manifests_dir=manifests_dir,
organization_fides_key=org_key,
output_directory=output_dir,
dry=dry,
to_csv=csv,
)
22 changes: 22 additions & 0 deletions src/fidesctl/cli/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,25 @@ def include_null_flag(command: Callable) -> Callable:
help="Includes attributes that would otherwise be null.",
)(command)
return command


def organization_fides_key_option(command: Callable) -> Callable:
    """
    Attach the `--org-key` / `-k` option to a click command.

    The option value defaults to "default_organization" when it is not
    supplied on the command line.
    """
    add_option = click.option(
        "--org-key",
        "-k",
        default="default_organization",
        help="The organization_fides_key you wish to export resources for.",
    )
    return add_option(command)


def output_directory_option(command: Callable) -> Callable:
    """
    Attach the `--output-dir` / `-d` option to a click command.

    The option value defaults to ".fides/" when it is not supplied on
    the command line.
    """
    add_option = click.option(
        "--output-dir",
        "-d",
        default=".fides/",
        help="The output directory for the data map to be exported to.",
    )
    return add_option(command)
47 changes: 28 additions & 19 deletions src/fidesctl/core/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,18 @@
from typing import Dict, List, Tuple

import pandas as pd
from fideslang.models import ContactDetails, Taxonomy
from fideslang.models import ContactDetails

from fidesctl.core.api_helpers import get_server_resources
from fidesctl.core.api_helpers import (
get_server_resource,
get_server_resources,
list_server_resources,
)
from fidesctl.core.export_helpers import (
convert_tuple_to_string,
export_datamap_to_excel,
export_to_csv,
generate_data_category_rows,
get_datamap_fides_keys,
get_formatted_data_protection_impact_assessment,
get_formatted_data_subjects,
get_formatted_data_use,
Expand Down Expand Up @@ -386,9 +389,9 @@ def build_joined_dataframe(

def export_datamap(
url: str,
taxonomy: Taxonomy,
headers: Dict[str, str],
manifests_dir: str,
organization_fides_key: str,
output_directory: str,
dry: bool,
to_csv: bool,
) -> None:
Expand All @@ -399,26 +402,32 @@ def export_datamap(
flattened as needed for exporting.
"""

# load resources from server

fides_keys_dict = get_datamap_fides_keys(taxonomy)

server_resource_dict = {}
for resource_type in ["organization", "system", "dataset"]:

server_resource_dict[resource_type] = get_server_resources(
# load resources from server, filtered by organization
server_resource_dict = {
"organization": [
get_server_resource(url, "organization", organization_fides_key, headers)
]
}
for resource_type in ["system", "dataset"]:
server_resources = list_server_resources(
url,
resource_type,
fides_keys_dict[resource_type],
headers,
resource_type,
exclude_keys=[],
)

filtered_server_resources = [
resource
for resource in server_resources
if resource.organization_fides_key == organization_fides_key
]
server_resource_dict[resource_type] = filtered_server_resources

# transform the resources to join a system and referenced datasets
joined_system_dataset_df = build_joined_dataframe(
server_resource_dict, url, headers
)

if not dry and not to_csv:

# build an organization dataframe if exporting to excel
organization_df = pd.DataFrame.from_records(
generate_contact_records(server_resource_dict["organization"])
Expand All @@ -427,7 +436,7 @@ def export_datamap(
organization_df = organization_df[1:]

exported_filename = export_datamap_to_excel(
organization_df, joined_system_dataset_df, manifests_dir
organization_df, joined_system_dataset_df, output_directory
)
echo_green(exported_filename + " successfully exported.")
else:
Expand All @@ -439,5 +448,5 @@ def export_datamap(
for record in output_list:
print(record)
else:
exported_filename = export_to_csv(output_list, "datamap", manifests_dir)
exported_filename = export_to_csv(output_list, "datamap", output_directory)
echo_green(exported_filename + " successfully exported.")
26 changes: 1 addition & 25 deletions src/fidesctl/core/export_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Dict, List, Set, Tuple

import pandas as pd
from fideslang.models import DataSubjectRightsEnum, Taxonomy
from fideslang.models import DataSubjectRightsEnum

from fidesctl.core.api_helpers import get_server_resource, get_server_resources
from fidesctl.core.utils import echo_red
Expand Down Expand Up @@ -295,30 +295,6 @@ def calculate_data_subject_rights(rights: Dict) -> str:
return data_subject_rights


def get_datamap_fides_keys(taxonomy: Taxonomy) -> Dict:
    """
    Collect the fides keys of every organization, system, and dataset
    in the taxonomy that was parsed from manifests.

    When the taxonomy holds no Organization, the key list falls back to
    'default_organization'. This is a stop-gap until resources are
    pulled from the server instead.
    """
    organization_keys = [org.fides_key for org in taxonomy.organization]
    return {
        # an empty list is falsy, so the default key is substituted
        "organization": organization_keys or ["default_organization"],
        "system": [system.fides_key for system in taxonomy.system],
        "dataset": [dataset.fides_key for dataset in taxonomy.dataset],
    }


def remove_duplicates_from_comma_separated_column(comma_separated_string: str) -> str:
    """
    Remove duplicate entries from a ", "-separated string.

    The first occurrence of each entry is kept and the original order
    is preserved, so the result is the same on every run. Joining a
    plain `set` of strings would produce an arbitrary order.
    """
    # dict keys keep insertion order, giving an ordered de-duplication
    unique_entries = dict.fromkeys(comma_separated_string.split(", "))
    return ", ".join(unique_entries)
Expand Down
6 changes: 0 additions & 6 deletions tests/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,10 @@ def test_evaluate_with_dataset_collection_failed(
@pytest.mark.parametrize(
"export_resource", ["system", "dataset", "organization", "datamap"]
)
@pytest.mark.parametrize(
"manifest_dir",
["demo_resources/", ".fides/"],
)
def test_export_resources(
test_config_path: str,
test_cli_runner: CliRunner,
export_resource: str,
manifest_dir: str,
) -> None:
"""
Tests that each resource is successfully exported
Expand All @@ -284,7 +279,6 @@ def test_export_resources(
test_config_path,
"export",
export_resource,
manifest_dir,
"--dry",
],
)
Expand Down

0 comments on commit 9fa0ce5

Please sign in to comment.