Skip to content

Commit

Permalink
feat: CDI-3103 - New databricks volume module (#593)
Browse files Browse the repository at this point in the history
* feat: new databricks_volume module

* fix: grantees and grant structure
  • Loading branch information
jayengee authored Jun 25, 2024
1 parent c540f4e commit 2d72a6c
Show file tree
Hide file tree
Showing 11 changed files with 565 additions and 0 deletions.
Binary file added .DS_Store
Binary file not shown.
12 changes: 12 additions & 0 deletions databricks-s3-volume/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Auto-generated by fogg. Do not edit
# Make improvements in fogg, so that everyone can benefit.

# Pin the Terraform version and share a plugin cache across modules in this repo.
export TERRAFORM_VERSION := 1.3.0
export TF_PLUGIN_CACHE_DIR := ../../..//.terraform.d/plugin-cache

# Common targets (fmt, lint, plan, etc.) come from the repo-level module.mk.
include ../../..//scripts/module.mk


help: ## display help for this makefile
	@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
.PHONY: help
73 changes: 73 additions & 0 deletions databricks-s3-volume/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<!-- START -->
## Requirements

| Name | Version |
|------|---------|
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.3.0 |

## Providers

| Name | Version |
|------|---------|
| <a name="provider_aws"></a> [aws](#provider\_aws) | n/a |
| <a name="provider_databricks"></a> [databricks](#provider\_databricks) | n/a |

## Modules

| Name | Source | Version |
|------|--------|---------|
| <a name="module_databricks_bucket"></a> [databricks\_bucket](#module\_databricks\_bucket) | github.com/chanzuckerberg/cztack//aws-s3-private-bucket | v0.71.0 |

## Resources

| Name | Type |
|------|------|
| [aws_iam_policy.dbx_unity_access_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_role.dbx_unity_aws_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
| [aws_iam_role_policy_attachment.dbx_unity_aws_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [databricks_catalog.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/catalog) | resource |
| [databricks_external_location.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/external_location) | resource |
| [databricks_grant.catalog_r](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource |
| [databricks_grant.catalog_rw](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource |
| [databricks_grant.schema_r](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource |
| [databricks_grant.schema_rw](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource |
| [databricks_grant.volume_r](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource |
| [databricks_grant.volume_rw](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/grant) | resource |
| [databricks_schema.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/schema) | resource |
| [databricks_storage_credential.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/storage_credential) | resource |
| [databricks_volume.volume](https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/volume) | resource |
| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
| [aws_iam_policy_document.databricks-s3](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.dbx_unity_aws_role_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
| [aws_iam_policy_document.volume_bucket_dbx_unity_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |

## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_additional_rw_bucket_grant_arns"></a> [additional\_rw\_bucket\_grant\_arns](#input\_additional\_rw\_bucket\_grant\_arns) | (Optional) Additional AWS ARNs to grant read/write permissions to on the bucket (may be necessary for service principals, instance profiles, or users) | `list(string)` | `[]` | no |
| <a name="input_bucket_object_ownership"></a> [bucket\_object\_ownership](#input\_bucket\_object\_ownership) | Set default owner of all objects within bucket (e.g., bucket vs. object owner) | `string` | `null` | no |
| <a name="input_catalog_name"></a> [catalog\_name](#input\_catalog\_name) | Name of the Databricks existing catalog to add the volume to | `string` | n/a | yes |
| <a name="input_catalog_owner"></a> [catalog\_owner](#input\_catalog\_owner) | User or group name of the catalog owner | `string` | n/a | yes |
| <a name="input_catalog_r_grant_principals"></a> [catalog\_r\_grant\_principals](#input\_catalog\_r\_grant\_principals) | (Optional) Databricks groups to grant read-only permissions to on the catalog | `list(string)` | `[]` | no |
| <a name="input_catalog_rw_grant_principals"></a> [catalog\_rw\_grant\_principals](#input\_catalog\_rw\_grant\_principals) | (Optional) Databricks groups to grant read/write permissions to on the catalog | `list(string)` | `[]` | no |
| <a name="input_metastore_id"></a> [metastore\_id](#input\_metastore\_id) | ID of metastore to create catalog in | `string` | n/a | yes |
| <a name="input_schema_r_grant_principals"></a> [schema\_r\_grant\_principals](#input\_schema\_r\_grant\_principals) | (Optional) Databricks groups to grant read-only permissions to on the schema | `list(string)` | `[]` | no |
| <a name="input_schema_rw_grant_principals"></a> [schema\_rw\_grant\_principals](#input\_schema\_rw\_grant\_principals) | (Optional) Databricks groups to grant read/write permissions to on the schema | `list(string)` | `[]` | no |
| <a name="input_tags"></a> [tags](#input\_tags) | REQUIRED: Tags to include for this environment. | <pre>object({<br> project : string<br> env : string<br> service : string<br> owner : string<br> managedBy : string<br> })</pre> | n/a | yes |
| <a name="input_volume_bucket"></a> [volume\_bucket](#input\_volume\_bucket) | (Optional) Name of an existing S3 bucket to use for Databricks volume. NOTE: if provided, you will need to update the bucket policy wherever it is defined to allow Databricks access | `string` | `null` | no |
| <a name="input_volume_comment"></a> [volume\_comment](#input\_volume\_comment) | (Optional) Comment to add to the Databricks volume | `string` | `"Managed by Terraform - this is a default volume for the Databricks workspace"` | no |
| <a name="input_volume_name"></a> [volume\_name](#input\_volume\_name) | Name of the Databricks volume to create | `string` | n/a | yes |
| <a name="input_volume_r_grant_principals"></a> [volume\_r\_grant\_principals](#input\_volume\_r\_grant\_principals) | (Optional) Databricks groups to grant read-only permissions to on the volume | `list(string)` | `[]` | no |
| <a name="input_volume_rw_grant_principals"></a> [volume\_rw\_grant\_principals](#input\_volume\_rw\_grant\_principals) | (Optional) Databricks groups to grant read/write permissions to on the volume | `list(string)` | `[]` | no |
| <a name="input_volume_schema_properties"></a> [volume\_schema\_properties](#input\_volume\_schema\_properties) | Properties of the Databricks schema to add the volume to | `map(string)` | `{}` | no |
| <a name="input_workspace_name"></a> [workspace\_name](#input\_workspace\_name) | Name of the Databricks workspace to add the volume to | `string` | n/a | yes |

## Outputs

| Name | Description |
|------|-------------|
| <a name="output_dbx_unity_aws_role_arn"></a> [dbx\_unity\_aws\_role\_arn](#output\_dbx\_unity\_aws\_role\_arn) | n/a |
| <a name="output_volume_path"></a> [volume\_path](#output\_volume\_path) | n/a |
| <a name="output_volume_specific_bucket_name"></a> [volume\_specific\_bucket\_name](#output\_volume\_specific\_bucket\_name) | n/a |
<!-- END -->
113 changes: 113 additions & 0 deletions databricks-s3-volume/bucket.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
locals {
  # Principals given the standard read/write bucket grants: the Databricks AWS
  # account root (local.databricks_aws_account is defined elsewhere in this
  # module) plus any caller-supplied ARNs, e.g. service principals, instance
  # profiles, or users (see var.additional_rw_bucket_grant_arns).
  standard_grant_principals = concat(["arn:aws:iam::${local.databricks_aws_account}:root"], var.additional_rw_bucket_grant_arns)
}

# Bucket policy for the module-created volume bucket.
# Only rendered when this module creates the bucket (var.volume_bucket == null);
# when an existing bucket is supplied, its policy must be updated wherever that
# bucket is managed.
data "aws_iam_policy_document" "databricks-s3" {
  count = var.volume_bucket != null ? 0 : 1

  # standard UC access: bucket-level actions (list + lifecycle) for the
  # Databricks account and any additional caller-supplied ARNs
  statement {
    sid    = "dbxBucketAccess"
    effect = "Allow"
    principals {
      type        = "AWS"
      identifiers = local.standard_grant_principals
    }
    actions = [
      "s3:ListBucket",
      "s3:GetBucketLocation",
      "s3:GetLifecycleConfiguration",
      "s3:PutLifecycleConfiguration",
    ]
    resources = [
      "arn:aws:s3:::${local.bucket_name}",
    ]
  }

  # standard UC access: object-level read/write for the same principals
  statement {
    sid    = "dbxObjAccess"
    effect = "Allow"
    principals {
      type        = "AWS"
      identifiers = local.standard_grant_principals
    }
    actions = [
      "s3:GetObject",
      "s3:GetObjectVersion",
      "s3:PutObject",
      "s3:DeleteObject",
    ]
    resources = [
      "arn:aws:s3:::${local.bucket_name}/*" # root access
    ]
  }

  # storage credential access - uses string to avoid race condition of role v. bucket creation
  # The principal is the account root, narrowed via aws:PrincipalArn to the
  # Unity Catalog role, so the policy does not reference the role resource
  # directly (which would fail if the role does not exist yet).
  statement {
    sid    = "dbxSCBucketAccess"
    effect = "Allow"
    principals {
      type = "AWS"
      identifiers = [
        "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root",
      ]
    }
    condition {
      test     = "ArnEquals"
      variable = "aws:PrincipalArn"
      values = [
        # local.path starts/ends with "/" so this forms role/<path>/<name>
        "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.unity_aws_role_name}",
      ]
    }
    actions = [
      "s3:ListBucket",
      "s3:GetBucketLocation",
      "s3:GetLifecycleConfiguration",
      "s3:PutLifecycleConfiguration",
    ]
    resources = [
      "arn:aws:s3:::${local.bucket_name}",
    ]
  }

  # storage credential access: object-level read/write for the same narrowed principal
  statement {
    sid    = "dbxSCObjAccess"
    effect = "Allow"
    principals {
      type = "AWS"
      identifiers = [
        "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root",
      ]
    }
    condition {
      test     = "ArnEquals"
      variable = "aws:PrincipalArn"
      values = [
        "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.unity_aws_role_name}",
      ]
    }
    actions = [
      "s3:GetObject",
      "s3:GetObjectVersion",
      "s3:PutObject",
      "s3:DeleteObject",
    ]
    resources = [
      "arn:aws:s3:::${local.bucket_name}/*"
    ]
  }

}

# Private S3 bucket backing the Databricks volume. Only created when the caller
# does not supply an existing bucket via var.volume_bucket.
module "databricks_bucket" {
  count = var.volume_bucket != null ? 0 : 1
  # Ensure the Unity Catalog role exists before the bucket (its policy's
  # aws:PrincipalArn condition refers to the role by name).
  depends_on = [
    aws_iam_role.dbx_unity_aws_role
  ]

  source        = "github.com/chanzuckerberg/cztack//aws-s3-private-bucket?ref=v0.71.0"
  bucket_name   = local.bucket_name
  bucket_policy = data.aws_iam_policy_document.databricks-s3[0].json
  project       = var.tags["project"]
  env           = var.tags["env"]
  service       = var.tags["service"]
  owner         = var.tags["owner"]
  # null leaves the upstream module's default object-ownership setting in place
  object_ownership = var.bucket_object_ownership
}
2 changes: 2 additions & 0 deletions databricks-s3-volume/fogg.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto-generated by fogg. Do not edit
# Make improvements in fogg, so that everyone can benefit.
70 changes: 70 additions & 0 deletions databricks-s3-volume/grants.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# catalog
# Read-only access on the catalog: one grant per principal in
# var.catalog_r_grant_principals.
resource "databricks_grant" "catalog_r" {
  for_each   = toset(var.catalog_r_grant_principals)
  catalog    = databricks_catalog.volume.name
  principal  = each.value
  privileges = ["USE_CATALOG", "USE_SCHEMA", "SELECT"]
}

# Read/write access on the catalog: one grant per principal in
# var.catalog_rw_grant_principals.
# BUG FIX: principal was hard-coded to "Data Scientists", so every element of
# var.catalog_rw_grant_principals produced a grant for that one literal group
# (duplicate grants for the same principal conflict) and the listed principals
# never received access. Use each.value, consistent with every other grant
# resource in this file.
resource "databricks_grant" "catalog_rw" {
  for_each  = toset(var.catalog_rw_grant_principals)
  catalog   = databricks_catalog.volume.name
  principal = each.value
  privileges = [
    "APPLY_TAG",
    "CREATE_CONNECTION",
    "CREATE_SCHEMA",
    "USE_CATALOG",
    "CREATE_FUNCTION",
    "CREATE_TABLE",
    "EXECUTE",
    "MODIFY",
    "REFRESH",
    "SELECT",
    "READ_VOLUME",
    "WRITE_VOLUME",
    "USE_SCHEMA",
  ]
}

# schema
# Read-only access on the schema: one grant per principal in
# var.schema_r_grant_principals.
resource "databricks_grant" "schema_r" {
  for_each   = toset(var.schema_r_grant_principals)
  schema     = databricks_schema.volume.id
  principal  = each.value
  privileges = ["USE_SCHEMA", "SELECT", "READ_VOLUME"]
}

# Read/write access on the schema: one grant per principal in
# var.schema_rw_grant_principals.
resource "databricks_grant" "schema_rw" {
  for_each  = toset(var.schema_rw_grant_principals)
  schema    = databricks_schema.volume.id
  principal = each.value
  privileges = [
    "APPLY_TAG",
    "CREATE_FUNCTION",
    "CREATE_TABLE",
    "CREATE_VOLUME",
    "USE_SCHEMA",
    "EXECUTE",
    "MODIFY",
    "REFRESH",
    "SELECT",
    "READ_VOLUME",
    "WRITE_VOLUME"
  ]
}

# volume
# Grants read-only volume access to each principal listed in
# var.volume_r_grant_principals (no grants when the list is empty).
resource "databricks_grant" "volume_r" {
  for_each = toset(var.volume_r_grant_principals)

  volume    = databricks_volume.volume.id
  principal = each.value

  privileges = [
    "READ_VOLUME",
  ]
}

# Grants read/write volume access to each principal listed in
# var.volume_rw_grant_principals (no grants when the list is empty).
resource "databricks_grant" "volume_rw" {
  for_each = toset(var.volume_rw_grant_principals)

  volume    = databricks_volume.volume.id
  principal = each.value

  privileges = [
    "READ_VOLUME",
    "WRITE_VOLUME",
  ]
}
93 changes: 93 additions & 0 deletions databricks-s3-volume/iam.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
## Databricks external location and IAM

# Account ID of the AWS account Terraform is running against; used to build
# role and principal ARNs below.
data "aws_caller_identity" "current" {
  provider = aws
}

# Trust policy for the Unity Catalog role. Two trust statements:
#   1. The Databricks-managed Unity Catalog master role may assume this role,
#      gated on an sts:ExternalId.
#      NOTE(review): both the master-role ARN (account 414351767826) and the
#      ExternalId are hard-coded here; the ExternalId is normally the
#      Databricks account ID — confirm these match this deployment before reuse.
#   2. The role may assume itself (principal is the account root, narrowed via
#      aws:PrincipalArn to this same role), i.e. a self-assuming role.
data "aws_iam_policy_document" "dbx_unity_aws_role_assume_role" {
  statement {
    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"]
    }

    actions = ["sts:AssumeRole"]
    condition {
      test     = "StringEquals"
      variable = "sts:ExternalId"

      values = ["4a2f419c-ae7a-49f1-b774-8f3113d9834d"]
    }
  }
  statement {
    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
    }

    actions = ["sts:AssumeRole"]
    condition {
      test     = "ArnEquals"
      variable = "aws:PrincipalArn"
      # local.path starts/ends with "/", forming role/<path>/<name>
      values = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.unity_aws_role_name}"]
    }
  }
}

# IAM role assumed by Databricks Unity Catalog (and by itself) to reach the
# volume bucket; name and path come from locals defined elsewhere in the module.
resource "aws_iam_role" "dbx_unity_aws_role" {
  name               = local.unity_aws_role_name
  path               = local.path
  assume_role_policy = data.aws_iam_policy_document.dbx_unity_aws_role_assume_role.json
}

### Policy document to access default volume bucket and assume role
data "aws_iam_policy_document" "volume_bucket_dbx_unity_access" {
depends_on = [
module.databricks_bucket
]

statement {
sid = "dbxSCBucketAccess"
effect = "Allow"
actions = [
"s3:ListBucket",
"s3:GetBucketLocation",
"s3:GetLifecycleConfiguration",
"s3:PutLifecycleConfiguration"
]
resources = [
"arn:aws:s3:::${local.bucket_name}",
]
}
statement {
sid = "dbxSCObjAccess"
effect = "Allow"
actions = [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
]
resources = [
"arn:aws:s3:::${local.bucket_name}/*",
]
}
statement {
sid = "databricksAssumeRole"
effect = "Allow"
actions = [
"sts:AssumeRole"
]
resources = [
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${local.unity_aws_role_name}"
]
}
}

# Customer-managed policy wrapping the access document above.
# NOTE(review): no `name`/`name_prefix` is set, so AWS auto-generates the policy
# name — consider naming it after the role for easier identification.
resource "aws_iam_policy" "dbx_unity_access_policy" {
  policy = data.aws_iam_policy_document.volume_bucket_dbx_unity_access.json
}

# Attach the bucket/assume-role policy to the Unity Catalog role.
resource "aws_iam_role_policy_attachment" "dbx_unity_aws_access" {
  policy_arn = aws_iam_policy.dbx_unity_access_policy.arn
  role       = aws_iam_role.dbx_unity_aws_role.name
}
Loading

0 comments on commit 2d72a6c

Please sign in to comment.