Skip to content

Commit

Permalink
feat: Create databricks catalog ext loc modules (#614)
Browse files Browse the repository at this point in the history
  • Loading branch information
alldoami authored Aug 16, 2024
1 parent 2e7ef2a commit 8b47a1c
Show file tree
Hide file tree
Showing 4 changed files with 266 additions and 0 deletions.
54 changes: 54 additions & 0 deletions databricks-catalog-external-location/catalogs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# One Unity Catalog catalog per entry in var.catalogs, keyed by catalog name.
# Each catalog stores its managed data under its own prefix in the catalog bucket.
resource "databricks_catalog" "catalog" {
  for_each = { for catalog in var.catalogs : catalog.name => catalog }

  name = each.value.name

  # Fixed: the original "{each.value.name}" was missing the "$", so the text was
  # taken literally and every catalog pointed at the same "/{each.value.name}" path.
  # NOTE(review): correcting storage_root on catalogs that were already applied
  # with the literal path will likely force replacement — confirm before applying
  # against existing state.
  storage_root   = "s3://${module.catalog_bucket.name}/${each.value.name}"
  comment        = "this catalog is managed by terraform"
  isolation_mode = each.value.isolation_mode
  owner          = each.value.owner

  # The storage root lives in the bucket registered by the external location, but
  # nothing above references that resource, so order the catalog after it explicitly.
  depends_on = [
    databricks_external_location.external_locations,
  ]
}

# Catalog-level grants, one databricks_grants resource per entry in var.catalogs.
# Three tiers of principals are taken from each catalog object:
#   - all_privileges_groups   -> ALL_PRIVILEGES
#   - read_privileges_groups  -> use + select
#   - write_privileges_groups -> read tier plus create/modify
resource "databricks_grants" "grants" {
  for_each = { for entry in var.catalogs : entry.name => entry }

  catalog = each.value.name

  # Full-control groups.
  dynamic "grant" {
    for_each = toset(each.value.all_privileges_groups)
    iterator = group

    content {
      principal  = group.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }

  # Read-only groups.
  dynamic "grant" {
    for_each = toset(each.value.read_privileges_groups)
    iterator = group

    content {
      principal  = group.value
      privileges = ["USE_CATALOG", "USE_SCHEMA", "SELECT"]
    }
  }

  # Read/write groups.
  dynamic "grant" {
    for_each = toset(each.value.write_privileges_groups)
    iterator = group

    content {
      principal = group.value
      privileges = [
        "USE_CATALOG",
        "USE_SCHEMA",
        "SELECT",
        "CREATE_TABLE",
        "CREATE_SCHEMA",
        "MODIFY",
      ]
    }
  }

  # Grants can only be applied once the catalogs exist.
  depends_on = [databricks_catalog.catalog]
}
168 changes: 168 additions & 0 deletions databricks-catalog-external-location/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Looks up the identity of the AWS credentials Terraform is running with.
# NOTE(review): not referenced anywhere in the visible part of this module —
# confirm whether it is still needed.
data "aws_caller_identity" "current" {
  provider = aws
}

# Naming helpers derived from var.tags (requires the "project" and "env" keys).
locals {
  # "<project>-<env>" prefix shared by derived names.
  name = "${var.tags.project}-${var.tags.env}"

  # S3 bucket that backs the catalogs and the external location.
  bucket_name = "${local.name}-dbx-catalog-bucket"

  # IAM role (and its path) that Databricks assumes to reach the bucket.
  iam_role_name = "external_location_dbx_${var.tags.env}_aws_role"
  path          = "/databricks/"
}

## Bucket and policy
# Bucket policy for the catalog bucket (passed to module.catalog_bucket as
# bucket_policy). Both statements name the external-location IAM role as principal.
data "aws_iam_policy_document" "catalog_bucket_access" {
  # Object-level access: applies to every key in the bucket.
  statement {
    sid    = "bucketObjAccess"
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = [aws_iam_role.databricks_external_location_iam_role.arn]
    }

    actions = [
      "s3:GetObject",
      "s3:GetObjectVersion",
      "s3:PutObject",
      "s3:DeleteObject",
    ]

    resources = ["${module.catalog_bucket.arn}/*"]
  }

  # Bucket-level access: applies to the bucket itself.
  statement {
    sid    = "bucketAccess"
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = [aws_iam_role.databricks_external_location_iam_role.arn]
    }

    actions = [
      "s3:ListBucket",
      "s3:GetBucketLocation",
    ]

    resources = [module.catalog_bucket.arn]
  }
}

# Private S3 bucket (cztack aws-s3-private-bucket module, pinned to v0.60.1) that
# holds catalog data; the bucket policy defined in this file is attached to it.
module "catalog_bucket" {
  source = "github.com/chanzuckerberg/cztack//aws-s3-private-bucket?ref=v0.60.1"

  bucket_name   = local.bucket_name
  bucket_policy = data.aws_iam_policy_document.catalog_bucket_access.json

  # Tag values forwarded from var.tags.
  project = var.tags.project
  env     = var.tags.env
  service = var.tags.service
  owner   = var.tags.owner
}

## Databricks external location and IAM
# Trust policy for the external-location IAM role: lets the hard-coded Unity
# Catalog master role call sts:AssumeRole, but only when the request carries the
# external ID supplied in var.databricks_external_id.
data "aws_iam_policy_document" "databricks_external_location_assume_role" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      type = "AWS"
      identifiers = [
        "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL",
      ]
    }

    condition {
      test     = "StringEquals"
      variable = "sts:ExternalId"
      values   = [var.databricks_external_id]
    }
  }
}

# IAM role used by the Databricks storage credential; its name and path come from
# locals and its trust policy from the assume-role policy document.
resource "aws_iam_role" "databricks_external_location_iam_role" {
  name               = local.iam_role_name
  path               = local.path
  assume_role_policy = data.aws_iam_policy_document.databricks_external_location_assume_role.json
}

### Policy document granting the external-location role access to the catalog bucket
# Identity-based policy document for the external-location role: a single Allow
# statement covering both the catalog bucket and the objects inside it.
data "aws_iam_policy_document" "databricks_external_location_bucket_access" {
  statement {
    sid    = "databricksObjAccess"
    effect = "Allow"

    resources = [
      "${module.catalog_bucket.arn}/*",
      module.catalog_bucket.arn,
    ]

    actions = [
      "s3:GetObject",
      "s3:PutObject",
      "s3:DeleteObject",
      "s3:ListBucket",
      "s3:GetBucketLocation",
      "s3:GetLifecycleConfiguration",
      "s3:PutLifecycleConfiguration",
    ]
  }
}

### Policies to access bucket
# Managed IAM policy built from the bucket-access policy document.
# No name is set here, so the provider chooses one.
resource "aws_iam_policy" "databricks_external_location_bucket_access" {
  policy = data.aws_iam_policy_document.databricks_external_location_bucket_access.json
}

### Attachments for policies to access bucket, to the role that Databricks can assume
# Attaches the bucket-access policy to the role that Databricks assumes.
resource "aws_iam_role_policy_attachment" "databricks_external_location_bucket_access" {
  role       = aws_iam_role.databricks_external_location_iam_role.name
  policy_arn = aws_iam_policy.databricks_external_location_bucket_access.arn
}

### Databricks storage credential - allows workspace to access an external location.
### NOTE: names need to be unique across an account, not just a workspace
# Storage credential wrapping the external-location IAM role. The credential
# reuses the IAM role's name as its own name.
resource "databricks_storage_credential" "external" {
  name           = aws_iam_role.databricks_external_location_iam_role.name
  comment        = "Managed by Terraform"
  isolation_mode = "ISOLATION_MODE_ISOLATED"

  aws_iam_role {
    role_arn = aws_iam_role.databricks_external_location_iam_role.arn
  }

  # Create the credential only after the role and the whole bucket module exist.
  depends_on = [
    aws_iam_role.databricks_external_location_iam_role,
    module.catalog_bucket,
  ]
}

## User/group grants on the storage credential
# Grants ALL_PRIVILEGES on the storage credential to every group listed in
# var.all_privileges_groups.
resource "databricks_grants" "databricks_credential_grants" {
  storage_credential = databricks_storage_credential.external.id

  dynamic "grant" {
    for_each = toset(var.all_privileges_groups)
    iterator = group

    content {
      principal  = group.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }

  depends_on = [databricks_storage_credential.external]
}

# External location registering the root of the catalog bucket, accessed through
# the storage credential defined in this file.
resource "databricks_external_location" "external_locations" {
  name            = var.external_location_name
  comment         = "Managed by Terraform"
  url             = "s3://${local.bucket_name}"
  credential_name = databricks_storage_credential.external.id
  isolation_mode  = "ISOLATION_MODE_ISOLATED"

  depends_on = [databricks_storage_credential.external]
}

# Grants ALL_PRIVILEGES on the external location to every group listed in
# var.all_privileges_groups.
resource "databricks_grants" "databricks_external_location_grants" {
  external_location = databricks_external_location.external_locations.id

  dynamic "grant" {
    for_each = toset(var.all_privileges_groups)
    iterator = group

    content {
      principal  = group.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }
}
32 changes: 32 additions & 0 deletions databricks-catalog-external-location/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# The module reads the "project", "env", "service" and "owner" keys of this map.
variable "tags" {
  type        = map(string)
  description = "Tags to apply to resources"
}

# Groups that receive ALL_PRIVILEGES on the storage credential and on the
# external location.
variable "all_privileges_groups" {
  type        = list(string)
  description = "List of group names to grant access to"
}

# Fixed description: this value is not the ID of an external location. It is the
# value matched by the sts:ExternalId condition in the IAM role's trust policy.
# NOTE(review): presumably the Databricks account ID — confirm with callers.
variable "databricks_external_id" {
  description = "External ID that must be supplied (sts:ExternalId) when Databricks assumes the external location IAM role"
  type        = string
}

# Used as the name of the databricks_external_location resource.
variable "external_location_name" {
  type        = string
  description = "Name of the external location"
}

# Catalog definitions. Each object produces one catalog plus its grants:
#   name                    - catalog name; also the for_each key and storage prefix
#   isolation_mode          - catalog isolation mode, defaults to "ISOLATED"
#   owner                   - principal that owns the catalog
#   all_privileges_groups   - groups granted ALL_PRIVILEGES on the catalog
#   read_privileges_groups  - groups granted read-only access (default: none)
#   write_privileges_groups - groups granted read/write access (default: none)
variable "catalogs" {
  description = "List of catalogs to create with their corresponding attributes"
  type = list(
    object({
      name                    = string
      isolation_mode          = optional(string, "ISOLATED")
      owner                   = string
      all_privileges_groups   = list(string)
      read_privileges_groups  = optional(list(string), [])
      write_privileges_groups = optional(list(string), [])
  }))

  # Reject unsupported isolation modes at plan time instead of failing mid-apply.
  validation {
    condition = alltrue([
      for catalog in var.catalogs : contains(["ISOLATED", "OPEN"], catalog.isolation_mode)
    ])
    error_message = "Each catalog's isolation_mode must be either \"ISOLATED\" or \"OPEN\"."
  }
}
12 changes: 12 additions & 0 deletions databricks-catalog-external-location/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Terraform and provider requirements for this module.
terraform {
  # optional() attributes with defaults (used in variables.tf) need Terraform 1.3+.
  required_version = ">= 1.3.0"

  required_providers {
    # No version constraint is placed on the AWS provider.
    aws = {
      source = "hashicorp/aws"
    }

    # Databricks provider is pinned to an exact version.
    databricks = {
      source  = "databricks/databricks"
      version = "1.49.1"
    }
  }
}

0 comments on commit 8b47a1c

Please sign in to comment.