Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Create databricks catalog ext loc modules #614

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions databricks-catalog-external-location/catalogs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# One Unity Catalog catalog per entry in var.catalogs, keyed by catalog name.
resource "databricks_catalog" "catalog" {
  for_each = { for idx, catalog in var.catalogs : catalog.name => catalog }

  name = each.value.name

  # Each catalog gets its own prefix inside the shared catalog bucket. The
  # catalog name has to be interpolated with "${...}"; a bare "{...}" is literal
  # text and would give every catalog the identical storage root.
  storage_root   = "s3://${module.catalog_bucket.name}/${each.value.name}"
  comment        = "this catalog is managed by terraform"
  isolation_mode = each.value.isolation_mode
  owner          = each.value.owner

  # The storage root lives in the bucket registered by the external location,
  # so make sure that location exists before any catalog is created.
  depends_on = [
    databricks_external_location.external_locations
  ]
}

# Catalog-level grants: one databricks_grants resource per catalog, holding a
# grant block for every group listed in each of the three privilege tiers.
resource "databricks_grants" "grants" {
  for_each = { for entry in var.catalogs : entry.name => entry }

  catalog    = each.value.name
  depends_on = [databricks_catalog.catalog]

  # Groups with full control of the catalog.
  dynamic "grant" {
    for_each = toset(each.value.all_privileges_groups)
    content {
      principal  = grant.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }

  # Groups limited to browsing and reading.
  dynamic "grant" {
    for_each = toset(each.value.read_privileges_groups)
    content {
      principal  = grant.value
      privileges = ["USE_CATALOG", "USE_SCHEMA", "SELECT"]
    }
  }

  # Groups that may additionally create schemas/tables and modify data.
  dynamic "grant" {
    for_each = toset(each.value.write_privileges_groups)
    content {
      principal = grant.value
      privileges = [
        "USE_CATALOG",
        "USE_SCHEMA",
        "SELECT",
        "CREATE_TABLE",
        "CREATE_SCHEMA",
        "MODIFY",
      ]
    }
  }
}
168 changes: 168 additions & 0 deletions databricks-catalog-external-location/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Identity of the AWS credentials this module is applied with.
data "aws_caller_identity" "current" {
  provider = aws
}

# Names derived from var.tags that are shared by the resources in this module.
locals {
  # IAM path under which the external-location role is created.
  path = "/databricks/"

  # Common "<project>-<env>" prefix.
  name = "${var.tags.project}-${var.tags.env}"

  bucket_name   = "${local.name}-dbx-catalog-bucket"
  iam_role_name = "external_location_dbx_${var.tags.env}_aws_role"
}

## Bucket and policy
# Bucket policy for the catalog bucket: lets the external-location IAM role
# read/write/delete objects and list/locate the bucket itself.
data "aws_iam_policy_document" "catalog_bucket_access" {
  # Object-level access (applies to every key in the bucket).
  statement {
    sid    = "bucketObjAccess"
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = [aws_iam_role.databricks_external_location_iam_role.arn]
    }

    actions = [
      "s3:GetObject",
      "s3:GetObjectVersion",
      "s3:PutObject",
      "s3:DeleteObject",
    ]
    resources = ["${module.catalog_bucket.arn}/*"]
  }

  # Bucket-level access (applies to the bucket ARN itself).
  statement {
    sid    = "bucketAccess"
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = [aws_iam_role.databricks_external_location_iam_role.arn]
    }

    actions = [
      "s3:ListBucket",
      "s3:GetBucketLocation",
    ]
    resources = [module.catalog_bucket.arn]
  }
}

# Private S3 bucket backing the catalogs, with the access policy above attached.
module "catalog_bucket" {
  source = "github.com/chanzuckerberg/cztack//aws-s3-private-bucket?ref=v0.60.1"

  bucket_name   = local.bucket_name
  bucket_policy = data.aws_iam_policy_document.catalog_bucket_access.json

  # Tagging inputs, passed straight through from var.tags.
  project = var.tags.project
  env     = var.tags.env
  service = var.tags.service
  owner   = var.tags.owner
}

## Databricks external location and IAM
# Trust policy for the external-location role: the principal below may assume
# the role only when it presents the expected external ID.
data "aws_iam_policy_document" "databricks_external_location_assume_role" {
  statement {
    actions = ["sts:AssumeRole"]

    # NOTE(review): presumably the Databricks-owned Unity Catalog role; confirm
    # against the Databricks documentation before changing.
    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"]
    }

    condition {
      test     = "StringEquals"
      variable = "sts:ExternalId"
      values   = [var.databricks_external_id]
    }
  }
}

# IAM role used for the storage credential; its trust policy is the
# assume-role document defined above.
resource "aws_iam_role" "databricks_external_location_iam_role" {
  assume_role_policy = data.aws_iam_policy_document.databricks_external_location_assume_role.json

  name = local.iam_role_name
  path = local.path
}

### Policy documents to access bucket and assume role
# Identity policy document granting object, listing and lifecycle-configuration
# access on the catalog bucket and everything inside it.
data "aws_iam_policy_document" "databricks_external_location_bucket_access" {
  statement {
    sid    = "databricksObjAccess"
    effect = "Allow"

    # Both the bucket ARN and the object wildcard are listed because the
    # actions below mix bucket-level and object-level operations.
    resources = [
      "${module.catalog_bucket.arn}/*",
      module.catalog_bucket.arn,
    ]

    actions = [
      "s3:GetObject",
      "s3:PutObject",
      "s3:DeleteObject",
      "s3:ListBucket",
      "s3:GetBucketLocation",
      "s3:GetLifecycleConfiguration",
      "s3:PutLifecycleConfiguration",
    ]
  }
}

### Policies to access bucket
# Managed IAM policy built from the bucket-access policy document.
resource "aws_iam_policy" "databricks_external_location_bucket_access" {
  policy = data.aws_iam_policy_document.databricks_external_location_bucket_access.json
}

### Attachments for policies to access bucket, to the role that Databricks can assume
# Attaches the bucket-access policy to the external-location role.
resource "aws_iam_role_policy_attachment" "databricks_external_location_bucket_access" {
  role       = aws_iam_role.databricks_external_location_iam_role.name
  policy_arn = aws_iam_policy.databricks_external_location_bucket_access.arn
}

### Databricks storage credential - allows workspace to access an external location.
### NOTE: names need to be unique across an account, not just a workspace
# Storage credential wrapping the external-location IAM role. It is named after
# the role and created only once the role and the catalog bucket exist.
resource "databricks_storage_credential" "external" {
  depends_on = [
    resource.aws_iam_role.databricks_external_location_iam_role,
    module.catalog_bucket
  ]

  name = aws_iam_role.databricks_external_location_iam_role.name

  aws_iam_role {
    role_arn = aws_iam_role.databricks_external_location_iam_role.arn
  }

  isolation_mode = "ISOLATION_MODE_ISOLATED"
  comment        = "Managed by Terraform"
}

## User/group grants on the storage credential
# Gives every group in var.all_privileges_groups ALL_PRIVILEGES on the
# storage credential.
resource "databricks_grants" "databricks_credential_grants" {
  storage_credential = databricks_storage_credential.external.id

  depends_on = [resource.databricks_storage_credential.external]

  dynamic "grant" {
    for_each = toset(var.all_privileges_groups)
    content {
      principal  = grant.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }
}

# External location that registers the catalog bucket, accessed through the
# storage credential defined in this module.
resource "databricks_external_location" "external_locations" {
  depends_on = [
    resource.databricks_storage_credential.external
  ]

  name            = var.external_location_name
  url             = "s3://${local.bucket_name}"
  credential_name = databricks_storage_credential.external.id
  isolation_mode  = "ISOLATION_MODE_ISOLATED"
  comment         = "Managed by Terraform"
}

# Gives every group in var.all_privileges_groups ALL_PRIVILEGES on the
# external location.
resource "databricks_grants" "databricks_external_location_grants" {
  external_location = databricks_external_location.external_locations.id

  dynamic "grant" {
    for_each = toset(var.all_privileges_groups)
    content {
      principal  = grant.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }
}
32 changes: 32 additions & 0 deletions databricks-catalog-external-location/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This module reads the project, env, service and owner keys from this map.
variable "tags" {
  type        = map(string)
  description = "Tags to apply to resources"
}

# Groups that receive ALL_PRIVILEGES on the storage credential and on the
# external location created by this module.
variable "all_privileges_groups" {
  type        = list(string)
  description = "List of group names to grant access to"
}

# Used as the sts:ExternalId condition value in the external-location role's
# trust policy; it is not an identifier of the external location itself.
variable "databricks_external_id" {
  description = "External ID that must be presented (sts:ExternalId) when assuming the external location IAM role"
  type        = string
}

# Name given to the databricks_external_location resource.
variable "external_location_name" {
  type        = string
  description = "Name of the external location"
}

# Catalog definitions. Each entry produces one catalog (keyed by name) plus its
# grants; the read/write group lists default to empty when omitted.
variable "catalogs" {
  description = "List of catalogs to create with their corresponding attributes"
  type = list(
    object({
      name                    = string
      isolation_mode          = optional(string, "ISOLATED")
      owner                   = string
      all_privileges_groups   = list(string)
      read_privileges_groups  = optional(list(string), [])
      write_privileges_groups = optional(list(string), [])
  }))

  # Reject unsupported isolation modes at plan time rather than at apply time.
  validation {
    condition = alltrue([
      for catalog in var.catalogs : contains(["ISOLATED", "OPEN"], catalog.isolation_mode)
    ])
    error_message = "Each catalog's isolation_mode must be either \"ISOLATED\" or \"OPEN\"."
  }
}
12 changes: 12 additions & 0 deletions databricks-catalog-external-location/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Provider and Terraform version requirements for this module.
terraform {
  required_providers {
    aws = {
      source = "hashicorp/aws"
    }
    databricks = {
      source = "databricks/databricks"
      # 1.49.1 is required for the isolation_mode attribute on
      # databricks_external_location. It is declared as a minimum rather than
      # an exact pin so that root modules can select newer provider releases.
      version = ">= 1.49.1"
    }
  }

  # The catalogs variable uses optional() object attributes with defaults.
  required_version = ">= 1.3.0"
}
Loading