Skip to content

Commit

Permalink
feat: Create databricks catalog ext loc
Browse files Browse the repository at this point in the history
  • Loading branch information
alldoami committed Aug 14, 2024
1 parent 2e7ef2a commit fc031fb
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 0 deletions.
163 changes: 163 additions & 0 deletions databricks-catalog-external-location/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Looks up the identity (account ID, ARN, user ID) behind the AWS provider
# credentials this module runs with.
data "aws_caller_identity" "current" {
  provider = aws
}

locals {
  # IAM path under which this module's role is created.
  path = "/databricks/"
}

## Bucket and policy

# Bucket policy for the catalog bucket. Both statements name the external
# location IAM role as the only principal.
data "aws_iam_policy_document" "catalog_bucket_access" {
  # Object-level access: applies to every key in the bucket.
  statement {
    sid    = "bucketObjAccess"
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = [aws_iam_role.databricks_external_location_iam_role.arn]
    }

    actions = [
      "s3:GetObject",
      "s3:GetObjectVersion",
      "s3:PutObject",
      "s3:DeleteObject",
    ]

    resources = ["${module.catalog_bucket.arn}/*"]
  }

  # Bucket-level access: applies to the bucket ARN itself.
  statement {
    sid    = "bucketAccess"
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = [aws_iam_role.databricks_external_location_iam_role.arn]
    }

    actions = [
      "s3:ListBucket",
      "s3:GetBucketLocation",
    ]

    resources = [module.catalog_bucket.arn]
  }
}

# S3 bucket for the catalog, created through cztack's aws-s3-private-bucket
# module (pinned to v0.60.1) with the bucket policy defined in this file.
module "catalog_bucket" {
  source = "github.com/chanzuckerberg/cztack//aws-s3-private-bucket?ref=v0.60.1"

  bucket_name   = var.bucket_name
  bucket_policy = data.aws_iam_policy_document.catalog_bucket_access.json

  # Tagging inputs, each read from the matching key of var.tags.
  env     = var.tags.env
  owner   = var.tags.owner
  project = var.tags.project
  service = var.tags.service
}

## Databricks external location and IAM

locals {
  # ARN the IAM role below will have once it exists. It is assembled from its
  # parts instead of read from aws_iam_role.<name>.arn because the role's own
  # trust policy needs it, and a direct reference would be a dependency cycle.
  databricks_external_location_iam_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${var.databricks_external_location_iam_role}"
}

# Trust policy for the external location role.
data "aws_iam_policy_document" "databricks_external_location_assume_role" {
  # Lets the Unity Catalog master role assume this role, but only when the
  # caller presents the expected external ID.
  statement {
    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"]
    }

    actions = ["sts:AssumeRole"]
    condition {
      test     = "StringEquals"
      variable = "sts:ExternalId"

      values = [var.databricks_external_id]
    }
  }

  # Makes the role self-assuming, which Databricks requires for roles used by
  # Unity Catalog storage credentials. The principal is the account root
  # narrowed by aws:PrincipalArn, since IAM rejects a trust policy that names
  # a role ARN that does not exist yet (this role, at creation time).
  statement {
    sid     = "ExplicitSelfRoleAssumption"
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
    }

    condition {
      test     = "ArnLike"
      variable = "aws:PrincipalArn"
      values   = [local.databricks_external_location_iam_role_arn]
    }

    condition {
      test     = "StringEquals"
      variable = "sts:ExternalId"
      values   = [var.databricks_external_id]
    }
  }
}

# IAM role that the Databricks storage credential points at.
resource "aws_iam_role" "databricks_external_location_iam_role" {
  name               = var.databricks_external_location_iam_role
  path               = local.path
  assume_role_policy = data.aws_iam_policy_document.databricks_external_location_assume_role.json
}

### Policy documents to access bucket and assume role
data "aws_iam_policy_document" "databricks_external_location_bucket_access" {
  # S3 permissions on the catalog bucket and every object in it.
  statement {
    sid    = "databricksObjAccess"
    effect = "Allow"
    actions = [
      "s3:GetObject",
      "s3:PutObject",
      "s3:DeleteObject",
      "s3:ListBucket",
      "s3:GetBucketLocation",
      "s3:GetLifecycleConfiguration",
      "s3:PutLifecycleConfiguration"
    ]
    resources = [
      "${module.catalog_bucket.arn}/*",
      module.catalog_bucket.arn,
    ]
  }

  # Identity-side half of self-assumption: the trust policy above delegates
  # to the account root, so the role also needs explicit permission to call
  # sts:AssumeRole on itself.
  statement {
    sid       = "databricksSelfAssumeRole"
    effect    = "Allow"
    actions   = ["sts:AssumeRole"]
    resources = [aws_iam_role.databricks_external_location_iam_role.arn]
  }
}

### Policies to access bucket
# Managed IAM policy rendered from the bucket-access policy document. No name
# is set here, so the AWS provider assigns one.
resource "aws_iam_policy" "databricks_external_location_bucket_access" {
  policy = data.aws_iam_policy_document.databricks_external_location_bucket_access.json
}

### Attachments for policies to access bucket, to the role that Databricks can assume
# Binds the bucket-access policy to the external location role.
resource "aws_iam_role_policy_attachment" "databricks_external_location_bucket_access" {
  role       = aws_iam_role.databricks_external_location_iam_role.name
  policy_arn = aws_iam_policy.databricks_external_location_bucket_access.arn
}

### Databricks storage credential - allows workspace to access an external location.
### NOTE: names need to be unique across an account, not just a workspace
# The credential takes its name from the IAM role and points at that role's ARN.
resource "databricks_storage_credential" "external" {
  name    = aws_iam_role.databricks_external_location_iam_role.name
  comment = "Managed by Terraform"

  aws_iam_role {
    role_arn = aws_iam_role.databricks_external_location_iam_role.arn
  }

  # Create only after both the role and the bucket module have been applied.
  depends_on = [
    aws_iam_role.databricks_external_location_iam_role,
    module.catalog_bucket,
  ]
}

## user/group Grants to an external storage
# Emits one grant block per distinct entry in var.group_names, each giving
# that principal ALL_PRIVILEGES on the storage credential.
resource "databricks_grants" "databricks_credential_grants" {
  storage_credential = databricks_storage_credential.external.id

  dynamic "grant" {
    for_each = toset(var.group_names)

    content {
      principal  = grant.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }

  depends_on = [
    databricks_storage_credential.external,
  ]
}

# External location registering the catalog bucket's S3 URL against the
# storage credential defined in this module.
resource "databricks_external_location" "external_locations" {
  name            = "external-${var.name_prefix}"
  url             = "s3://${var.bucket_name}"
  credential_name = databricks_storage_credential.external.id
  comment         = "Managed by Terraform"

  depends_on = [
    resource.databricks_storage_credential.external,
    # Nothing else orders this resource after the policy attachment. Without
    # this entry the location can be created (and its access validated) before
    # the role has any S3 permissions, failing the first apply.
    aws_iam_role_policy_attachment.databricks_external_location_bucket_access,
  ]
}

# Emits one grant block per distinct entry in var.group_names, each giving
# that principal ALL_PRIVILEGES on the external location.
resource "databricks_grants" "databricks_external_location_grants" {
  external_location = databricks_external_location.external_locations.id

  dynamic "grant" {
    for_each = toset(var.group_names)

    content {
      principal  = grant.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }
}
29 changes: 29 additions & 0 deletions databricks-catalog-external-location/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# main.tf reads the keys project, env, service and owner from this map and
# passes them to the bucket module.
variable "tags" {
  type        = map(string)
  description = "Tags to apply to resources"
}

# Passed to the bucket module and used to build the external location's
# s3:// URL.
variable "bucket_name" {
  type        = string
  description = "Name of the bucket"
}

# Used as the name of the IAM role created in main.tf, and therefore also as
# the name of the Databricks storage credential.
variable "databricks_external_location_iam_role" {
  description = "Name of the IAM role that the Databricks storage credential uses to access the bucket"
  type        = string
}

# Appended to "external-" to form the external location's name.
variable "name_prefix" {
  type        = string
  description = "Prefix to apply to resources"
}

# Each entry becomes a grant principal on both the storage credential and the
# external location. Duplicates collapse because main.tf wraps it in toset().
variable "group_names" {
  type        = list(string)
  description = "List of group names to grant access to"
}

# Used only as the sts:ExternalId condition value in the IAM role's trust
# policy; it is not the identifier of any Databricks external location.
variable "databricks_external_id" {
  description = "External ID that must be presented (sts:ExternalId) when assuming the external location IAM role"
  type        = string
}

0 comments on commit fc031fb

Please sign in to comment.