Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Create databricks catalog ext loc modules #614

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions databricks-catalog-external-location/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Identity (account / caller) behind the default `aws` provider configuration.
# NOTE(review): no resource visible in this module reads this data source —
# confirm whether it is still needed.
data "aws_caller_identity" "current" {
  provider = aws
}

locals {
  # IAM path under which the external-location role is created.
  path = "/databricks/"
}

## Bucket and policy
# Bucket policy for the catalog bucket. Both statements name the
# external-location IAM role as the only principal:
#   - object-level actions apply to "<bucket-arn>/*"
#   - bucket-level actions apply to the bucket ARN itself
data "aws_iam_policy_document" "catalog_bucket_access" {
  # Read, write and delete objects (including explicit object versions).
  statement {
    sid    = "bucketObjAccess"
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = [aws_iam_role.databricks_external_location_iam_role.arn]
    }

    actions = [
      "s3:GetObject",
      "s3:GetObjectVersion",
      "s3:PutObject",
      "s3:DeleteObject",
    ]

    resources = ["${module.catalog_bucket.arn}/*"]
  }

  # List the bucket and discover its region.
  statement {
    sid    = "bucketAccess"
    effect = "Allow"

    principals {
      type        = "AWS"
      identifiers = [aws_iam_role.databricks_external_location_iam_role.arn]
    }

    actions = [
      "s3:ListBucket",
      "s3:GetBucketLocation",
    ]

    resources = [module.catalog_bucket.arn]
  }
}

# Catalog storage bucket, built from the cztack private-bucket module pinned
# at v0.60.1. The bucket policy defined in this file is passed in, and the
# project/env/service/owner inputs are taken from var.tags.
module "catalog_bucket" {
  source = "github.com/chanzuckerberg/cztack//aws-s3-private-bucket?ref=v0.60.1"

  bucket_name   = var.bucket_name
  bucket_policy = data.aws_iam_policy_document.catalog_bucket_access.json

  project = var.tags.project
  env     = var.tags.env
  service = var.tags.service
  owner   = var.tags.owner
}

## Databricks external location and IAM
# Trust policy for the external-location role: a single fixed AWS principal
# may call sts:AssumeRole, and only when it presents the external ID supplied
# through var.databricks_external_id.
# NOTE(review): the hard-coded principal is presumably Databricks' Unity
# Catalog master role — confirm against the Databricks documentation.
# NOTE(review): this policy does not let the role assume itself; verify
# whether Databricks requires a self-assuming role for storage credentials.
data "aws_iam_policy_document" "databricks_external_location_assume_role" {
  statement {
    actions = ["sts:AssumeRole"]

    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"]
    }

    condition {
      test     = "StringEquals"
      variable = "sts:ExternalId"
      values   = [var.databricks_external_id]
    }
  }
}

# IAM role referenced by the Databricks storage credential. It is created
# under local.path with the trust policy defined in this file.
resource "aws_iam_role" "databricks_external_location_iam_role" {
  name               = var.databricks_external_location_iam_role
  path               = local.path
  assume_role_policy = data.aws_iam_policy_document.databricks_external_location_assume_role.json
}

### Policy document granting the role access to the catalog bucket
# Identity policy document for the external-location role. One statement
# allows object, listing and lifecycle-configuration actions on both the
# catalog bucket and every object inside it.
data "aws_iam_policy_document" "databricks_external_location_bucket_access" {
  statement {
    sid    = "databricksObjAccess"
    effect = "Allow"

    # The bucket ARN and the "/*" object ARN are both listed because the
    # actions below mix bucket-level and object-level operations.
    resources = [
      "${module.catalog_bucket.arn}/*",
      module.catalog_bucket.arn,
    ]

    actions = [
      "s3:GetObject",
      "s3:PutObject",
      "s3:DeleteObject",
      "s3:ListBucket",
      "s3:GetBucketLocation",
      "s3:GetLifecycleConfiguration",
      "s3:PutLifecycleConfiguration",
    ]
  }
}

### Policies to access bucket
# Managed IAM policy wrapping the bucket-access policy document.
# No name or name_prefix is set on this resource.
resource "aws_iam_policy" "databricks_external_location_bucket_access" {
  policy = data.aws_iam_policy_document.databricks_external_location_bucket_access.json
}

### Attachments for policies to access bucket, to the role that Databricks can assume
# Attaches the bucket-access policy to the external-location role.
resource "aws_iam_role_policy_attachment" "databricks_external_location_bucket_access" {
  role       = aws_iam_role.databricks_external_location_iam_role.name
  policy_arn = aws_iam_policy.databricks_external_location_bucket_access.arn
}

### Databricks storage credential - allows workspace to access an external location.
### NOTE: names need to be unique across an account, not just a workspace
# Storage credential backed by the external-location IAM role. The credential
# reuses the role's name as its own name.
resource "databricks_storage_credential" "external" {
  name    = aws_iam_role.databricks_external_location_iam_role.name
  comment = "Managed by Terraform"

  aws_iam_role {
    role_arn = aws_iam_role.databricks_external_location_iam_role.arn
  }

  # Explicit ordering: wait for the role and for everything in the bucket
  # module before creating the credential.
  depends_on = [
    aws_iam_role.databricks_external_location_iam_role,
    module.catalog_bucket,
  ]
}

## User/group grants on the storage credential
# Grants ALL_PRIVILEGES on the storage credential to every group listed in
# var.group_names (one grant block per distinct group).
resource "databricks_grants" "databricks_credential_grants" {
  storage_credential = databricks_storage_credential.external.id

  dynamic "grant" {
    for_each = toset(var.group_names)

    content {
      principal  = grant.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }

  depends_on = [
    databricks_storage_credential.external,
  ]
}

# External location pointing at the root of the catalog bucket, accessed
# through the storage credential defined in this module.
resource "databricks_external_location" "external_locations" {
  name            = "external-${var.name_prefix}"
  url             = "s3://${var.bucket_name}"
  credential_name = databricks_storage_credential.external.id
  comment         = "Managed by Terraform"

  depends_on = [
    databricks_storage_credential.external,
    # Nothing else orders this resource after the IAM policy attachment, so
    # without this entry the location could be created (and its access
    # checked) before the role is actually allowed to reach the bucket.
    aws_iam_role_policy_attachment.databricks_external_location_bucket_access,
  ]
}

# Grants ALL_PRIVILEGES on the external location to every group listed in
# var.group_names (one grant block per distinct group).
resource "databricks_grants" "databricks_external_location_grants" {
  external_location = databricks_external_location.external_locations.id

  dynamic "grant" {
    for_each = toset(var.group_names)

    content {
      principal  = grant.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }
}
29 changes: 29 additions & 0 deletions databricks-catalog-external-location/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# The module reads exactly these four keys (var.tags.project / env / service /
# owner) and forwards them to the bucket module. Declaring them as an object
# makes a missing key fail at the module boundary with a clear message; maps
# that contain at least these keys are still accepted and converted.
variable "tags" {
  description = "Standard tags (project, env, service, owner) passed to the catalog bucket module"
  type = object({
    project = string
    env     = string
    service = string
    owner   = string
  })
}

# Name given to the catalog bucket; also used to build the external
# location's s3:// URL.
variable "bucket_name" {
  type        = string
  description = "Name of the bucket"
}

# Used as the name of the IAM role created by this module and, through that
# role, as the name of the Databricks storage credential.
variable "databricks_external_location_iam_role" {
  description = "Name of the IAM role (and storage credential) used by Databricks to access the external location"
  type        = string
}

# Suffix of the external location's name, which is built as
# "external-<name_prefix>".
variable "name_prefix" {
  description = "Prefix to apply to resources"
  type        = string
}

# Every group listed here receives ALL_PRIVILEGES on both the storage
# credential and the external location.
variable "group_names" {
  description = "List of group names to grant access to"
  type        = list(string)
}

# Compared against sts:ExternalId in the role's trust policy; it is not an
# identifier of the external location resource itself.
variable "databricks_external_id" {
  description = "External ID that must be presented (sts:ExternalId) when assuming the external location IAM role"
  type        = string
}
11 changes: 11 additions & 0 deletions databricks-catalog-external-location/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Terraform and provider requirements for this module. Only provider sources
# are declared; no provider version constraints are set here.
terraform {
  required_version = ">= 1.3.0"

  required_providers {
    aws = {
      source = "hashicorp/aws"
    }

    databricks = {
      source = "databricks/databricks"
    }
  }
}
52 changes: 52 additions & 0 deletions databricks-catalog/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Catalog whose managed storage root is the top level of the given bucket.
# Isolation mode and owner come straight from the module inputs.
resource "databricks_catalog" "catalog" {
  name           = var.catalog_name
  storage_root   = "s3://${var.bucket_name}"
  comment        = "this catalog is managed by terraform"
  isolation_mode = var.isolation_mode
  owner          = var.catalog_owner
}

# Catalog-level grants in three tiers, one grant block per distinct group:
#   - all_privileges_groups   -> ALL_PRIVILEGES
#   - read_privileges_groups  -> use catalog/schema + SELECT
#   - write_privileges_groups -> read tier + create table/schema + MODIFY
# NOTE(review): all three lists default to empty, which yields no grant
# blocks at all — confirm the provider accepts that.
# NOTE(review): a group present in more than one list produces several grant
# blocks for the same principal — verify that this is intended.
resource "databricks_grants" "grants" {
  catalog = databricks_catalog.catalog.name

  dynamic "grant" {
    for_each = toset(var.all_privileges_groups)

    content {
      principal  = grant.value
      privileges = ["ALL_PRIVILEGES"]
    }
  }

  dynamic "grant" {
    for_each = toset(var.read_privileges_groups)

    content {
      principal = grant.value
      privileges = [
        "USE_CATALOG",
        "USE_SCHEMA",
        "SELECT",
      ]
    }
  }

  dynamic "grant" {
    for_each = toset(var.write_privileges_groups)

    content {
      principal = grant.value
      privileges = [
        "USE_CATALOG",
        "USE_SCHEMA",
        "SELECT",
        "CREATE_TABLE",
        "CREATE_SCHEMA",
        "MODIFY",
      ]
    }
  }

  depends_on = [
    databricks_catalog.catalog,
  ]
}
39 changes: 39 additions & 0 deletions databricks-catalog/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Becomes the `name` of the Databricks catalog.
variable "catalog_name" {
  type        = string
  description = "Name of the catalog"
}

# Bucket whose root ("s3://<bucket_name>") is used as the catalog's
# storage root.
variable "bucket_name" {
  type        = string
  description = "Name of the bucket"
}

# Passed straight to databricks_catalog.isolation_mode. Validating here turns
# a typo into a plan-time error instead of a failed apply.
variable "isolation_mode" {
  description = "Isolation mode of the catalog (ISOLATED or OPEN)"
  type        = string
  default     = "ISOLATED"

  validation {
    condition     = contains(["ISOLATED", "OPEN"], var.isolation_mode)
    error_message = "The isolation_mode value must be either \"ISOLATED\" or \"OPEN\"."
  }
}

# Passed to databricks_catalog.owner. The default is an empty string.
variable "catalog_owner" {
  description = "Owner of the catalog"
  type        = string
  default     = ""
}

# Groups that receive ALL_PRIVILEGES on the catalog. Empty by default.
variable "all_privileges_groups" {
  type        = list(string)
  default     = []
  description = "Groups with all privileges"
}

# Groups that receive USE_CATALOG, USE_SCHEMA and SELECT on the catalog.
# Empty by default.
variable "read_privileges_groups" {
  type        = list(string)
  default     = []
  description = "Groups with read privileges"
}

# Groups that receive the read privileges plus CREATE_TABLE, CREATE_SCHEMA
# and MODIFY on the catalog. Empty by default.
variable "write_privileges_groups" {
  type        = list(string)
  default     = []
  description = "Groups with write privileges"
}
11 changes: 11 additions & 0 deletions databricks-catalog/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Terraform and provider requirements for this module. Only provider sources
# are declared; no provider version constraints are set here.
# NOTE(review): the resources visible in this module's main.tf are all
# Databricks resources — confirm the aws provider requirement is still needed.
terraform {
  required_version = ">= 1.3.0"

  required_providers {
    aws = {
      source = "hashicorp/aws"
    }

    databricks = {
      source = "databricks/databricks"
    }
  }
}
Loading