diff --git a/aws_datalake/modules/emr/main.tf b/aws_datalake/modules/emr/main.tf
index 1b0c581..e73eae2 100644
--- a/aws_datalake/modules/emr/main.tf
+++ b/aws_datalake/modules/emr/main.tf
@@ -17,6 +17,18 @@ resource "aws_emr_cluster" "segment_data_lake_emr_cluster" {
   service_role           = var.iam_emr_service_role
   autoscaling_role       = var.iam_emr_autoscaling_role
   security_configuration = var.security_configuration
+
+  # One bootstrap_action block is generated per element of var.bootstrap_action.
+  dynamic "bootstrap_action" {
+    for_each = var.bootstrap_action
+
+    content {
+      # args is optional: try() falls back to null when an element has no args.
+      args = try(bootstrap_action.value.args, null)
+      name = bootstrap_action.value.name
+      path = bootstrap_action.value.path
+    }
+  }
 
   master_instance_group {
     instance_type = var.master_instance_type
diff --git a/aws_datalake/modules/emr/variables.tf b/aws_datalake/modules/emr/variables.tf
index 49ff7c8..f1c0186 100644
--- a/aws_datalake/modules/emr/variables.tf
+++ b/aws_datalake/modules/emr/variables.tf
@@ -101,6 +101,14 @@ variable "task_instance_max_count" {
   default = "4"
 }
 
+# Each element must provide `name` and `path` and may provide `args`; those are
+# the attributes read by the bootstrap_action block in this module's main.tf.
+variable "bootstrap_action" {
+  description = "Ordered list of bootstrap actions that will be run before Hadoop is started on the cluster nodes"
+  type        = any
+  default     = []
+}
+
 locals {
   tags = merge(tomap({"vendor" = "segment"}), var.tags)
 }
diff --git a/aws_datalake/modules/iam/main.tf b/aws_datalake/modules/iam/main.tf
index 044b806..6619dad 100644
--- a/aws_datalake/modules/iam/main.tf
+++ b/aws_datalake/modules/iam/main.tf
@@ -289,9 +289,8 @@ resource "aws_iam_instance_profile" "segment_emr_instance_profile" {
   role = aws_iam_role.segment_emr_instance_profile_role.name
 }
 
-resource "aws_iam_role_policy" "segment_emr_instance_profile_policy" {
+resource "aws_iam_policy" "segment_emr_instance_profile_policy" {
   name = "SegmentEMRInstanceProfilePolicy${var.suffix}"
-  role = aws_iam_role.segment_emr_instance_profile_role.id
   policy = <