From f524f3498584019da424f74e96878ea564857928 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Mon, 29 Jan 2024 20:36:51 -0800 Subject: [PATCH 1/7] Add support for use-web-identity in spark-run --- paasta_tools/cli/cmds/spark_run.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/paasta_tools/cli/cmds/spark_run.py b/paasta_tools/cli/cmds/spark_run.py index f9145305d2..0323413a10 100644 --- a/paasta_tools/cli/cmds/spark_run.py +++ b/paasta_tools/cli/cmds/spark_run.py @@ -439,7 +439,10 @@ def add_subparser(subparsers): aws_group.add_argument( "--assume-aws-role", - help="Takes an AWS IAM role ARN and attempts to create a session", + help=( + "Takes an AWS IAM role ARN and attempts to create a session using " + "spark_role_assumer" + ), ) aws_group.add_argument( @@ -452,6 +455,18 @@ def add_subparser(subparsers): default=43200, ) + aws_group.add_argument( + "--use-web-identity", + help=( + "If the current environment contains AWS_ROLE_ARN and " + "AWS_WEB_IDENTITY_TOKEN_FILE, creates a session to use. These " + "ENV vars must be present, and will be in the context of a pod-" + "identity enabled pod." + ), + action="store_true", + default=False, + ) + jupyter_group = list_parser.add_argument_group( title="Jupyter kernel culling options", description="Idle kernels will be culled by default. Idle " @@ -1274,6 +1289,7 @@ def paasta_spark_run(args): profile_name=args.aws_profile, assume_aws_role_arn=args.assume_aws_role, session_duration=args.aws_role_duration, + use_web_identity=args.use_web_identity, ) docker_image_digest = get_docker_image(args, instance_config) if docker_image_digest is None: From 2c306031a1add206bcb0b6a3a971a6fe67cbcf8b Mon Sep 17 00:00:00 2001 From: Nurdan Almazbekov Date: Fri, 12 Apr 2024 12:48:35 -0700 Subject: [PATCH 2/7] Remove unused argument --- paasta_tools/cli/cmds/spark_run.py | 1 - tests/cli/test_cmds_spark_run.py | 6 ------ 2 files changed, 7 deletions(-) diff --git a/paasta_tools/cli/cmds/spark_run.py b/paasta_tools/cli/cmds/spark_run.py index 0323413a10..dfd463c1e2 100644 --- a/paasta_tools/cli/cmds/spark_run.py +++ b/paasta_tools/cli/cmds/spark_run.py @@ -1284,7 +1284,6 @@ def paasta_spark_run(args): aws_creds = get_aws_credentials( service=args.service, - no_aws_credentials=args.no_aws_credentials, aws_credentials_yaml=args.aws_credentials_yaml, profile_name=args.aws_profile, assume_aws_role_arn=args.assume_aws_role, diff --git a/tests/cli/test_cmds_spark_run.py b/tests/cli/test_cmds_spark_run.py index f6ffd9ee57..e39407db90 100644 --- a/tests/cli/test_cmds_spark_run.py +++ b/tests/cli/test_cmds_spark_run.py @@ -1117,7 +1117,6 @@ def test_paasta_spark_run_bash( cluster="test-cluster", pool="test-pool", yelpsoa_config_root="/path/to/soa", - no_aws_credentials=False, aws_credentials_yaml="/path/to/creds", aws_profile=None, spark_args="spark.cores.max=100 spark.executor.cores=10", @@ -1149,7 +1148,6 @@ def test_paasta_spark_run_bash( ) mock_get_aws_credentials.assert_called_once_with( service="test-service", - no_aws_credentials=False, aws_credentials_yaml="/path/to/creds", profile_name=None, assume_aws_role_arn=None, @@ -1231,7 +1229,6 @@ def test_paasta_spark_run( cluster="test-cluster", pool="test-pool", yelpsoa_config_root="/path/to/soa", - no_aws_credentials=False, aws_credentials_yaml="/path/to/creds", aws_profile=None, spark_args="spark.cores.max=100 spark.executor.cores=10", @@ -1263,7 +1260,6 @@ def test_paasta_spark_run( ) mock_get_aws_credentials.assert_called_once_with( service="test-service", - no_aws_credentials=False, aws_credentials_yaml="/path/to/creds", profile_name=None, assume_aws_role_arn=None, @@ -1344,7 +1340,6 @@ def test_paasta_spark_run_pyspark( cluster="test-cluster", pool="test-pool", yelpsoa_config_root="/path/to/soa", - no_aws_credentials=False, aws_credentials_yaml="/path/to/creds", aws_profile=None, spark_args="spark.cores.max=70 spark.executor.cores=10", @@ -1379,7 +1374,6 @@ def test_paasta_spark_run_pyspark( ) mock_get_aws_credentials.assert_called_once_with( service="test-service", - no_aws_credentials=False, aws_credentials_yaml="/path/to/creds", profile_name=None, assume_aws_role_arn=None, From 894c622307447e781e3f21bd4ca1bed747df9400 Mon Sep 17 00:00:00 2001 From: Nurdan Almazbekov Date: Mon, 22 Apr 2024 14:31:46 -0700 Subject: [PATCH 3/7] Update service-configuration-lib --- requirements-minimal.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-minimal.txt b/requirements-minimal.txt index a42e3006c9..40024715b0 100644 --- a/requirements-minimal.txt +++ b/requirements-minimal.txt @@ -54,7 +54,7 @@ requests-cache >= 0.4.10 retry ruamel.yaml sensu-plugin -service-configuration-lib >= 2.18.11 +service-configuration-lib >= 2.18.17 signalfx slackclient >= 1.2.1 sticht >= 1.1.0 diff --git a/requirements.txt b/requirements.txt index aa6201a9bb..371791d8ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -90,7 +90,7 @@ rsa==4.7.2 ruamel.yaml==0.15.96 s3transfer==0.10.0 sensu-plugin==0.3.1 -service-configuration-lib==2.18.11 +service-configuration-lib==2.18.17 setuptools==39.0.1 signalfx==1.0.17 simplejson==3.10.0 From 38bf3587081168fc7e4b690fe140f4539f4ffaec Mon Sep 17 00:00:00 2001 From: Nurdan Almazbekov Date: Mon, 22 Apr 2024 14:54:45 -0700 Subject: [PATCH 4/7] Remove deleted function --- paasta_tools/cli/cmds/spark_run.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/paasta_tools/cli/cmds/spark_run.py b/paasta_tools/cli/cmds/spark_run.py index dfd463c1e2..570386a1c4 100644 --- a/paasta_tools/cli/cmds/spark_run.py +++ b/paasta_tools/cli/cmds/spark_run.py @@ -22,7 +22,6 @@ from service_configuration_lib.spark_config import get_aws_credentials from service_configuration_lib.spark_config import get_grafana_url from service_configuration_lib.spark_config import get_resources_requested -from service_configuration_lib.spark_config import get_signalfx_url from service_configuration_lib.spark_config import get_spark_hourly_cost from service_configuration_lib.spark_config import send_and_calculate_resources_cost from service_configuration_lib.spark_config import UnsupportedClusterManagerException @@ -947,12 +946,8 @@ def configure_and_run_docker_container( print(PaastaColors.green(f"\nSpark history server URL: ") + f"{webui_url}\n") elif any(c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]): grafana_url = get_grafana_url(spark_conf) - signalfx_url = get_signalfx_url(spark_conf) dashboard_url_msg = ( - PaastaColors.green(f"\nGrafana dashboard: ") - + f"{grafana_url}\n" - + PaastaColors.green(f"\nSignalfx dashboard: ") - + f"{signalfx_url}\n" + PaastaColors.green(f"\nGrafana dashboard: ") + f"{grafana_url}\n" ) print(webui_url_msg) print(dashboard_url_msg) From 3c76e29c87f0cf2b4b45c545ea305d8b0c0efdb6 Mon Sep 17 00:00:00 2001 From: Nurdan Almazbekov Date: Tue, 23 Apr 2024 10:18:53 -0700 Subject: [PATCH 5/7] Fix existing tests --- tests/cli/test_cmds_spark_run.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/cli/test_cmds_spark_run.py b/tests/cli/test_cmds_spark_run.py index f322030fc6..a66b9b1824 100644 --- a/tests/cli/test_cmds_spark_run.py +++ b/tests/cli/test_cmds_spark_run.py @@ -979,6 +979,7 @@ def test_paasta_spark_run_bash( k8s_server_address=None, tronfig=None, job_id=None, + use_web_identity=False, ) mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = ( {} @@ -1012,6 +1013,7 @@ def test_paasta_spark_run_bash( profile_name=None, assume_aws_role_arn=None, session_duration=3600, + use_web_identity=False, ) mock_get_docker_image.assert_called_once_with( args, mock_get_instance_config.return_value @@ -1103,6 +1105,7 @@ def test_paasta_spark_run( k8s_server_address=None, tronfig=None, job_id=None, + use_web_identity=False, ) mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = ( {} @@ -1139,6 +1142,7 @@ def test_paasta_spark_run( profile_name=None, assume_aws_role_arn=None, session_duration=3600, + use_web_identity=False, ) mock_get_docker_image.assert_called_once_with( args, mock_get_instance_config.return_value @@ -1227,6 +1231,7 @@ def test_paasta_spark_run_pyspark( k8s_server_address=None, tronfig=None, job_id=None, + use_web_identity=False, ) mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = ( {} @@ -1265,6 +1270,7 @@ def test_paasta_spark_run_pyspark( profile_name=None, assume_aws_role_arn=None, session_duration=3600, + use_web_identity=False, ) mock_get_docker_image.assert_called_once_with( args, mock_get_instance_config.return_value From 98b1217f925a3a6f3fedbb68d569baed428afad5 Mon Sep 17 00:00:00 2001 From: Nurdan Almazbekov Date: Tue, 23 Apr 2024 10:56:25 -0700 Subject: [PATCH 6/7] Add test for pod identity --- tests/cli/test_cmds_spark_run.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/cli/test_cmds_spark_run.py b/tests/cli/test_cmds_spark_run.py index a66b9b1824..17e5dd5aac 100644 --- a/tests/cli/test_cmds_spark_run.py +++ b/tests/cli/test_cmds_spark_run.py @@ -1419,3 +1419,34 @@ def test_build_and_push_docker_image_unexpected_output_format( with pytest.raises(ValueError) as e: build_and_push_docker_image(args) assert "Could not determine digest from output" in str(e.value) + + +def test_get_aws_credentials(): + import os + from service_configuration_lib.spark_config import get_aws_credentials + + with mock.patch.dict( + os.environ, + { + "AWS_WEB_IDENTITY_TOKEN_FILE": "./some-file.txt", + "AWS_ROLE_ARN": "some-role-for-test", + }, + ), mock.patch( + "service_configuration_lib.spark_config.open", + mock.mock_open(read_data="token-content"), + autospec=True, + ), mock.patch( + "service_configuration_lib.spark_config.boto3.client", + autospec=True, + ) as boto3_client: + get_aws_credentials( + service="some-service", + use_web_identity=True, + ) + boto3_client.assert_called_once_with("sts") + boto3_client.return_value.assume_role_with_web_identity.assert_called_once_with( + DurationSeconds=3600, + RoleArn="some-role-for-test", + RoleSessionName=mock.ANY, + WebIdentityToken="token-content", + ) From 7a7bd0e09a809092092d17f3b25bb2566cc41c27 Mon Sep 17 00:00:00 2001 From: Nurdan Almazbekov Date: Tue, 23 Apr 2024 11:04:16 -0700 Subject: [PATCH 7/7] Fix test for pod identity --- tests/cli/test_cmds_spark_run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cli/test_cmds_spark_run.py b/tests/cli/test_cmds_spark_run.py index 17e5dd5aac..42a5445a68 100644 --- a/tests/cli/test_cmds_spark_run.py +++ b/tests/cli/test_cmds_spark_run.py @@ -1434,10 +1434,10 @@ def test_get_aws_credentials(): ), mock.patch( "service_configuration_lib.spark_config.open", mock.mock_open(read_data="token-content"), - autospec=True, + autospec=False, ), mock.patch( "service_configuration_lib.spark_config.boto3.client", - autospec=True, + autospec=False, ) as boto3_client: get_aws_credentials( service="some-service",