Merge pull request #3828 from Yelp/u/nalma/spark_run_pod_identity
Support pod identity in `spark-run`
nurdann authored Apr 29, 2024
2 parents 6b97632 + 7a7bd0e commit c70d69c
Showing 4 changed files with 56 additions and 6 deletions.
paasta_tools/cli/cmds/spark_run.py (17 additions & 1 deletion)
@@ -373,7 +373,10 @@ def add_subparser(subparsers):
 
     aws_group.add_argument(
         "--assume-aws-role",
-        help="Takes an AWS IAM role ARN and attempts to create a session",
+        help=(
+            "Takes an AWS IAM role ARN and attempts to create a session using "
+            "spark_role_assumer"
+        ),
     )
 
     aws_group.add_argument(
@@ -386,6 +389,18 @@
         default=43200,
     )
 
+    aws_group.add_argument(
+        "--use-web-identity",
+        help=(
+            "If the current environment contains AWS_ROLE_ARN and "
+            "AWS_WEB_IDENTITY_TOKEN_FILE, creates a session to use. These "
+            "ENV vars must be present, and will be in the context of a pod-"
+            "identity enabled pod."
+        ),
+        action="store_true",
+        default=False,
+    )
+
     jupyter_group = list_parser.add_argument_group(
         title="Jupyter kernel culling options",
         description="Idle kernels will be culled by default. Idle "
@@ -1172,6 +1187,7 @@ def paasta_spark_run(args: argparse.Namespace) -> int:
         profile_name=args.aws_profile,
         assume_aws_role_arn=args.assume_aws_role,
         session_duration=args.aws_role_duration,
+        use_web_identity=args.use_web_identity,
     )
     docker_image_digest = get_docker_image(args, instance_config)
     if docker_image_digest is None:
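Note that the new flag only toggles a code path in service-configuration-lib's get_aws_credentials (hence the version bump in the requirements files below); the session creation itself is not part of this diff. Judging from the new test at the end of this commit, the web-identity path behaves roughly like the sketch below. The helper name and session-name format are illustrative assumptions; the sts.assume_role_with_web_identity call shape is what the test asserts.

```python
import os
import uuid

import boto3


def assume_role_with_web_identity_sketch(duration_seconds: int = 3600):
    # Both env vars are injected into pod-identity enabled pods, as the
    # --use-web-identity help text notes; this sketch assumes they are set.
    role_arn = os.environ["AWS_ROLE_ARN"]
    token_file = os.environ["AWS_WEB_IDENTITY_TOKEN_FILE"]
    with open(token_file) as f:
        web_identity_token = f.read()

    # The test below asserts exactly this call shape against a mocked client.
    sts = boto3.client("sts")
    return sts.assume_role_with_web_identity(
        DurationSeconds=duration_seconds,
        RoleArn=role_arn,
        RoleSessionName=f"spark-run-{uuid.uuid4().hex}",  # illustrative name
        WebIdentityToken=web_identity_token,
    )
```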
requirements-minimal.txt (1 addition & 1 deletion)
@@ -54,7 +54,7 @@ requests-cache >= 0.4.10
 retry
 ruamel.yaml
 sensu-plugin
-service-configuration-lib >= 2.18.15
+service-configuration-lib >= 2.18.17
 signalfx
 slackclient >= 1.2.1
 sticht >= 1.1.0
requirements.txt (1 addition & 1 deletion)
@@ -90,7 +90,7 @@ rsa==4.7.2
 ruamel.yaml==0.15.96
 s3transfer==0.10.0
 sensu-plugin==0.3.1
-service-configuration-lib==2.18.15
+service-configuration-lib==2.18.17
 setuptools==39.0.1
 signalfx==1.0.17
 simplejson==3.10.0
tests/cli/test_cmds_spark_run.py (37 additions & 3 deletions)
@@ -966,7 +966,6 @@ def test_paasta_spark_run_bash(
         cluster="test-cluster",
         pool="test-pool",
         yelpsoa_config_root="/path/to/soa",
-        no_aws_credentials=False,
         aws_credentials_yaml="/path/to/creds",
         aws_profile=None,
         spark_args="spark.cores.max=100 spark.executor.cores=10",
@@ -980,6 +979,7 @@
         k8s_server_address=None,
         tronfig=None,
         job_id=None,
+        use_web_identity=False,
     )
     mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = (
         {}
@@ -1013,6 +1013,7 @@
         profile_name=None,
         assume_aws_role_arn=None,
         session_duration=3600,
+        use_web_identity=False,
     )
     mock_get_docker_image.assert_called_once_with(
         args, mock_get_instance_config.return_value
@@ -1091,7 +1092,6 @@ def test_paasta_spark_run(
         cluster="test-cluster",
         pool="test-pool",
         yelpsoa_config_root="/path/to/soa",
-        no_aws_credentials=False,
         aws_credentials_yaml="/path/to/creds",
         aws_profile=None,
         spark_args="spark.cores.max=100 spark.executor.cores=10",
@@ -1105,6 +1105,7 @@
         k8s_server_address=None,
         tronfig=None,
         job_id=None,
+        use_web_identity=False,
     )
     mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = (
         {}
@@ -1141,6 +1142,7 @@
         profile_name=None,
         assume_aws_role_arn=None,
         session_duration=3600,
+        use_web_identity=False,
     )
     mock_get_docker_image.assert_called_once_with(
         args, mock_get_instance_config.return_value
@@ -1216,7 +1218,6 @@ def test_paasta_spark_run_pyspark(
         cluster="test-cluster",
         pool="test-pool",
         yelpsoa_config_root="/path/to/soa",
-        no_aws_credentials=False,
         aws_credentials_yaml="/path/to/creds",
         aws_profile=None,
         spark_args="spark.cores.max=70 spark.executor.cores=10",
@@ -1230,6 +1231,7 @@
         k8s_server_address=None,
         tronfig=None,
         job_id=None,
+        use_web_identity=False,
     )
     mock_load_system_paasta_config_utils.return_value.get_kube_clusters.return_value = (
         {}
@@ -1268,6 +1270,7 @@
         profile_name=None,
         assume_aws_role_arn=None,
         session_duration=3600,
+        use_web_identity=False,
     )
     mock_get_docker_image.assert_called_once_with(
         args, mock_get_instance_config.return_value
@@ -1416,3 +1419,34 @@ def test_build_and_push_docker_image_unexpected_output_format(
     with pytest.raises(ValueError) as e:
         build_and_push_docker_image(args)
     assert "Could not determine digest from output" in str(e.value)
+
+
+def test_get_aws_credentials():
+    import os
+    from service_configuration_lib.spark_config import get_aws_credentials
+
+    with mock.patch.dict(
+        os.environ,
+        {
+            "AWS_WEB_IDENTITY_TOKEN_FILE": "./some-file.txt",
+            "AWS_ROLE_ARN": "some-role-for-test",
+        },
+    ), mock.patch(
+        "service_configuration_lib.spark_config.open",
+        mock.mock_open(read_data="token-content"),
+        autospec=False,
+    ), mock.patch(
+        "service_configuration_lib.spark_config.boto3.client",
+        autospec=False,
+    ) as boto3_client:
+        get_aws_credentials(
+            service="some-service",
+            use_web_identity=True,
+        )
+        boto3_client.assert_called_once_with("sts")
+        boto3_client.return_value.assume_role_with_web_identity.assert_called_once_with(
+            DurationSeconds=3600,
+            RoleArn="some-role-for-test",
+            RoleSessionName=mock.ANY,
+            WebIdentityToken="token-content",
+        )
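For completeness, a hedged usage sketch of the new path from a caller's perspective. The kwargs mirror the test above and the paasta_spark_run call site earlier in the diff; the service name is hypothetical.

```python
from service_configuration_lib.spark_config import get_aws_credentials

# Inside a pod-identity enabled pod, AWS_ROLE_ARN and
# AWS_WEB_IDENTITY_TOKEN_FILE are already set, so no credentials file or
# profile is needed; per the test above, get_aws_credentials then obtains
# a temporary session via sts:AssumeRoleWithWebIdentity.
credentials = get_aws_credentials(
    service="my-service",  # hypothetical service name
    use_web_identity=True,
)
```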
