From 286efbf3f18683317fe33ac6ceaad49b87ed62f1 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Mon, 26 Aug 2024 15:19:10 +0100 Subject: [PATCH] check bucket encryption type: key|alias (#1499) ### Feature or Bugfix - Bugfix ### Detail - `get_bucket_encryption` returns also type: key|alias - if type `alias` we don't need to get `key_id` - if type `key` we check `key_id` ### Relates - #1410 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. Co-authored-by: Sofia Sazonova --- .../modules/s3_datasets/aws/s3_dataset_client.py | 10 +++++++--- .../modules/s3_datasets/services/dataset_service.py | 13 ++++++++----- tests/modules/s3_datasets/test_dataset.py | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/backend/dataall/modules/s3_datasets/aws/s3_dataset_client.py b/backend/dataall/modules/s3_datasets/aws/s3_dataset_client.py index b6331c989..56bab7f7c 100644 --- a/backend/dataall/modules/s3_datasets/aws/s3_dataset_client.py +++ b/backend/dataall/modules/s3_datasets/aws/s3_dataset_client.py @@ -47,7 +47,7 @@ def get_file_upload_presigned_url(self, data): except ClientError as e: raise e - def get_bucket_encryption(self) -> (str, str): + def get_bucket_encryption(self) -> (str, str, str): dataset = self._dataset try: response = self._client.get_bucket_encryption( @@ -56,9 +56,13 @@ def get_bucket_encryption(self) -> (str, str): rule = response['ServerSideEncryptionConfiguration']['Rules'][0] encryption = rule['ApplyServerSideEncryptionByDefault'] s3_encryption = encryption['SSEAlgorithm'] - kms_id = encryption.get('KMSMasterKeyID').split('/')[-1] if encryption.get('KMSMasterKeyID') else None + # Format (using key id): arn:aws:kms:::key/ + # (using alias): arn:aws:kms:::alias/ + kms_key = encryption.get('KMSMasterKeyID') + kms_id = kms_key.split('/')[-1] if kms_key else None + kms_id_type = 'alias' if 'alias' in kms_key else 'key' - return s3_encryption, kms_id + return s3_encryption, kms_id_type, kms_id except ClientError as e: if e.response['Error']['Code'] == 'AccessDenied': diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index 27febeb3a..51750d6fb 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -121,7 +121,7 @@ def check_imported_resources(dataset: S3Dataset): ) kms_alias = dataset.KmsAlias - s3_encryption, kms_id = S3DatasetClient(dataset).get_bucket_encryption() + s3_encryption, kms_id_type, kms_id = S3DatasetClient(dataset).get_bucket_encryption() if kms_alias not in [None, 'Undefined', '', 'SSE-S3']: # user-defined KMS encryption if s3_encryption == 'AES256': raise exceptions.InvalidInput( @@ -139,11 +139,14 @@ def check_imported_resources(dataset: S3Dataset): message=f'KMS key with alias={kms_alias} cannot be found - Please check if KMS Key Alias exists in account {dataset.AwsAccountId}', ) - key_id = KmsClient(account_id=dataset.AwsAccountId, region=dataset.region).get_key_id( - key_alias=f'alias/{kms_alias}' - ) + key_matches = kms_id == kms_alias + if kms_id_type == 'key': + key_id = KmsClient(account_id=dataset.AwsAccountId, region=dataset.region).get_key_id( + key_alias=f'alias/{kms_alias}' + ) + key_matches = key_id == kms_id - if key_id != kms_id: + if not key_matches: raise exceptions.InvalidInput( param_name='KmsAlias', param_value=dataset.KmsAlias, diff --git a/tests/modules/s3_datasets/test_dataset.py b/tests/modules/s3_datasets/test_dataset.py index ee6508a8e..c2e55fe6c 100644 --- a/tests/modules/s3_datasets/test_dataset.py +++ b/tests/modules/s3_datasets/test_dataset.py @@ -23,7 +23,7 @@ def mock_s3_client(module_mocker): s3_client = MagicMock() module_mocker.patch('dataall.modules.s3_datasets.services.dataset_service.S3DatasetClient', s3_client) - s3_client().get_bucket_encryption.return_value = ('aws:kms', mocked_key_id) + s3_client().get_bucket_encryption.return_value = ('aws:kms', 'key', mocked_key_id) yield s3_client