From 7e611f809d89bf6f296c24445c23935d8a95ba0b Mon Sep 17 00:00:00 2001 From: Alexander Braverman Masis Date: Fri, 30 Aug 2024 07:46:26 -0400 Subject: [PATCH] S3 bucket inventories support (#2074) SUMMARY Add S3 inventory support - https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-inventory.html ISSUE TYPE Feature Pull Request COMPONENT NAME S3 Bucket Reviewed-by: Helen Bailey Reviewed-by: Alexander Braverman Masis Reviewed-by: Mark Chappell Reviewed-by: Alina Buzachis (cherry picked from commit c96d250169247c004ed36e205f9ff7c39f5da5b3) --- .../2074-s3_bucket-inventory-feature.yml | 3 + plugins/module_utils/s3.py | 30 +++ plugins/modules/s3_bucket.py | 240 +++++++++++++++++- tests/integration/targets/s3_bucket/inventory | 1 + .../roles/s3_bucket/tasks/inventory.yml | 143 +++++++++++ 5 files changed, 416 insertions(+), 1 deletion(-) create mode 100644 changelogs/fragments/2074-s3_bucket-inventory-feature.yml create mode 100644 tests/integration/targets/s3_bucket/roles/s3_bucket/tasks/inventory.yml diff --git a/changelogs/fragments/2074-s3_bucket-inventory-feature.yml b/changelogs/fragments/2074-s3_bucket-inventory-feature.yml new file mode 100644 index 00000000000..d326e0c4560 --- /dev/null +++ b/changelogs/fragments/2074-s3_bucket-inventory-feature.yml @@ -0,0 +1,3 @@ +--- +minor_changes: + - s3_bucket - Add support for bucket inventories (https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-inventory.html) diff --git a/plugins/module_utils/s3.py b/plugins/module_utils/s3.py index 961f36f22f0..304803961d2 100644 --- a/plugins/module_utils/s3.py +++ b/plugins/module_utils/s3.py @@ -21,6 +21,8 @@ from ansible.module_utils.basic import to_text +from ansible_collections.amazon.aws.plugins.module_utils.retries import AWSRetry + def s3_head_objects(client, parts, bucket, obj, versionId): args = {"Bucket": bucket, "Key": obj} @@ -151,3 +153,31 @@ def s3_extra_params(options, sigv4=False): config["signature_version"] = "s3v4" extra_params["config"] = 
config
     return extra_params
+
+
+@AWSRetry.exponential_backoff(max_delay=120, catch_extra_error_codes=["NoSuchBucket", "OperationAborted"])
+def _list_bucket_inventory_configurations(client, **params):
+    """Fetch one page of inventory configurations; the caller drives pagination."""
+    return client.list_bucket_inventory_configurations(**params)
+
+
+# list_bucket_inventory_configurations is a workaround for a missing paginator for listing
+# bucket inventory configuration in boto3:
+# https://github.com/boto/botocore/blob/1.34.141/botocore/data/s3/2006-03-01/paginators-1.json
+def list_bucket_inventory_configurations(client, bucket_name):
+    """Return the inventory configurations of ``bucket_name`` from all result pages as one list.
+
+    ListBucketInventoryConfigurations reports further pages through ``NextContinuationToken``
+    and takes that value back as ``ContinuationToken``; it has no ``NextToken`` field, so
+    keying on ``NextToken`` would silently stop after the first page.
+    """
+    entries = []
+    params = {"Bucket": bucket_name}
+    while True:
+        response = _list_bucket_inventory_configurations(client, **params)
+        # Tolerate a response without the list key (treated as an empty page).
+        entries.extend(response.get("InventoryConfigurationList", []))
+        next_token = response.get("NextContinuationToken")
+        if not next_token:
+            return entries
+        params["ContinuationToken"] = next_token
diff --git a/plugins/modules/s3_bucket.py b/plugins/modules/s3_bucket.py
index 764e90fc1e2..ffd9b6c1057 100644
--- a/plugins/modules/s3_bucket.py
+++ b/plugins/modules/s3_bucket.py
@@ -197,7 +197,63 @@
         type: int
     type: dict
     version_added: 8.1.0
-
+  inventory:
+    description:
+      - Enable S3 Inventory, saving list of the objects and their corresponding
+        metadata on a daily or weekly basis for an S3 bucket.
+    type: list
+    elements: dict
+    suboptions:
+      destination:
+        description: Contains information about where to publish the inventory results.
+        type: dict
+        required: True
+        suboptions:
+          account_id:
+            description: The account ID that owns the destination S3 bucket. If no account ID is provided, the owner is not validated before exporting data.
+ type: str + bucket: + description: The Amazon Resource Name (ARN) of the bucket where inventory results will be published. + type: str + required: True + format: + description: Specifies the output format of the inventory results. + type: str + choices: [ 'CSV', 'ORC', 'Parquet' ] + required: True + prefix: + description: The prefix that is prepended to all inventory results. + type: str + filter: + description: The prefix that an object must have to be included in the inventory results. + type: str + id: + description: The ID used to identify the inventory configuration. + type: str + required: True + schedule: + description: Specifies the schedule for generating inventory results. + type: str + choices: [ 'Daily', 'Weekly' ] + required: True + included_object_versions: + description: | + Object versions to include in the inventory list. If set to All, the list includes all the object versions, + which adds the version-related fields VersionId, IsLatest, and DeleteMarker to the list. If set to Current, + the list does not contain these version-related fields. + type: str + required: True + choices: [ 'All', 'Current' ] + optional_fields: + description: Contains the optional fields that are included in the inventory results. 
+ type: list + elements: str + choices: [ "Size", "LastModifiedDate", "StorageClass", "ETag", + "IsMultipartUploaded", "ReplicationStatus", "EncryptionStatus", + "ObjectLockRetainUntilDate", "ObjectLockMode", + "ObjectLockLegalHoldStatus", "IntelligentTieringAccessTier", + "BucketKeyStatus", "ChecksumAlgorithm", "ObjectAccessControlList", + "ObjectOwner" ] extends_documentation_fragment: - amazon.aws.common.modules - amazon.aws.region.modules @@ -332,6 +388,18 @@ object_lock_default_retention: mode: governance days: 1 +# Bucket with inventory configuration: +- amazon.aws.s3_bucket: + name: mys3bucket + state: present + inventory: + - id: mys3bucket-inventory-id + destination: + bucket: "arn:aws:s3:::mys3inventorybucket" + format: "CSV" + optional_fields: ["Size"] + included_object_versions: "All" + schedule: "Weekly" """ RETURN = r""" @@ -451,6 +519,27 @@ type: bool returned: O(state=present) sample: true +bucket_inventory: + description: S3 bucket inventory configuration. + type: list + returned: when O(state=present) + sample: [ + { + "IsEnabled": true, + "Id": "9c2a337ba5fd64de777f499441f83093-inventory-target", + "Destination": { + "S3BucketDestination": { + "Bucket": "arn:aws:s3:::9c2a337ba5fd64de777f499441f83093-inventory-target", + "Format": "CSV" + } + }, + "IncludedObjectVersions": "All", + "Schedule": { + "Frequency": "Daily" + }, + "OptionalFields": [] + } + ] """ import json @@ -472,6 +561,7 @@ from ansible_collections.amazon.aws.plugins.module_utils.modules import AnsibleAWSModule from ansible_collections.amazon.aws.plugins.module_utils.policy import compare_policies from ansible_collections.amazon.aws.plugins.module_utils.retries import AWSRetry +from ansible_collections.amazon.aws.plugins.module_utils.s3 import list_bucket_inventory_configurations from ansible_collections.amazon.aws.plugins.module_utils.s3 import s3_extra_params from ansible_collections.amazon.aws.plugins.module_utils.s3 import validate_bucket_name from
ansible_collections.amazon.aws.plugins.module_utils.tagging import ansible_dict_to_boto3_tag_list
@@ -1055,6 +1145,79 @@ def handle_bucket_object_lock_retention(s3_client, module: AnsibleAWSModule, nam
     return object_lock_default_retention_changed, object_lock_default_retention_result
 
 
+def handle_bucket_inventory(s3_client, module: AnsibleAWSModule, name: str) -> tuple[bool, list]:
+    """
+    Manage inventory configuration for an S3 bucket.
+    Parameters:
+        s3_client (boto3.client): The Boto3 S3 client object.
+        module (AnsibleAWSModule): The Ansible module object.
+        name (str): The name of the bucket to handle inventory for.
+    Returns:
+        A tuple containing a boolean indicating whether inventory settings were changed
+        and a list containing the declared inventory configurations in API format.
+    """
+    declared_inventories = module.params.get("inventory")
+    results = []
+    bucket_changed = False
+
+    # Inventory is only managed when the option is set; an empty list removes every configuration.
+    if declared_inventories is None:
+        return (False, results)
+
+    try:
+        present_inventories = {i["Id"]: i for i in list_bucket_inventory_configurations(s3_client, name)}
+    except is_boto3_error_code(["NotImplemented", "XNotImplemented"]) as e:
+        module.fail_json_aws(e, msg="Fetching bucket inventories is not supported")
+    except is_boto3_error_code("AccessDenied") as e:  # pylint: disable=duplicate-except
+        module.fail_json_aws(e, msg="Permission denied fetching bucket inventories")
+    except (
+        botocore.exceptions.BotoCoreError,
+        botocore.exceptions.ClientError,
+    ) as e:  # pylint: disable=duplicate-except
+        module.fail_json_aws(e, msg="Failed to fetch bucket inventories")
+
+    for declared_inventory in declared_inventories:
+        camel_destination = snake_dict_to_camel_dict(declared_inventory.get("destination", {}), True)
+        declared_inventory_api = {
+            "IsEnabled": True,
+            "Id": declared_inventory.get("id"),
+            "Destination": {"S3BucketDestination": {k: v for k, v in camel_destination.items() if v is not None}},
+            "IncludedObjectVersions": declared_inventory.get("included_object_versions"),
+            "Schedule": {"Frequency": declared_inventory.get("schedule")},
+            # Omitted suboptions arrive as None (not missing), so a .get() default never applies.
+            "OptionalFields": list(declared_inventory.get("optional_fields") or []),
+        }
+        if declared_inventory.get("filter") is not None:
+            declared_inventory_api["Filter"] = {"Prefix": declared_inventory.get("filter")}
+
+        # Popping leaves only the undeclared configurations behind for the removal loop below.
+        present_inventory = present_inventories.pop(declared_inventory_api["Id"], None)
+
+        if declared_inventory_api != present_inventory:
+            try:
+                put_bucket_inventory(s3_client, name, declared_inventory_api)
+                bucket_changed = True
+            except is_boto3_error_code("InvalidS3DestinationBucket") as e:
+                module.fail_json_aws(e, msg="Invalid destination bucket ARN")
+            except (
+                botocore.exceptions.BotoCoreError,
+                botocore.exceptions.ClientError,
+            ) as e:  # pylint: disable=duplicate-except
+                module.fail_json_aws(e, msg="Failed to set bucket inventory setting")
+
+        results.append(declared_inventory_api)
+
+    # Whatever is still present on the bucket was not declared: remove it.
+    for inventory_id in present_inventories:
+        try:
+            delete_bucket_inventory(s3_client, name, inventory_id)
+        except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
+            module.fail_json_aws(e, msg="Failed to delete bucket inventory")
+        bucket_changed = True
+
+    return bucket_changed, results
+
+
 def create_or_update_bucket(s3_client, module: AnsibleAWSModule):
     """
     Create or update an S3 bucket along with its associated configurations.
@@ -1140,6 +1303,9 @@ def create_or_update_bucket(s3_client, module: AnsibleAWSModule):
         s3_client, module, name
     )
     result["object_lock_default_retention"] = bucket_object_lock_retention_result
+    # -- Inventory
+    bucket_inventory_changed, bucket_inventory_result = handle_bucket_inventory(s3_client, module, name)
+    result["bucket_inventory"] = bucket_inventory_result
 
     # Module exit
     changed = (
@@ -1154,6 +1320,7 @@ def create_or_update_bucket(s3_client, module: AnsibleAWSModule):
         or bucket_acl_changed
         or bucket_accelerate_changed
         or bucket_object_lock_retention_changed
+        or bucket_inventory_changed
     )
     module.exit_json(changed=changed, name=name, **result)
 
@@ -1279,6 +1446,22 @@ def get_bucket_accelerate_status(s3_client, bucket_name) -> bool:
     return accelerate_configuration.get("Status") == "Enabled"
 
 
+@AWSRetry.exponential_backoff(max_delay=120, catch_extra_error_codes=["NoSuchBucket", "OperationAborted"])
+def put_bucket_inventory(s3_client, bucket_name: str, inventory: dict) -> None:
+    """
+    Set inventory settings for an S3 bucket.
+    Parameters:
+        s3_client (boto3.client): The Boto3 S3 client object.
+        bucket_name (str): The name of the S3 bucket.
+        inventory (dict): The inventory configuration to set; its "Id" entry is also sent as the request Id.
+    Returns:
+        None
+    """
+    s3_client.put_bucket_inventory_configuration(
+        Bucket=bucket_name, InventoryConfiguration=inventory, Id=inventory.get("Id")
+    )
+
+
 @AWSRetry.exponential_backoff(max_delay=120, catch_extra_error_codes=["NoSuchBucket", "OperationAborted"])
 def put_bucket_tagging(s3_client, bucket_name: str, tags: dict):
     """
@@ -1293,6 +1476,20 @@ def put_bucket_tagging(s3_client, bucket_name: str, tags: dict):
     s3_client.put_bucket_tagging(Bucket=bucket_name, Tagging={"TagSet": ansible_dict_to_boto3_tag_list(tags)})
 
 
+@AWSRetry.exponential_backoff(max_delay=120, catch_extra_error_codes=["NoSuchBucket", "OperationAborted"])
+def delete_bucket_inventory(s3_client, bucket_name: str, id: str) -> None:
+    """
+    Delete the inventory settings for an S3 bucket.
+    Parameters:
+        s3_client (boto3.client): The Boto3 S3 client object.
+        bucket_name (str): The name of the S3 bucket.
+        id (str): The ID used to identify the inventory configuration to delete.
+    Returns:
+        None
+    """
+    s3_client.delete_bucket_inventory_configuration(Bucket=bucket_name, Id=id)
+
+
 @AWSRetry.exponential_backoff(max_delay=120, catch_extra_error_codes=["NoSuchBucket", "OperationAborted"])
 def put_bucket_policy(s3_client, bucket_name: str, policy: dict):
     """
@@ -2054,6 +2251,47 @@ def main():
             mutually_exclusive=[("days", "years")],
             required_one_of=[("days", "years")],
         ),
+        inventory=dict(
+            type="list",
+            elements="dict",
+            options=dict(
+                destination=dict(
+                    type="dict",
+                    options=dict(
+                        account_id=dict(type="str"),
+                        bucket=dict(type="str", required=True),
+                        format=dict(type="str", choices=["CSV", "ORC", "Parquet"], required=True),
+                        prefix=dict(type="str"),
+                    ),
+                    required=True,
+                ),
+                filter=dict(type="str"),
+                optional_fields=dict(
+                    type="list",
+                    elements="str",
+                    choices=[
+                        "Size",
+                        "LastModifiedDate",
+                        "StorageClass",
+                        "ETag",
+                        "IsMultipartUploaded",
+                        "ReplicationStatus",
+                        "EncryptionStatus",
+                        "ObjectLockRetainUntilDate",
+                        "ObjectLockMode",
+                        "ObjectLockLegalHoldStatus",
+
"IntelligentTieringAccessTier", + "BucketKeyStatus", + "ChecksumAlgorithm", + "ObjectAccessControlList", + "ObjectOwner", + ], + ), + id=dict(type="str", required=True), + schedule=dict(type="str", choices=["Daily", "Weekly"], required=True), + included_object_versions=dict(type="str", choices=["All", "Current"], required=True), + ), + ), ) required_by = dict( diff --git a/tests/integration/targets/s3_bucket/inventory b/tests/integration/targets/s3_bucket/inventory index 30c203beac2..a4a5fdaac47 100644 --- a/tests/integration/targets/s3_bucket/inventory +++ b/tests/integration/targets/s3_bucket/inventory @@ -13,6 +13,7 @@ acl object_lock accelerate default_retention +inventory [all:vars] ansible_connection=local diff --git a/tests/integration/targets/s3_bucket/roles/s3_bucket/tasks/inventory.yml b/tests/integration/targets/s3_bucket/roles/s3_bucket/tasks/inventory.yml new file mode 100644 index 00000000000..4c7df7c105c --- /dev/null +++ b/tests/integration/targets/s3_bucket/roles/s3_bucket/tasks/inventory.yml @@ -0,0 +1,143 @@ +--- +- module_defaults: + group/aws: + access_key: "{{ aws_access_key }}" + secret_key: "{{ aws_secret_key }}" + session_token: "{{ security_token | default(omit) }}" + region: "{{ aws_region }}" + block: + - ansible.builtin.set_fact: + local_bucket_name: "{{ bucket_name | hash('md5')}}-inventory-source" + local_dest_bucket_name: "{{ bucket_name | hash('md5')}}-inventory-target" + - name: Create a simple bucket to be inventory destination + amazon.aws.s3_bucket: + name: "{{ local_dest_bucket_name }}" + state: present + register: output + + - ansible.builtin.assert: + that: + - output.changed + + - name: Create a simple bucket with minimal inventory configuration + amazon.aws.s3_bucket: + name: "{{ local_bucket_name }}" + state: present + inventory: + - id: "{{ local_dest_bucket_name }}" + destination: + bucket: "arn:aws:s3:::{{ local_dest_bucket_name }}" + format: "CSV" + optional_fields: + - "Size" + included_object_versions: "All" + 
schedule: "Weekly" + register: output + + - ansible.builtin.assert: + that: + - output.changed + - output.bucket_inventory + + - name: Re-configure inventory configuration + amazon.aws.s3_bucket: + name: "{{ local_bucket_name }}" + state: present + inventory: + - id: "{{ local_dest_bucket_name }}" + destination: + bucket: "arn:aws:s3:::{{ local_dest_bucket_name }}" + format: "CSV" + optional_fields: + - "Size" + schedule: "Daily" + included_object_versions: "All" + register: output + + - ansible.builtin.assert: + that: + - output.changed + - output.bucket_inventory + + - name: Re-configure inventory configuration (idempotency) + amazon.aws.s3_bucket: + name: "{{ local_bucket_name }}" + state: present + inventory: + - id: "{{ local_dest_bucket_name }}" + destination: + bucket: "arn:aws:s3:::{{ local_dest_bucket_name }}" + format: "CSV" + optional_fields: + - "Size" + schedule: "Daily" + included_object_versions: "All" + register: output + + - ansible.builtin.assert: + that: + - output is not changed + - output.bucket_inventory + + + - name: Delete inventory configuration + amazon.aws.s3_bucket: + name: "{{ local_bucket_name }}" + state: present + inventory: [] + register: output + + - ansible.builtin.assert: + that: + - output is changed + - not output.bucket_inventory|bool + + - name: Delete inventory configuration (idempotency) + amazon.aws.s3_bucket: + name: "{{ local_bucket_name }}" + state: present + inventory: [] + register: output + + - ansible.builtin.assert: + that: + - output is not changed + - not output.bucket_inventory|bool + + # ============================================================ + + - name: Delete testing s3 bucket + amazon.aws.s3_bucket: + name: "{{ local_bucket_name }}" + state: absent + register: output + + - ansible.builtin.assert: + that: + - output.changed + + + - name: Delete testing inventory s3 bucket + amazon.aws.s3_bucket: + name: "{{ local_dest_bucket_name }}" + state: absent + register: output + + - ansible.builtin.assert: + 
that: + - output.changed + + + # ============================================================ + always: + - name: Ensure all buckets are deleted + amazon.aws.s3_bucket: + name: "{{ local_bucket_name }}" + state: absent + ignore_errors: true + + - name: Ensure all buckets are deleted + amazon.aws.s3_bucket: + name: "{{ local_dest_bucket_name }}" + state: absent + ignore_errors: true