From 3594dec0f50f1aa2e0cebf8b409b4e6b1e0e6739 Mon Sep 17 00:00:00 2001
From: Bikouo Aubin <79859644+abikouo@users.noreply.github.com>
Date: Mon, 3 Jul 2023 16:29:23 +0200
Subject: [PATCH] s3_object - allow recursive copy of all objects in S3 bucket
 (#1608)

s3_object - allow recursive copy of all objects in S3 bucket

SUMMARY
Add support for recursively copying all objects from one bucket to another; the user can set a prefix to limit which objects are copied.
closes #1379

ISSUE TYPE

Feature Pull Request

COMPONENT NAME
s3_object

Reviewed-by: Helen Bailey
Reviewed-by: Bikouo Aubin
---
 ...bject-support-copy-objects-recursively.yml |   2 +
 plugins/modules/s3_object.py                  | 244 +++++++++++-------
 .../s3_object/tasks/copy_recursively.yml      | 152 +++++++++++
 .../targets/s3_object/tasks/main.yml          |   2 +
 tests/unit/plugins/modules/test_s3_object.py  |  10 +-
 5 files changed, 308 insertions(+), 102 deletions(-)
 create mode 100644 changelogs/fragments/20230612-s3_object-support-copy-objects-recursively.yml
 create mode 100644 tests/integration/targets/s3_object/tasks/copy_recursively.yml

diff --git a/changelogs/fragments/20230612-s3_object-support-copy-objects-recursively.yml b/changelogs/fragments/20230612-s3_object-support-copy-objects-recursively.yml
new file mode 100644
index 00000000000..9157ec0d0b0
--- /dev/null
+++ b/changelogs/fragments/20230612-s3_object-support-copy-objects-recursively.yml
@@ -0,0 +1,2 @@
+minor_changes:
+- s3_object - Allow recursive copy of objects in S3 bucket (https://github.com/ansible-collections/amazon.aws/issues/1379).
diff --git a/plugins/modules/s3_object.py b/plugins/modules/s3_object.py
index 84f2c0c7b5c..8f36df398f0 100644
--- a/plugins/modules/s3_object.py
+++ b/plugins/modules/s3_object.py
@@ -228,11 +228,19 @@
         type: str
         description:
         - key name of the source object.
-        required: true
+        - If not specified, all the objects of the I(copy_src.bucket) will be copied into the destination bucket.
+        required: false
       version_id:
         type: str
         description:
         - version ID of the source object.
+      prefix:
+        description:
+        - Copy all the keys that begin with the specified prefix.
+        - Ignored if I(copy_src.object) is supplied.
+        default: ""
+        type: str
+        version_added: 6.2.0
   validate_bucket_name:
     description:
     - Whether the bucket name should be validated to conform to AWS S3 naming rules.
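The new `copy_src.prefix` option documented above limits the recursive copy to keys sharing a common prefix. For reference, here is a minimal sketch (not part of the patch) of prefix-limited listing with a plain boto3 client; the bucket and prefix names are illustrative:

```python
# Illustrative only: list the keys under a prefix via S3 pagination.
import boto3

def list_keys_with_prefix(bucket, prefix=""):
    client = boto3.client("s3")
    paginator = client.get_paginator("list_objects_v2")
    keys = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # "Contents" is absent from pages that match no objects.
        keys.extend(entry["Key"] for entry in page.get("Contents", []))
    return keys

print(list_keys_with_prefix("srcbucket", prefix="ansible_"))
```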
@@ -370,6 +378,14 @@
     copy_src:
         bucket: srcbucket
         object: /source/key.txt
+
+- name: Copy all the objects whose names start with 'ansible_'
+  amazon.aws.s3_object:
+    bucket: mybucket
+    mode: copy
+    copy_src:
+      bucket: srcbucket
+      prefix: 'ansible_'
 """
 
 RETURN = r"""
@@ -566,7 +582,7 @@ def paginated_versioned_list_with_fallback(s3, **pagination_params):
             yield [{"Key": key}]
 
 
-def list_keys(module, s3, bucket, prefix, marker, max_keys):
+def list_keys(s3, bucket, prefix=None, marker=None, max_keys=None):
     pagination_params = {
         "Bucket": bucket,
         "Prefix": prefix,
@@ -576,8 +592,7 @@
     pagination_params = {k: v for k, v in pagination_params.items() if v}
 
     try:
-        keys = list(paginated_list(s3, **pagination_params))
-        module.exit_json(msg="LIST operation complete", s3_keys=keys)
+        return list(paginated_list(s3, **pagination_params))
     except (
         botocore.exceptions.ClientError,
        botocore.exceptions.BotoCoreError,
@@ -892,78 +907,6 @@ def put_download_url(s3, bucket, obj, expiry):
     return url
 
 
-def copy_object_to_bucket(module, s3, bucket, obj, encrypt, metadata, validate, d_etag):
-    if module.check_mode:
-        module.exit_json(msg="COPY operation skipped - running in check mode", changed=True)
-    try:
-        params = {"Bucket": bucket, "Key": obj}
-        bucketsrc = {
-            "Bucket": module.params["copy_src"].get("bucket"),
-            "Key": module.params["copy_src"].get("object"),
-        }
-        version = None
-        if module.params["copy_src"].get("version_id"):
-            version = module.params["copy_src"].get("version_id")
-            bucketsrc.update({"VersionId": version})
-        if not key_check(
-            module,
-            s3,
-            bucketsrc["Bucket"],
-            bucketsrc["Key"],
-            version=version,
-            validate=validate,
-        ):
-            # Key does not exist in source bucket
-            module.exit_json(
-                msg=f"Key {bucketsrc['Key']} does not exist in bucket {bucketsrc['Bucket']}.",
-                changed=False,
-            )
-
-        s_etag = get_etag(s3, bucketsrc["Bucket"], bucketsrc["Key"], version=version)
-        if s_etag == d_etag:
-            # Tags
-            tags, changed = ensure_tags(s3, module, bucket, obj)
-            if not changed:
-                module.exit_json(
-                    msg="ETag from source and destination are the same",
-                    changed=False,
-                )
-            else:
-                module.exit_json(
-                    msg="tags successfully updated.",
-                    changed=changed,
-                    tags=tags,
-                )
-        else:
-            params.update({"CopySource": bucketsrc})
-            params.update(
-                get_extra_params(
-                    encrypt,
-                    module.params.get("encryption_mode"),
-                    module.params.get("encryption_kms_key_id"),
-                    metadata,
-                )
-            )
-            s3.copy_object(aws_retry=True, **params)
-            put_object_acl(module, s3, bucket, obj)
-            # Tags
-            tags, changed = ensure_tags(s3, module, bucket, obj)
-            module.exit_json(
-                msg=f"Object copied from bucket {bucketsrc['Bucket']} to bucket {bucket}.",
-                tags=tags,
-                changed=True,
-            )
-    except (
-        botocore.exceptions.BotoCoreError,
-        botocore.exceptions.ClientError,
-        boto3.exceptions.Boto3Error,
-    ) as e:  # pylint: disable=duplicate-except
-        raise S3ObjectFailure(
-            f"Failed while copying object {obj} from bucket {module.params['copy_src'].get('Bucket')}.",
-            e,
-        )
-
-
 def get_current_object_tags_dict(module, s3, bucket, obj, version=None):
     try:
         if version:
@@ -1230,8 +1173,7 @@ def s3_object_do_delobj(module, connection, connection_v4, s3_vars):
 
 def s3_object_do_list(module, connection, connection_v4, s3_vars):
     # If the bucket does not exist then bail out
-    list_keys(
-        module,
+    keys = list_keys(
         connection,
         s3_vars["bucket"],
         s3_vars["prefix"],
@@ -1239,6 +1181,8 @@
         s3_vars["max_keys"],
     )
 
+
+    module.exit_json(msg="LIST operation complete", s3_keys=keys)
 
 def s3_object_do_create(module, connection, connection_v4, s3_vars):
     # if both creating a bucket and putting an object in it, acls for the bucket and/or the object may be specified
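The hunks above turn `list_keys()` from a terminal helper that called `module.exit_json()` itself into a plain function that returns the key list, so the recursive copy path added below can reuse it. A standalone sketch of that pattern, with a hypothetical stub in place of `paginated_list()`; the exact pagination key names are assumptions for illustration:

```python
# Sketch only: mirrors the shape of the patched list_keys().
def fake_paginated_list(Bucket, Prefix="", **_ignored):
    objects = {"srcbucket": ["ansible_a.txt", "ansible_b.txt", "notes.log"]}
    return (key for key in objects.get(Bucket, []) if key.startswith(Prefix))

def list_keys(s3, bucket, prefix=None, marker=None, max_keys=None):
    # Build the parameter dict, then drop unset entries, as the module does.
    pagination_params = {"Bucket": bucket, "Prefix": prefix, "Marker": marker, "MaxKeys": max_keys}
    pagination_params = {k: v for k, v in pagination_params.items() if v}
    return list(fake_paginated_list(**pagination_params))

print(list_keys(None, "srcbucket", prefix="ansible_"))  # ['ansible_a.txt', 'ansible_b.txt']
```

Returning the list instead of exiting also simplifies the unit tests at the bottom of this patch.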
@@ -1330,23 +1274,132 @@ def s3_object_do_getstr(module, connection, connection_v4, s3_vars):
         module.fail_json(msg=f"Key {s3_vars['object']} does not exist.")
 
 
+def check_object_tags(module, connection, bucket, obj):
+    tags = module.params.get("tags")
+    purge_tags = module.params.get("purge_tags")
+    diff = False
+    if tags:
+        current_tags_dict = get_current_object_tags_dict(module, connection, bucket, obj)
+        if not purge_tags:
+            # Ensure existing tags that aren't updated by desired tags remain
+            current_tags_dict.update(tags)
+        diff = current_tags_dict != tags
+    return diff
+
+
+def copy_object_to_bucket(module, s3, bucket, obj, encrypt, metadata, validate, src_bucket, src_obj, versionId=None):
+    try:
+        params = {"Bucket": bucket, "Key": obj}
+        if not key_check(module, s3, src_bucket, src_obj, version=versionId, validate=validate):
+            # Key does not exist in source bucket
+            module.exit_json(
+                msg=f"Key {src_obj} does not exist in bucket {src_bucket}.",
+                changed=False,
+            )
+
+        s_etag = get_etag(s3, src_bucket, src_obj, version=versionId)
+        d_etag = get_etag(s3, bucket, obj)
+        if s_etag == d_etag:
+            if module.check_mode:
+                changed = check_object_tags(module, s3, bucket, obj)
+                result = {}
+                if changed:
+                    result.update({"msg": "Would have updated object tags if not running in check mode."})
+                return changed, result
+
+            # Ensure tags
+            tags, changed = ensure_tags(s3, module, bucket, obj)
+            result = {"msg": "ETag from source and destination are the same"}
+            if changed:
+                result = {"msg": "tags successfully updated.", "tags": tags}
+            return changed, result
+        elif module.check_mode:
+            return True, {"msg": "ETag from source and destination differ"}
+        else:
+            changed = True
+            bucketsrc = {
+                "Bucket": src_bucket,
+                "Key": src_obj,
+            }
+            if versionId:
+                bucketsrc.update({"VersionId": versionId})
+            params.update({"CopySource": bucketsrc})
+            params.update(
+                get_extra_params(
+                    encrypt,
+                    module.params.get("encryption_mode"),
+                    module.params.get("encryption_kms_key_id"),
+                    metadata,
+                )
+            )
+            s3.copy_object(aws_retry=True, **params)
+            put_object_acl(module, s3, bucket, obj)
+            # Tags
+            tags, tags_updated = ensure_tags(s3, module, bucket, obj)
+            msg = f"Object copied from bucket {bucketsrc['Bucket']} to bucket {bucket}."
+            return changed, {"msg": msg, "tags": tags}
+    except (
+        botocore.exceptions.BotoCoreError,
+        botocore.exceptions.ClientError,
+        boto3.exceptions.Boto3Error,
+    ) as e:  # pylint: disable=duplicate-except
+        raise S3ObjectFailure(
+            f"Failed while copying object {obj} from bucket {module.params['copy_src'].get('bucket')}.",
+            e,
+        )
+
+
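The rewritten `copy_object_to_bucket()` above short-circuits on matching ETags so repeated copies stay idempotent. A hedged sketch of that check with a plain boto3 client; bucket and key names are examples, and the real module compares tags as well:

```python
# Illustrative only: copy a key only when source and destination differ.
import boto3
from botocore.exceptions import ClientError

def needs_copy(client, src_bucket, dst_bucket, key, version_id=None):
    kwargs = {"Bucket": src_bucket, "Key": key}
    if version_id:
        kwargs["VersionId"] = version_id
    src_etag = client.head_object(**kwargs)["ETag"]
    try:
        dst_etag = client.head_object(Bucket=dst_bucket, Key=key)["ETag"]
    except ClientError:
        return True  # no destination object yet, so a copy is needed
    return src_etag != dst_etag

client = boto3.client("s3")
if needs_copy(client, "srcbucket", "mybucket", "ansible_a.txt"):
    client.copy_object(
        Bucket="mybucket",
        Key="ansible_a.txt",
        CopySource={"Bucket": "srcbucket", "Key": "ansible_a.txt"},
    )
```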
 def s3_object_do_copy(module, connection, connection_v4, s3_vars):
-    # if copying an object in a bucket yet to be created, acls for the bucket and/or the object may be specified
-    # these were separated into the variables bucket_acl and object_acl above
-    d_etag = get_etag(connection, s3_vars["bucket"], s3_vars["object"])
-    if not s3_vars["acl_disabled"]:
-        # only use valid object acls for the copy operation
-        s3_vars["permission"] = s3_vars["object_acl"]
-    copy_object_to_bucket(
-        module,
-        connection,
-        s3_vars["bucket"],
-        s3_vars["object"],
-        s3_vars["encrypt"],
-        s3_vars["metadata"],
-        s3_vars["validate"],
-        d_etag,
-    )
+    copy_src = module.params.get("copy_src")
+    if not copy_src.get("object") and s3_vars["object"]:
+        module.fail_json(
+            msg="A destination object was specified while trying to copy all the objects from the source bucket."
+        )
+    src_bucket = copy_src.get("bucket")
+    if not copy_src.get("object"):
+        # recursively copy object(s) from the source bucket to the destination bucket
+        # list all the objects from the source bucket
+        keys = list_keys(connection, src_bucket, copy_src.get("prefix"))
+        if len(keys) == 0:
+            module.exit_json(msg=f"No object found to be copied from source bucket {src_bucket}.")
+
+        changed = False
+        number_keys_updated = 0
+        for key in keys:
+            updated, result = copy_object_to_bucket(
+                module,
+                connection,
+                s3_vars["bucket"],
+                key,
+                s3_vars["encrypt"],
+                s3_vars["metadata"],
+                s3_vars["validate"],
+                src_bucket,
+                key,
+                versionId=copy_src.get("version_id"),
+            )
+            changed |= updated
+            number_keys_updated += 1 if updated else 0
+
+        msg = "object(s) in buckets '{0}' and '{1}' are already the same.".format(src_bucket, s3_vars["bucket"])
+        if number_keys_updated:
+            msg = "{0} object(s) copied into bucket '{1}'.".format(number_keys_updated, s3_vars["bucket"])
+        module.exit_json(changed=changed, msg=msg)
+    else:
+        # copy single object from source bucket into destination bucket
+        changed, result = copy_object_to_bucket(
+            module,
+            connection,
+            s3_vars["bucket"],
+            s3_vars["object"],
+            s3_vars["encrypt"],
+            s3_vars["metadata"],
+            s3_vars["validate"],
+            src_bucket,
+            copy_src.get("object"),
+            versionId=copy_src.get("version_id"),
+        )
+        module.exit_json(changed=changed, **result)
 
 
 def populate_params(module):
@@ -1459,7 +1512,8 @@ def main():
             type="dict",
             options=dict(
                 bucket=dict(required=True),
-                object=dict(required=True),
+                object=dict(),
+                prefix=dict(default=""),
                 version_id=dict(),
             ),
         ),
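`s3_object_do_copy()` now dispatches between single-object and recursive copies, OR-ing the per-key results into one `changed` flag. A self-contained sketch of that aggregation, where the `copy_one()` stub stands in for `copy_object_to_bucket()` and the sync predicate is illustrative:

```python
# Sketch of the aggregation loop used by the recursive branch.
def copy_one(key):
    already_in_sync = key.endswith(".log")  # illustrative predicate
    return not already_in_sync, {"msg": f"{key} processed"}

keys = ["ansible_a.txt", "ansible_b.txt", "run.log"]
changed = False
number_keys_updated = 0
for key in keys:
    updated, _result = copy_one(key)
    changed |= updated
    number_keys_updated += 1 if updated else 0

msg = "object(s) in both buckets are already the same."
if number_keys_updated:
    msg = f"{number_keys_updated} object(s) copied."
print(changed, msg)  # True 2 object(s) copied.
```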
diff --git a/tests/integration/targets/s3_object/tasks/copy_recursively.yml b/tests/integration/targets/s3_object/tasks/copy_recursively.yml
new file mode 100644
index 00000000000..1e31020d714
--- /dev/null
+++ b/tests/integration/targets/s3_object/tasks/copy_recursively.yml
@@ -0,0 +1,152 @@
+- name: Test recursive copy of objects from one bucket to another
+  block:
+    - name: Create S3 bucket
+      s3_bucket:
+        name: "{{ item }}"
+        state: present
+      with_items:
+        - "{{ bucket_src }}"
+        - "{{ bucket_dst }}"
+
+    - name: Create objects in source bucket
+      s3_object:
+        bucket: "{{ bucket_src }}"
+        mode: put
+        content: "{{ item.content }}"
+        object: "{{ item.object }}"
+      with_items: "{{ s3_objects }}"
+
+    - name: Copy all objects from source bucket into destination bucket (check mode)
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+      check_mode: true
+
+    - name: List objects from destination bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure no objects were copied into the bucket
+      assert:
+        that:
+          - _objects.s3_keys | length == 0
+
+    # Test: Copy all objects using prefix
+    - name: Copy objects using prefix
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+          prefix: "file"
+      register: _copy_with_prefix
+
+    - name: List objects from destination bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure objects with prefix 'file' were copied into bucket
+      assert:
+        that:
+          - _copy_with_prefix is changed
+          - _objects.s3_keys | length == 3
+          - '"file1.txt" in _objects.s3_keys'
+          - '"file2.txt" in _objects.s3_keys'
+          - '"file3.txt" in _objects.s3_keys'
+
+    # Test: Copy all objects using prefix (idempotency)
+    - name: Copy objects using prefix (idempotency)
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+          prefix: "file"
+      register: _copy_with_prefix_idempotency
+
+    - name: List objects from destination bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure copying objects with prefix 'file' is idempotent
+      assert:
+        that:
+          - _copy_with_prefix_idempotency is not changed
+          - _objects.s3_keys | length == 3
+          - '"file1.txt" in _objects.s3_keys'
+          - '"file2.txt" in _objects.s3_keys'
+          - '"file3.txt" in _objects.s3_keys'
+
+    # Test: Copy all objects from source bucket
+    - name: Copy all objects from source bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+      register: _copy_all
+
+    - name: List objects from destination bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure all objects were copied into bucket
+      assert:
+        that:
+          - _copy_all is changed
+          - _objects.s3_keys | length == 5
+
+    # Test: Copy all objects from source bucket (idempotency)
+    - name: Copy all objects from source bucket (idempotency)
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+      register: _copy_all_idempotency
+
+    - name: List objects from destination bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure the number of copied objects remains the same
+      assert:
+        that:
+          - _copy_all_idempotency is not changed
+          - _objects.s3_keys | length == 5
+
+  vars:
+    bucket_src: "{{ bucket_name }}-recursive-src"
+    bucket_dst: "{{ bucket_name }}-recursive-dst"
+    s3_objects:
+      - object: file1.txt
+        content: |
+          some content for file1.txt
+      - object: file2.txt
+        content: |
+          some content for file2.txt
+      - object: file3.txt
+        content: |
+          some content for file3.txt
+      - object: testfile.py
+        content: "This is a sample text file"
+      - object: another.txt
+        content: "another file to create in the bucket"
+
+  always:
+    - include_tasks: delete_bucket.yml
+      with_items:
+        - "{{ bucket_src }}"
+        - "{{ bucket_dst }}"
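The integration tasks above pin down two behaviours: check mode must report a change without copying anything, and a second real run must report no change. A toy model of that contract, using plain dicts as stand-ins for the two buckets:

```python
# Sketch only: dict-backed buckets model the check-mode/idempotency contract.
def copy_all(src, dst, check_mode=False):
    to_copy = [key for key in src if src.get(key) != dst.get(key)]
    if not check_mode:
        for key in to_copy:
            dst[key] = src[key]
    return bool(to_copy)  # the "changed" result

src = {"file1.txt": "a", "file2.txt": "b"}
dst = {}
assert copy_all(src, dst, check_mode=True) is True and dst == {}  # reports, copies nothing
assert copy_all(src, dst) is True and len(dst) == 2               # real run copies
assert copy_all(src, dst) is False                                # second run is a no-op
```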
diff --git a/tests/integration/targets/s3_object/tasks/main.yml b/tests/integration/targets/s3_object/tasks/main.yml
index 5603ddc1ec1..c5006322545 100644
--- a/tests/integration/targets/s3_object/tasks/main.yml
+++ b/tests/integration/targets/s3_object/tasks/main.yml
@@ -1039,6 +1039,8 @@
             - "'tags' in result"
             - (result.tags | length) == 0
 
+    - include_tasks: copy_recursively.yml
+
   always:
 
     - name: delete temporary files
diff --git a/tests/unit/plugins/modules/test_s3_object.py b/tests/unit/plugins/modules/test_s3_object.py
index c8d3fc4fcf3..863001335a3 100644
--- a/tests/unit/plugins/modules/test_s3_object.py
+++ b/tests/unit/plugins/modules/test_s3_object.py
@@ -17,26 +17,22 @@
 
 @patch(module_name + ".paginated_list")
 def test_list_keys_success(m_paginated_list):
-    module = MagicMock()
     s3 = MagicMock()
 
     m_paginated_list.return_value = ["delete.txt"]
 
-    s3_object.list_keys(module, s3, "a987e6b6026ab04e4717", "", "", 1000)
-
-    assert m_paginated_list.call_count == 1
-    module.exit_json.assert_called_with(msg="LIST operation complete", s3_keys=["delete.txt"])
+    assert ["delete.txt"] == s3_object.list_keys(s3, "a987e6b6026ab04e4717", "", "", 1000)
+    m_paginated_list.assert_called_once()
 
 
 @patch(module_name + ".paginated_list")
 def test_list_keys_failure(m_paginated_list):
-    module = MagicMock()
     s3 = MagicMock()
 
     m_paginated_list.side_effect = botocore.exceptions.BotoCoreError
 
     with pytest.raises(s3_object.S3ObjectFailure):
-        s3_object.list_keys(module, s3, "a987e6b6026ab04e4717", "", "", 1000)
+        s3_object.list_keys(s3, "a987e6b6026ab04e4717", "", "", 1000)
 
 
 @patch(module_name + ".delete_key")
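The unit tests above only cover the `list_keys()` signature change. A hypothetical companion test for the new `check_object_tags()` helper could look like the following; it would sit in the same test_s3_object.py and reuse that file's `module_name` and `s3_object` imports, so the patch target shown is an assumption:

```python
# Hypothetical test, mirroring the mock style of the existing tests.
from unittest.mock import MagicMock, patch

@patch(module_name + ".get_current_object_tags_dict")
def test_check_object_tags_detects_diff(m_get_current_tags):
    module = MagicMock()
    module.params = {"tags": {"env": "prod"}, "purge_tags": True}
    m_get_current_tags.return_value = {"env": "dev"}
    s3 = MagicMock()

    # Desired tags differ from the current tags, so a diff is reported.
    assert s3_object.check_object_tags(module, s3, "mybucket", "mykey") is True
```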