diff --git a/changelogs/fragments/20230612-s3_object-support-copy-objects-recursively.yml b/changelogs/fragments/20230612-s3_object-support-copy-objects-recursively.yml
new file mode 100644
index 00000000000..9157ec0d0b0
--- /dev/null
+++ b/changelogs/fragments/20230612-s3_object-support-copy-objects-recursively.yml
@@ -0,0 +1,2 @@
+minor_changes:
+- s3_object - Allow recursive copy of objects in S3 bucket (https://github.com/ansible-collections/amazon.aws/issues/1379).
diff --git a/plugins/modules/s3_object.py b/plugins/modules/s3_object.py
index 84f2c0c7b5c..8f36df398f0 100644
--- a/plugins/modules/s3_object.py
+++ b/plugins/modules/s3_object.py
@@ -228,11 +228,19 @@
         type: str
         description:
         - key name of the source object.
-        required: true
+        - If not specified, all the objects of I(copy_src.bucket) will be copied into the destination bucket.
+        required: false
       version_id:
         type: str
         description:
         - version ID of the source object.
+      prefix:
+        description:
+        - Copy all the keys that begin with the specified prefix.
+        - Ignored if I(copy_src.object) is supplied.
+        default: ""
+        type: str
+        version_added: 6.2.0
   validate_bucket_name:
     description:
     - Whether the bucket name should be validated to conform to AWS S3 naming rules.
@@ -370,6 +378,14 @@
     copy_src:
       bucket: srcbucket
       object: /source/key.txt
+
+- name: Copy all the objects whose names start with 'ansible_'
+  amazon.aws.s3_object:
+    bucket: mybucket
+    mode: copy
+    copy_src:
+      bucket: srcbucket
+      prefix: 'ansible_'
 """

 RETURN = r"""
@@ -566,7 +582,7 @@ def paginated_versioned_list_with_fallback(s3, **pagination_params):
         yield [{"Key": key}]


-def list_keys(module, s3, bucket, prefix, marker, max_keys):
+def list_keys(s3, bucket, prefix=None, marker=None, max_keys=None):
     pagination_params = {
         "Bucket": bucket,
         "Prefix": prefix,
@@ -576,8 +592,7 @@
     pagination_params = {k: v for k, v in pagination_params.items() if v}

     try:
-        keys = list(paginated_list(s3, **pagination_params))
-        module.exit_json(msg="LIST operation complete", s3_keys=keys)
+        return list(paginated_list(s3, **pagination_params))
     except (
         botocore.exceptions.ClientError,
         botocore.exceptions.BotoCoreError,
@@ -892,78 +907,6 @@ def put_download_url(s3, bucket, obj, expiry):
     return url


-def copy_object_to_bucket(module, s3, bucket, obj, encrypt, metadata, validate, d_etag):
-    if module.check_mode:
-        module.exit_json(msg="COPY operation skipped - running in check mode", changed=True)
-    try:
-        params = {"Bucket": bucket, "Key": obj}
-        bucketsrc = {
-            "Bucket": module.params["copy_src"].get("bucket"),
-            "Key": module.params["copy_src"].get("object"),
-        }
-        version = None
-        if module.params["copy_src"].get("version_id"):
-            version = module.params["copy_src"].get("version_id")
-            bucketsrc.update({"VersionId": version})
-        if not key_check(
-            module,
-            s3,
-            bucketsrc["Bucket"],
-            bucketsrc["Key"],
-            version=version,
-            validate=validate,
-        ):
-            # Key does not exist in source bucket
-            module.exit_json(
-                msg=f"Key {bucketsrc['Key']} does not exist in bucket {bucketsrc['Bucket']}.",
-                changed=False,
-            )
-
-        s_etag = get_etag(s3, bucketsrc["Bucket"], bucketsrc["Key"], version=version)
-        if s_etag == d_etag:
-            # Tags
-            tags, changed = ensure_tags(s3, module, bucket, obj)
-            if not changed:
-                module.exit_json(
-                    msg="ETag from source and destination are the same",
-                    changed=False,
-                )
-            else:
-                module.exit_json(
-                    msg="tags successfully updated.",
-                    changed=changed,
-                    tags=tags,
-                )
-        else:
-            params.update({"CopySource": bucketsrc})
-            params.update(
-                get_extra_params(
-                    encrypt,
-                    module.params.get("encryption_mode"),
-                    module.params.get("encryption_kms_key_id"),
-                    metadata,
-                )
-            )
-            s3.copy_object(aws_retry=True, **params)
-            put_object_acl(module, s3, bucket, obj)
-            # Tags
-            tags, changed = ensure_tags(s3, module, bucket, obj)
-            module.exit_json(
-                msg=f"Object copied from bucket {bucketsrc['Bucket']} to bucket {bucket}.",
-                tags=tags,
-                changed=True,
-            )
-    except (
-        botocore.exceptions.BotoCoreError,
-        botocore.exceptions.ClientError,
-        boto3.exceptions.Boto3Error,
-    ) as e:  # pylint: disable=duplicate-except
-        raise S3ObjectFailure(
-            f"Failed while copying object {obj} from bucket {module.params['copy_src'].get('Bucket')}.",
-            e,
-        )
-
-
 def get_current_object_tags_dict(module, s3, bucket, obj, version=None):
     try:
         if version:
@@ -1230,8 +1173,7 @@ def s3_object_do_delobj(module, connection, connection_v4, s3_vars):

 def s3_object_do_list(module, connection, connection_v4, s3_vars):
     # If the bucket does not exist then bail out
-    list_keys(
-        module,
+    keys = list_keys(
         connection,
         s3_vars["bucket"],
         s3_vars["prefix"],
@@ -1239,6 +1181,8 @@
         s3_vars["max_keys"],
     )

+    module.exit_json(msg="LIST operation complete", s3_keys=keys)
+

 def s3_object_do_create(module, connection, connection_v4, s3_vars):
     # if both creating a bucket and putting an object in it, acls for the bucket and/or the object may be specified
@@ -1330,23 +1274,132 @@ def s3_object_do_getstr(module, connection, connection_v4, s3_vars):
         module.fail_json(msg=f"Key {s3_vars['object']} does not exist.")


+def check_object_tags(module, connection, bucket, obj):
+    tags = module.params.get("tags")
+    purge_tags = module.params.get("purge_tags")
+    diff = False
+    if tags:
+        current_tags_dict = get_current_object_tags_dict(module, connection, bucket, obj)
+        if not purge_tags:
+            # Ensure existing tags that aren't updated by desired tags remain
+            current_tags_dict.update(tags)
+        diff = current_tags_dict != tags
+    return diff
+
+
+def copy_object_to_bucket(module, s3, bucket, obj, encrypt, metadata, validate, src_bucket, src_obj, versionId=None):
+    try:
+        params = {"Bucket": bucket, "Key": obj}
+        if not key_check(module, s3, src_bucket, src_obj, version=versionId, validate=validate):
+            # Key does not exist in source bucket
+            module.exit_json(
+                msg=f"Key {src_obj} does not exist in bucket {src_bucket}.",
+                changed=False,
+            )
+
+        s_etag = get_etag(s3, src_bucket, src_obj, version=versionId)
+        d_etag = get_etag(s3, bucket, obj)
+        if s_etag == d_etag:
+            if module.check_mode:
+                changed = check_object_tags(module, s3, bucket, obj)
+                result = {}
+                if changed:
+                    result.update({"msg": "Would have updated object tags if not running in check mode."})
+                return changed, result

+            # Ensure tags
+            tags, changed = ensure_tags(s3, module, bucket, obj)
+            result = {"msg": "ETag from source and destination are the same"}
+            if changed:
+                result = {"msg": "tags successfully updated.", "tags": tags}
+            return changed, result
+        elif module.check_mode:
+            return True, {"msg": "ETag from source and destination differ"}
+        else:
+            changed = True
+            bucketsrc = {
+                "Bucket": src_bucket,
+                "Key": src_obj,
+            }
+            if versionId:
+                bucketsrc.update({"VersionId": versionId})
+            params.update({"CopySource": bucketsrc})
+            params.update(
+                get_extra_params(
+                    encrypt,
+                    module.params.get("encryption_mode"),
+                    module.params.get("encryption_kms_key_id"),
+                    metadata,
+                )
+            )
+            s3.copy_object(aws_retry=True, **params)
+            put_object_acl(module, s3, bucket, obj)
+            # Tags
+            tags, tags_updated = ensure_tags(s3, module, bucket, obj)
+            msg = f"Object copied from bucket {bucketsrc['Bucket']} to bucket {bucket}."
+            return changed, {"msg": msg, "tags": tags}
+    except (
+        botocore.exceptions.BotoCoreError,
+        botocore.exceptions.ClientError,
+        boto3.exceptions.Boto3Error,
+    ) as e:  # pylint: disable=duplicate-except
+        raise S3ObjectFailure(
+            f"Failed while copying object {obj} from bucket {src_bucket}.",
+            e,
+        )
+
+
 def s3_object_do_copy(module, connection, connection_v4, s3_vars):
-    # if copying an object in a bucket yet to be created, acls for the bucket and/or the object may be specified
-    # these were separated into the variables bucket_acl and object_acl above
-    d_etag = get_etag(connection, s3_vars["bucket"], s3_vars["object"])
-    if not s3_vars["acl_disabled"]:
-        # only use valid object acls for the copy operation
-        s3_vars["permission"] = s3_vars["object_acl"]
-    copy_object_to_bucket(
-        module,
-        connection,
-        s3_vars["bucket"],
-        s3_vars["object"],
-        s3_vars["encrypt"],
-        s3_vars["metadata"],
-        s3_vars["validate"],
-        d_etag,
-    )
+    copy_src = module.params.get("copy_src")
+    if not copy_src.get("object") and s3_vars["object"]:
+        module.fail_json(
+            msg="A destination object was specified while trying to copy all the objects from the source bucket."
+        )
+    src_bucket = copy_src.get("bucket")
+    if not copy_src.get("object"):
+        # copy recursively object(s) from source bucket to destination bucket
+        # list all the objects from the source bucket
+        keys = list_keys(connection, src_bucket, copy_src.get("prefix"))
+        if len(keys) == 0:
+            module.exit_json(msg=f"No object found to be copied from source bucket {src_bucket}.")
+
+        changed = False
+        number_keys_updated = 0
+        for key in keys:
+            updated, result = copy_object_to_bucket(
+                module,
+                connection,
+                s3_vars["bucket"],
+                key,
+                s3_vars["encrypt"],
+                s3_vars["metadata"],
+                s3_vars["validate"],
+                src_bucket,
+                key,
+                versionId=copy_src.get("version_id"),
+            )
+            changed |= updated
+            number_keys_updated += 1 if updated else 0
+
+        msg = "object(s) from buckets '{0}' and '{1}' are the same.".format(src_bucket, s3_vars["bucket"])
+        if number_keys_updated:
+            msg = "{0} object(s) copied into bucket '{1}'.".format(number_keys_updated, s3_vars["bucket"])
+        module.exit_json(changed=changed, msg=msg)
+    else:
+        # copy single object from source bucket into destination bucket
+        changed, result = copy_object_to_bucket(
+            module,
+            connection,
+            s3_vars["bucket"],
+            s3_vars["object"],
+            s3_vars["encrypt"],
+            s3_vars["metadata"],
+            s3_vars["validate"],
+            src_bucket,
+            copy_src.get("object"),
+            versionId=copy_src.get("version_id"),
+        )
+        module.exit_json(changed=changed, **result)


 def populate_params(module):
@@ -1459,7 +1512,8 @@ def main():
         copy_src=dict(
             type="dict",
             options=dict(
                 bucket=dict(required=True),
-                object=dict(required=True),
+                object=dict(),
+                prefix=dict(default=""),
                 version_id=dict(),
             ),
         ),
diff --git a/tests/integration/targets/s3_object/tasks/copy_recursively.yml b/tests/integration/targets/s3_object/tasks/copy_recursively.yml
new file mode 100644
index 00000000000..1e31020d714
--- /dev/null
+++ b/tests/integration/targets/s3_object/tasks/copy_recursively.yml
@@ -0,0 +1,152 @@
+- name: Test copying objects recursively from one bucket to another
+  block:
+    - name: Create S3 bucket
+      s3_bucket:
+        name: "{{ item }}"
+        state: present
+      with_items:
+        - "{{ bucket_src }}"
+        - "{{ bucket_dst }}"
+
+    - name: Create objects in the source bucket
+      s3_object:
+        bucket: "{{ bucket_src }}"
+        mode: put
+        content: "{{ item.content }}"
+        object: "{{ item.object }}"
+      with_items: "{{ s3_objects }}"
+
+    - name: Copy all objects from source bucket into destination bucket (check mode)
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+      check_mode: true
+
+    - name: list objects from bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure no objects were found in the bucket
+      assert:
+        that:
+          - _objects.s3_keys | length == 0
+
+    # Test: Copy all objects using prefix
+    - name: copy objects using prefix
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+          prefix: "file"
+      register: _copy_with_prefix
+
+    - name: list objects from bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure objects with prefix 'file' were copied into bucket
+      assert:
+        that:
+          - _copy_with_prefix is changed
+          - _objects.s3_keys | length == 3
+          - '"file1.txt" in _objects.s3_keys'
+          - '"file2.txt" in _objects.s3_keys'
+          - '"file3.txt" in _objects.s3_keys'
+
+    # Test: Copy all objects using prefix (idempotency)
+    - name: copy objects using prefix (idempotency)
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+          prefix: "file"
+      register: _copy_with_prefix_idempotency
+
+    - name: list objects from bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure objects with prefix 'file' were copied into bucket
+      assert:
+        that:
+          - _copy_with_prefix_idempotency is not changed
+          - _objects.s3_keys | length == 3
+          - '"file1.txt" in _objects.s3_keys'
+          - '"file2.txt" in _objects.s3_keys'
+          - '"file3.txt" in _objects.s3_keys'
+
+    # Test: Copy all objects from source bucket
+    - name: copy all objects from source bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+      register: _copy_all
+
+    - name: list objects from bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure all objects were copied into bucket
+      assert:
+        that:
+          - _copy_all is changed
+          - _objects.s3_keys | length == 5
+
+    # Test: Copy all objects from source bucket (idempotency)
+    - name: copy all objects from source bucket (idempotency)
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: copy
+        copy_src:
+          bucket: "{{ bucket_src }}"
+      register: _copy_all_idempotency
+
+    - name: list objects from bucket
+      s3_object:
+        bucket: "{{ bucket_dst }}"
+        mode: list
+      register: _objects
+
+    - name: Ensure number of copied objects remains the same
+      assert:
+        that:
+          - _copy_all_idempotency is not changed
+          - _objects.s3_keys | length == 5
+
+  vars:
+    bucket_src: "{{ bucket_name }}-recursive-src"
+    bucket_dst: "{{ bucket_name }}-recursive-dst"
+    s3_objects:
+      - object: file1.txt
+        content: |
+          some content for file1.txt
+      - object: file2.txt
+        content: |
+          some content for file2.txt
+      - object: file3.txt
+        content: |
+          some content for file3.txt
+      - object: testfile.py
+        content: "This is a sample text file"
+      - object: another.txt
+        content: "another file to create into bucket"
+
+  always:
+    - include_tasks: delete_bucket.yml
+      with_items:
+        - "{{ bucket_src }}"
+        - "{{ bucket_dst }}"
diff --git a/tests/integration/targets/s3_object/tasks/main.yml b/tests/integration/targets/s3_object/tasks/main.yml
index 5603ddc1ec1..c5006322545 100644
--- a/tests/integration/targets/s3_object/tasks/main.yml
+++ b/tests/integration/targets/s3_object/tasks/main.yml
@@ -1039,6 +1039,8 @@
         - "'tags' in result"
         - (result.tags | length) == 0

+  - include_tasks: copy_recursively.yml
+
   always:

     - name: delete temporary files
diff --git a/tests/unit/plugins/modules/test_s3_object.py b/tests/unit/plugins/modules/test_s3_object.py
index c8d3fc4fcf3..863001335a3 100644
--- a/tests/unit/plugins/modules/test_s3_object.py
+++ b/tests/unit/plugins/modules/test_s3_object.py
@@ -17,26 +17,22 @@

 @patch(module_name + ".paginated_list")
 def test_list_keys_success(m_paginated_list):
-    module = MagicMock()
     s3 = MagicMock()
     m_paginated_list.return_value = ["delete.txt"]

-    s3_object.list_keys(module, s3, "a987e6b6026ab04e4717", "", "", 1000)
-
-    assert m_paginated_list.call_count == 1
-    module.exit_json.assert_called_with(msg="LIST operation complete", s3_keys=["delete.txt"])
+    assert ["delete.txt"] == s3_object.list_keys(s3, "a987e6b6026ab04e4717", "", "", 1000)
+    m_paginated_list.assert_called_once()


 @patch(module_name + ".paginated_list")
 def test_list_keys_failure(m_paginated_list):
-    module = MagicMock()
     s3 = MagicMock()
     m_paginated_list.side_effect = botocore.exceptions.BotoCoreError

     with pytest.raises(s3_object.S3ObjectFailure):
-        s3_object.list_keys(module, s3, "a987e6b6026ab04e4717", "", "", 1000)
+        s3_object.list_keys(s3, "a987e6b6026ab04e4717", "", "", 1000)


 @patch(module_name + ".delete_key")
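
Reviewer note (illustrative only, not part of the patch): the new recursive copy path boils down to two S3 calls, paginating the source keys under an optional prefix and then issuing one server-side copy_object per key; the module additionally compares ETags and manages tags/ACLs, which this sketch omits. The standalone boto3 sketch below shows that flow outside Ansible; the bucket names and prefix are placeholders.

    import boto3

    SRC_BUCKET = "example-src-bucket"  # placeholder
    DST_BUCKET = "example-dst-bucket"  # placeholder
    PREFIX = "ansible_"  # placeholder; an empty prefix copies every key

    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    # List every key under the prefix in the source bucket, page by page.
    for page in paginator.paginate(Bucket=SRC_BUCKET, Prefix=PREFIX):
        for entry in page.get("Contents", []):
            key = entry["Key"]
            # Server-side copy; object data never passes through the caller.
            s3.copy_object(
                Bucket=DST_BUCKET,
                Key=key,
                CopySource={"Bucket": SRC_BUCKET, "Key": key},
            )
            print(f"copied {key}")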