From 27fa5f607fc1d3b37c5a935466acaa11a3dac762 Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Wed, 11 Jun 2025 03:12:30 -0700 Subject: [PATCH 1/8] More comprehensive error handling --- metaflow/plugins/datatools/s3/s3.py | 4 + metaflow/plugins/datatools/s3/s3op.py | 131 ++++++++++++++++++++------ 2 files changed, 104 insertions(+), 31 deletions(-) diff --git a/metaflow/plugins/datatools/s3/s3.py b/metaflow/plugins/datatools/s3/s3.py index 20b5d5fc3cc..6f052d0d267 100644 --- a/metaflow/plugins/datatools/s3/s3.py +++ b/metaflow/plugins/datatools/s3/s3.py @@ -853,6 +853,8 @@ def _head(): raise MetaflowS3NotFound() elif info["error"] == s3op.ERROR_URL_ACCESS_DENIED: raise MetaflowS3AccessDenied() + elif info["error"] == s3op.ERROR_INVALID_REQUEST: + raise MetaflowS3Exception("Invalid request for %s" % s3url) else: raise MetaflowS3Exception("Got error: %d" % info["error"]) else: @@ -1379,6 +1381,8 @@ def _one_boto_op(self, op, url, create_tmp_file=True): raise MetaflowS3AccessDenied(url) elif error_code == 416: raise MetaflowS3InvalidRange(err) + elif error_code == 400: + raise MetaflowS3Exception(f"Invalid request for {url}: {str(err)}") elif error_code == "NoSuchBucket": raise MetaflowS3URLException("Specified S3 bucket doesn't exist.") error = str(err) diff --git a/metaflow/plugins/datatools/s3/s3op.py b/metaflow/plugins/datatools/s3/s3op.py index 44c5a1f1d3b..dcf75507adb 100644 --- a/metaflow/plugins/datatools/s3/s3op.py +++ b/metaflow/plugins/datatools/s3/s3op.py @@ -107,6 +107,7 @@ def __str__(self): ERROR_INVALID_RANGE = 11 ERROR_TRANSIENT = 12 ERROR_OUT_OF_DISK_SPACE = 13 +ERROR_INVALID_REQUEST = 14 def format_result_line(idx, prefix, url="", local=""): @@ -129,37 +130,94 @@ def normalize_client_error(err): try: return int(error_code) except ValueError: - if error_code in ("AccessDenied", "AllAccessDisabled", "InvalidAccessKeyId"): - return 403 - if error_code in ("NoSuchKey", "NoSuchBucket"): - return 404 - if error_code == "InvalidRange": - return 416 - # We 
"normalize" retriable server errors to 503. These are also considered - # transient by boto3 (see: - # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html) - if error_code in ( - "SlowDown", - "RequestTimeout", - "RequestTimeoutException", - "PriorRequestNotComplete", - "ConnectionError", - "HTTPClientError", - "Throttling", - "ThrottlingException", - "ThrottledException", - "RequestThrottledException", - "TooManyRequestsException", - "ProvisionedThroughputExceededException", - "TransactionInProgressException", - "RequestLimitExceeded", - "BandwidthLimitExceeded", - "LimitExceededException", - "RequestThrottled", - "EC2ThrottledException", - "InternalError", - ): - return 503 + pass + + # Permission or access-related errors → 403 Forbidden + permission_errors = { + "AccessDenied", + "AccessDeniedException", + "AccountProblem", + "AllAccessDisabled", + "AuthFailure", + "ExpiredToken", + "InvalidAccessKeyId", + "InvalidSecurity", + "SignatureDoesNotMatch", + "UnauthorizedOperation", + "UnrecognizedClientException", + } + + # Not found errors → 404 Not Found + not_found_errors = { + "NoSuchKey", + "NotFound", + } + + # Range/invalid byte-range errors → 416 + range_errors = { + "InvalidRange", + } + + # Server-side throttling, timeout, or transient errors → 503 + # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html + transient_errors = { + "BandwidthLimitExceeded", + "ConnectionError", + "EC2ThrottledException", + "HTTPClientError", + "InternalError", + "InternalFailure", + "LimitExceededException", + "PriorRequestNotComplete", + "ProvisionedThroughputExceededException", + "RequestLimitExceeded", + "RequestThrottled", + "RequestThrottledException", + "RequestTimeout", + "RequestTimeoutException", + "ServerError", + "ServiceUnavailable", + "SlowDown", + "ThrottledException", + "Throttling", + "ThrottlingException", + "TooManyRequestsException", + "TransactionInProgressException", + "Unavailable", + } + + # 
Fatal/unrecoverable → 400 + fatal_errors = { + "BucketAlreadyExists", + "BucketAlreadyOwnedByYou", + "DryRunOperation", + "InvalidClientTokenId", + "InvalidParameterCombination", + "InvalidParameterValue", + "InvalidQueryParameter", + "MalformedPolicyDocument", + "MalformedQueryString", + "MethodNotAllowed", + "MissingParameter", + "OperationAborted", + "OptInRequired", + "UnsupportedOperation", + "UnsupportedProtocol", + "ValidationException", + } + + if error_code in permission_errors: + return 403 + elif error_code in not_found_errors: + return 404 + elif error_code in range_errors: + return 416 + elif error_code in fatal_errors: + return 400 + elif error_code in transient_errors: + return 503 + + # Default: return original string code if unmapped return error_code @@ -199,6 +257,8 @@ def op_info(url): to_return = {"error": ERROR_URL_ACCESS_DENIED, "raise_error": err} elif error_code == 416: to_return = {"error": ERROR_INVALID_RANGE, "raise_error": err} + elif error_code == 400: + to_return = {"error": ERROR_INVALID_REQUEST, "raise_error": err} elif error_code in (500, 502, 503, 504): to_return = {"error": ERROR_TRANSIENT, "raise_error": err} else: @@ -392,6 +452,9 @@ def handle_client_error(err, idx, result_file): elif error_code == 403: result_file.write("%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED)) result_file.flush() + elif error_code == 400: + result_file.write("%d %d\n" % (idx, -ERROR_INVALID_REQUEST)) + result_file.flush() elif error_code == 503: result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT)) result_file.flush() @@ -564,6 +627,8 @@ def get_info(self, url): return False, url, ERROR_URL_NOT_FOUND elif error_code == 403: return False, url, ERROR_URL_ACCESS_DENIED + elif error_code == 400: + return False, url, ERROR_INVALID_REQUEST # Transient errors are going to be retried by the aws_retry decorator else: raise @@ -612,6 +677,8 @@ def list_prefix(self, prefix_url, delimiter=""): return False, prefix_url, ERROR_URL_NOT_FOUND elif error_code == 403: 
return False, prefix_url, ERROR_URL_ACCESS_DENIED + elif error_code == 400: + return False, prefix_url, ERROR_INVALID_REQUEST # Transient errors are going to be retried by the aws_retry decorator else: raise @@ -655,6 +722,8 @@ def exit(exit_code, url): msg = "Transient error for url: %s" % url elif exit_code == ERROR_OUT_OF_DISK_SPACE: msg = "Out of disk space when downloading URL: %s" % url + elif exit_code == ERROR_INVALID_REQUEST: + msg = "Invalid request for URL: %s" % url else: msg = "Unknown error" print("s3op failed:\n%s" % msg, file=sys.stderr) From 954a82798480f25b2d6f3a716fc466b7e359d98b Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Wed, 11 Jun 2025 03:15:30 -0700 Subject: [PATCH 2/8] add NoSuchBucket --- metaflow/plugins/datatools/s3/s3op.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metaflow/plugins/datatools/s3/s3op.py b/metaflow/plugins/datatools/s3/s3op.py index dcf75507adb..1b42dd422f3 100644 --- a/metaflow/plugins/datatools/s3/s3op.py +++ b/metaflow/plugins/datatools/s3/s3op.py @@ -149,6 +149,7 @@ def normalize_client_error(err): # Not found errors → 404 Not Found not_found_errors = { + "NoSuchBucket", "NoSuchKey", "NotFound", } From 0265b171a29d8bff987a4c298d3a46523b3d09e8 Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Wed, 11 Jun 2025 03:19:56 -0700 Subject: [PATCH 3/8] black --- metaflow/plugins/datatools/s3/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metaflow/plugins/datatools/s3/s3.py b/metaflow/plugins/datatools/s3/s3.py index 6f052d0d267..f564f86f997 100644 --- a/metaflow/plugins/datatools/s3/s3.py +++ b/metaflow/plugins/datatools/s3/s3.py @@ -525,7 +525,7 @@ def __init__( run: Optional[Union[FlowSpec, "metaflow.Run"]] = None, s3root: Optional[str] = None, encryption: Optional[str] = S3_SERVER_SIDE_ENCRYPTION, - **kwargs + **kwargs, ): if run: # 1. 
use a (current) run ID with optional customizations From e132f3e092795b8daf51ac4212a63a34cecf2378 Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Wed, 11 Jun 2025 12:10:41 -0700 Subject: [PATCH 4/8] declare sets as global vars --- metaflow/plugins/datatools/s3/s3op.py | 160 +++++++++++++------------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/metaflow/plugins/datatools/s3/s3op.py b/metaflow/plugins/datatools/s3/s3op.py index 1b42dd422f3..86d0987f988 100644 --- a/metaflow/plugins/datatools/s3/s3op.py +++ b/metaflow/plugins/datatools/s3/s3op.py @@ -64,6 +64,81 @@ S3Config = namedtuple("S3Config", "role session_vars client_params") +# Permission or access-related errors → 403 Forbidden +PERMISSION_ERRORS = { + "AccessDenied", + "AccessDeniedException", + "AccountProblem", + "AllAccessDisabled", + "AuthFailure", + "ExpiredToken", + "InvalidAccessKeyId", + "InvalidSecurity", + "SignatureDoesNotMatch", + "UnauthorizedOperation", + "UnrecognizedClientException", +} + +# Not found errors → 404 Not Found +NOT_FOUND_ERRORS = { + "NoSuchBucket", + "NoSuchKey", + "NotFound", +} + +# Range/invalid byte-range errors → 416 +RANGE_ERRORS = { + "InvalidRange", +} + +# Server-side throttling, timeout, or transient errors → 503 +# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html +TRANSIENT_ERRORS = { + "BandwidthLimitExceeded", + "ConnectionError", + "EC2ThrottledException", + "HTTPClientError", + "InternalError", + "InternalFailure", + "LimitExceededException", + "PriorRequestNotComplete", + "ProvisionedThroughputExceededException", + "RequestLimitExceeded", + "RequestThrottled", + "RequestThrottledException", + "RequestTimeout", + "RequestTimeoutException", + "ServerError", + "ServiceUnavailable", + "SlowDown", + "ThrottledException", + "Throttling", + "ThrottlingException", + "TooManyRequestsException", + "TransactionInProgressException", + "Unavailable", +} + +# Fatal/unrecoverable → 400 +FATAL_ERRORS = { + "BucketAlreadyExists", + 
"BucketAlreadyOwnedByYou", + "DryRunOperation", + "InvalidClientTokenId", + "InvalidParameterCombination", + "InvalidParameterValue", + "InvalidQueryParameter", + "MalformedPolicyDocument", + "MalformedQueryString", + "MethodNotAllowed", + "MissingParameter", + "OperationAborted", + "OptInRequired", + "UnsupportedOperation", + "UnsupportedProtocol", + "ValidationException", +} + class S3Url(object): def __init__( @@ -132,90 +207,15 @@ def normalize_client_error(err): except ValueError: pass - # Permission or access-related errors → 403 Forbidden - permission_errors = { - "AccessDenied", - "AccessDeniedException", - "AccountProblem", - "AllAccessDisabled", - "AuthFailure", - "ExpiredToken", - "InvalidAccessKeyId", - "InvalidSecurity", - "SignatureDoesNotMatch", - "UnauthorizedOperation", - "UnrecognizedClientException", - } - - # Not found errors → 404 Not Found - not_found_errors = { - "NoSuchBucket", - "NoSuchKey", - "NotFound", - } - - # Range/invalid byte-range errors → 416 - range_errors = { - "InvalidRange", - } - - # Server-side throttling, timeout, or transient errors → 503 - # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html - transient_errors = { - "BandwidthLimitExceeded", - "ConnectionError", - "EC2ThrottledException", - "HTTPClientError", - "InternalError", - "InternalFailure", - "LimitExceededException", - "PriorRequestNotComplete", - "ProvisionedThroughputExceededException", - "RequestLimitExceeded", - "RequestThrottled", - "RequestThrottledException", - "RequestTimeout", - "RequestTimeoutException", - "ServerError", - "ServiceUnavailable", - "SlowDown", - "ThrottledException", - "Throttling", - "ThrottlingException", - "TooManyRequestsException", - "TransactionInProgressException", - "Unavailable", - } - - # Fatal/unrecoverable → 400 - fatal_errors = { - "BucketAlreadyExists", - "BucketAlreadyOwnedByYou", - "DryRunOperation", - "InvalidClientTokenId", - "InvalidParameterCombination", - "InvalidParameterValue", - 
"InvalidQueryParameter", - "MalformedPolicyDocument", - "MalformedQueryString", - "MethodNotAllowed", - "MissingParameter", - "OperationAborted", - "OptInRequired", - "UnsupportedOperation", - "UnsupportedProtocol", - "ValidationException", - } - - if error_code in permission_errors: + if error_code in PERMISSION_ERRORS: return 403 - elif error_code in not_found_errors: + elif error_code in NOT_FOUND_ERRORS: return 404 - elif error_code in range_errors: + elif error_code in RANGE_ERRORS: return 416 - elif error_code in fatal_errors: + elif error_code in FATAL_ERRORS: return 400 - elif error_code in transient_errors: + elif error_code in TRANSIENT_ERRORS: return 503 # Default: return original string code if unmapped From 7eb84eed0b4f154218f065105f2c2b22c8936459 Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Wed, 11 Jun 2025 12:20:09 -0700 Subject: [PATCH 5/8] add links to docs, add more error codes --- metaflow/plugins/datatools/s3/s3op.py | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/metaflow/plugins/datatools/s3/s3op.py b/metaflow/plugins/datatools/s3/s3op.py index 86d0987f988..51c05083dbd 100644 --- a/metaflow/plugins/datatools/s3/s3op.py +++ b/metaflow/plugins/datatools/s3/s3op.py @@ -64,6 +64,10 @@ S3Config = namedtuple("S3Config", "role session_vars client_params") +# Error code mappings for AWS S3 and general AWS services +# - S3 Error Responses: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html +# - Boto3 Retries: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html + # Permission or access-related errors → 403 Forbidden PERMISSION_ERRORS = { "AccessDenied", @@ -73,7 +77,9 @@ "AuthFailure", "ExpiredToken", "InvalidAccessKeyId", + "InvalidPayer", "InvalidSecurity", + "InvalidToken", "SignatureDoesNotMatch", "UnauthorizedOperation", "UnrecognizedClientException", @@ -82,7 +88,11 @@ # Not found errors → 404 Not Found NOT_FOUND_ERRORS = { "NoSuchBucket", + "NoSuchBucketPolicy", 
"NoSuchKey", + "NoSuchLifecycleConfiguration", + "NoSuchVersion", + "NoSuchWebsiteConfiguration", "NotFound", } @@ -111,6 +121,7 @@ "ServerError", "ServiceUnavailable", "SlowDown", + "TemporaryRedirect", "ThrottledException", "Throttling", "ThrottlingException", @@ -121,19 +132,45 @@ # Fatal/unrecoverable → 400 FATAL_ERRORS = { + "AccessControlListNotSupported", + "AmbiguousGrantByEmailAddress", + "BadDigest", "BucketAlreadyExists", "BucketAlreadyOwnedByYou", "DryRunOperation", + "EntityTooLarge", + "EntityTooSmall", + "IncompleteBody", + "InvalidArgument", + "InvalidBucketName", "InvalidClientTokenId", + "InvalidDigest", + "InvalidObjectState", "InvalidParameterCombination", "InvalidParameterValue", + "InvalidPart", + "InvalidPartOrder", "InvalidQueryParameter", + "InvalidRequest", + "InvalidStorageClass", + "InvalidTargetBucketForLogging", "MalformedPolicyDocument", "MalformedQueryString", + "MalformedXML", "MethodNotAllowed", + "MissingContentLength", "MissingParameter", + "MissingRequestBodyError", + "NoSuchUpload", + "NotImplemented", "OperationAborted", "OptInRequired", + "PreconditionFailed", + "Redirect", + "RestoreAlreadyInProgress", + "TooManyBuckets", + "UnexpectedContent", + "UnresolvableGrantByEmailAddress", "UnsupportedOperation", "UnsupportedProtocol", "ValidationException", From c373c01833c413beaa0a7c08c4dc448d493fbce4 Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Wed, 11 Jun 2025 12:33:23 -0700 Subject: [PATCH 6/8] update codes --- metaflow/plugins/datatools/s3/s3op.py | 84 +++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/metaflow/plugins/datatools/s3/s3op.py b/metaflow/plugins/datatools/s3/s3op.py index 51c05083dbd..5d3bcf88881 100644 --- a/metaflow/plugins/datatools/s3/s3op.py +++ b/metaflow/plugins/datatools/s3/s3op.py @@ -80,6 +80,8 @@ "InvalidPayer", "InvalidSecurity", "InvalidToken", + "NotSignedUp", + "RequestTimeTooSkewed", "SignatureDoesNotMatch", "UnauthorizedOperation", 
"UnrecognizedClientException", @@ -87,12 +89,19 @@ # Not found errors → 404 Not Found NOT_FOUND_ERRORS = { + "AccessPointNotFound", + "NoSuchAccessGrantsLocationError", + "NoSuchAccessGrantError", "NoSuchBucket", "NoSuchBucketPolicy", "NoSuchKey", "NoSuchLifecycleConfiguration", + "NoSuchMultiRegionAccessPoint", + "NoSuchUpload", "NoSuchVersion", "NoSuchWebsiteConfiguration", + "ReplicationConfigurationNotFoundError", + "ServerSideEncryptionConfigurationNotFoundError", "NotFound", } @@ -102,15 +111,12 @@ } # Server-side throttling, timeout, or transient errors → 503 -# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html TRANSIENT_ERRORS = { "BandwidthLimitExceeded", "ConnectionError", "EC2ThrottledException", - "HTTPClientError", "InternalError", "InternalFailure", - "LimitExceededException", "PriorRequestNotComplete", "ProvisionedThroughputExceededException", "RequestLimitExceeded", @@ -128,51 +134,121 @@ "TooManyRequestsException", "TransactionInProgressException", "Unavailable", + "ReplicationInternalError", + "ReplicationTooManyRequests", } # Fatal/unrecoverable → 400 FATAL_ERRORS = { "AccessControlListNotSupported", + "AccessGrantAlreadyExists", + "AccessGrantsInstanceAlreadyExists", + "AccessGrantsInstanceNotEmptyError", + "AccessGrantsInstanceNotExistsError", + "AccessGrantsInstanceResourcePolicyNotExists", + "AccessGrantsLocationAlreadyExistsError", + "AccessGrantsLocationNotEmptyError", + "AccessGrantsLocationsQuotaExceededError", + "AccessGrantsQuotaExceededError", "AmbiguousGrantByEmailAddress", "BadDigest", + "BadRequest", "BucketAlreadyExists", "BucketAlreadyOwnedByYou", + "CrossLocationLoggingProhibited", "DryRunOperation", "EntityTooLarge", "EntityTooSmall", + "ExpiredObjectDeleteMarker", + "HTTPClientError", + "IllegalLocationConstraintException", + "IllegalVersioningConfigurationException", "IncompleteBody", + "IncorrectNumberOfFilesInPostRequest", + "InlineDataTooLarge", + "InsufficientData", + "InvalidAccessGrant", + 
"InvalidAccessGrantsLocation", + "InvalidAddressingHeader", "InvalidArgument", "InvalidBucketName", + "InvalidBucketState", "InvalidClientTokenId", "InvalidDigest", + "InvalidEncryptionAlgorithmError", + "InvalidIamRole", + "InvalidIdentityCenterInstance", + "InvalidLocationConstraint", "InvalidObjectState", "InvalidParameterCombination", "InvalidParameterValue", "InvalidPart", "InvalidPartOrder", + "InvalidPolicyDocument", "InvalidQueryParameter", + "InvalidRange", "InvalidRequest", + "InvalidResourcePolicy", "InvalidStorageClass", + "InvalidTag", + "InvalidTags", "InvalidTargetBucketForLogging", + "InvalidToken", + "InvalidURI", + "KeyTooLongError", + "LambdaInvalidResponse", + "LambdaInvocationFailed", + "LambdaNotFound", + "LambdaPermissionError", + "LambdaResponseNotReceived", + "LambdaRuntimeError", + "LambdaTimeout", + "LimitExceededException", + "MalformedACLError", "MalformedPolicyDocument", + "MalformedPOSTRequest", "MalformedQueryString", "MalformedXML", + "MaxMessageLengthExceeded", + "MaxPostPreDataLengthExceededError", + "MetadataTooLarge", "MethodNotAllowed", + "MissingAttachment", "MissingContentLength", "MissingParameter", "MissingRequestBodyError", - "NoSuchUpload", + "MissingSecurityHeader", + "MultiRegionAccessPointAlreadyOwnedByYou", + "MultiRegionAccessPointModifiedByAnotherRequest", + "MultiRegionAccessPointNotReady", + "MultiRegionAccessPointSameBucketRegion", + "MultiRegionAccessPointUnsupportedRegion", + "NoLoggingStatusForKey", "NotImplemented", + "NotSignedUp", "OperationAborted", "OptInRequired", + "PermanentRedirect", "PreconditionFailed", "Redirect", + "ReplicationParameterValueError", + "RequestIsNotMultiPartContent", + "RequestTorrentOfBucketError", "RestoreAlreadyInProgress", + "StsNotAuthorizedError", + "StsPackedPolicyTooLargeError", + "StsValidationError", + "TokenRefreshRequired", "TooManyBuckets", + "TooManyConfigurations", + "TooManyElements", + "TooManyTags", "UnexpectedContent", "UnresolvableGrantByEmailAddress", 
"UnsupportedOperation", "UnsupportedProtocol", + "UserKeyMustBeSpecified", + "ValidationError", "ValidationException", } From 2c237046e68c3454442c43e6aed69cfbf856ce5e Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Mon, 23 Jun 2025 18:43:47 -0700 Subject: [PATCH 7/8] add comments/description for error code --- metaflow/plugins/datatools/s3/s3op.py | 309 ++++++++++++-------------- 1 file changed, 145 insertions(+), 164 deletions(-) diff --git a/metaflow/plugins/datatools/s3/s3op.py b/metaflow/plugins/datatools/s3/s3op.py index 5d3bcf88881..e82863736a9 100644 --- a/metaflow/plugins/datatools/s3/s3op.py +++ b/metaflow/plugins/datatools/s3/s3op.py @@ -70,186 +70,167 @@ # Permission or access-related errors → 403 Forbidden PERMISSION_ERRORS = { - "AccessDenied", - "AccessDeniedException", - "AccountProblem", - "AllAccessDisabled", - "AuthFailure", - "ExpiredToken", - "InvalidAccessKeyId", - "InvalidPayer", - "InvalidSecurity", - "InvalidToken", - "NotSignedUp", - "RequestTimeTooSkewed", - "SignatureDoesNotMatch", - "UnauthorizedOperation", - "UnrecognizedClientException", + "AccessDenied", # Access Denied + "AccessDeniedException", # AWS service error for access denied + "AccountProblem", # There is a problem with your AWS account that prevents the operation from completing successfully + "AllAccessDisabled", # All access to this Amazon S3 resource has been disabled + "AuthFailure", # AWS authentication failure + "ExpiredToken", # The provided token has expired + "InvalidAccessKeyId", # The AWS access key ID you provided does not exist in our records + "InvalidPayer", # All access to this object has been disabled + "InvalidSecurity", # The provided security credentials are not valid + "InvalidToken", # The provided token is malformed or otherwise invalid + "NotSignedUp", # Your account is not signed up for the Amazon S3 service + "RequestTimeTooSkewed", # The difference between the request time and the server's time is too large + "SignatureDoesNotMatch", # The request 
signature we calculated does not match the signature you provided + "UnauthorizedOperation", # AWS service error for unauthorized operation + "UnrecognizedClientException", # AWS service error for unrecognized client } # Not found errors → 404 Not Found NOT_FOUND_ERRORS = { - "AccessPointNotFound", - "NoSuchAccessGrantsLocationError", - "NoSuchAccessGrantError", - "NoSuchBucket", - "NoSuchBucketPolicy", - "NoSuchKey", - "NoSuchLifecycleConfiguration", - "NoSuchMultiRegionAccessPoint", - "NoSuchUpload", - "NoSuchVersion", - "NoSuchWebsiteConfiguration", - "ReplicationConfigurationNotFoundError", - "ServerSideEncryptionConfigurationNotFoundError", - "NotFound", + "NoSuchAccessGrantsLocationError", # The specified access grants location does not exist + "NoSuchAccessGrantError", # The specified access grant does not exist + "NoSuchBucket", # The specified bucket does not exist + "NoSuchBucketPolicy", # The specified bucket does not have a bucket policy + "NoSuchKey", # The specified key does not exist + "NoSuchLifecycleConfiguration", # The lifecycle configuration does not exist + "NoSuchMultiRegionAccessPoint", # The specified Multi-Region Access Point does not exist + "NoSuchUpload", # The specified multipart upload does not exist + "NoSuchVersion", # Indicates that the version ID specified in the request does not match an existing version + "NoSuchWebsiteConfiguration", # The specified bucket does not have a website configuration + "ReplicationConfigurationNotFoundError", # The replication configuration was not found + "ServerSideEncryptionConfigurationNotFoundError", # The server-side encryption configuration was not found } # Range/invalid byte-range errors → 416 RANGE_ERRORS = { - "InvalidRange", + "InvalidRange", # The requested range cannot be satisfied } # Server-side throttling, timeout, or transient errors → 503 TRANSIENT_ERRORS = { - "BandwidthLimitExceeded", - "ConnectionError", - "EC2ThrottledException", - "InternalError", - "InternalFailure", - 
"PriorRequestNotComplete", - "ProvisionedThroughputExceededException", - "RequestLimitExceeded", - "RequestThrottled", - "RequestThrottledException", - "RequestTimeout", - "RequestTimeoutException", - "ServerError", - "ServiceUnavailable", - "SlowDown", - "TemporaryRedirect", - "ThrottledException", - "Throttling", - "ThrottlingException", - "TooManyRequestsException", - "TransactionInProgressException", - "Unavailable", - "ReplicationInternalError", - "ReplicationTooManyRequests", + "BandwidthLimitExceeded", # Request bandwidth limit has been exceeded + "InternalError", # We encountered an internal error. Please try again + "PriorRequestNotComplete", # A previous request for the same resource has not completed yet; retry later + "RequestTimeout", # Your socket connection to the server was not read from or written to within the timeout period + "ServiceUnavailable", # Reduce your request rate. Service temporarily unavailable + "SlowDown", # Reduce your request rate + "TemporaryRedirect", # You are being redirected to the bucket while DNS updates } # Fatal/unrecoverable → 400 FATAL_ERRORS = { - "AccessControlListNotSupported", - "AccessGrantAlreadyExists", - "AccessGrantsInstanceAlreadyExists", - "AccessGrantsInstanceNotEmptyError", - "AccessGrantsInstanceNotExistsError", - "AccessGrantsInstanceResourcePolicyNotExists", - "AccessGrantsLocationAlreadyExistsError", - "AccessGrantsLocationNotEmptyError", - "AccessGrantsLocationsQuotaExceededError", - "AccessGrantsQuotaExceededError", - "AmbiguousGrantByEmailAddress", - "BadDigest", - "BadRequest", - "BucketAlreadyExists", - "BucketAlreadyOwnedByYou", - "CrossLocationLoggingProhibited", - "DryRunOperation", - "EntityTooLarge", - "EntityTooSmall", - "ExpiredObjectDeleteMarker", - "HTTPClientError", - "IllegalLocationConstraintException", - "IllegalVersioningConfigurationException", - "IncompleteBody", - "IncorrectNumberOfFilesInPostRequest", - "InlineDataTooLarge", - "InsufficientData", - "InvalidAccessGrant", - 
"InvalidAccessGrantsLocation", - "InvalidAddressingHeader", - "InvalidArgument", - "InvalidBucketName", - "InvalidBucketState", - "InvalidClientTokenId", - "InvalidDigest", - "InvalidEncryptionAlgorithmError", - "InvalidIamRole", - "InvalidIdentityCenterInstance", - "InvalidLocationConstraint", - "InvalidObjectState", - "InvalidParameterCombination", - "InvalidParameterValue", - "InvalidPart", - "InvalidPartOrder", - "InvalidPolicyDocument", - "InvalidQueryParameter", - "InvalidRange", - "InvalidRequest", - "InvalidResourcePolicy", - "InvalidStorageClass", - "InvalidTag", - "InvalidTags", - "InvalidTargetBucketForLogging", - "InvalidToken", - "InvalidURI", - "KeyTooLongError", - "LambdaInvalidResponse", - "LambdaInvocationFailed", - "LambdaNotFound", - "LambdaPermissionError", - "LambdaResponseNotReceived", - "LambdaRuntimeError", - "LambdaTimeout", - "LimitExceededException", - "MalformedACLError", - "MalformedPolicyDocument", - "MalformedPOSTRequest", - "MalformedQueryString", - "MalformedXML", - "MaxMessageLengthExceeded", - "MaxPostPreDataLengthExceededError", - "MetadataTooLarge", - "MethodNotAllowed", - "MissingAttachment", - "MissingContentLength", - "MissingParameter", - "MissingRequestBodyError", - "MissingSecurityHeader", - "MultiRegionAccessPointAlreadyOwnedByYou", - "MultiRegionAccessPointModifiedByAnotherRequest", - "MultiRegionAccessPointNotReady", - "MultiRegionAccessPointSameBucketRegion", - "MultiRegionAccessPointUnsupportedRegion", - "NoLoggingStatusForKey", - "NotImplemented", - "NotSignedUp", - "OperationAborted", - "OptInRequired", - "PermanentRedirect", - "PreconditionFailed", - "Redirect", - "ReplicationParameterValueError", - "RequestIsNotMultiPartContent", - "RequestTorrentOfBucketError", - "RestoreAlreadyInProgress", - "StsNotAuthorizedError", - "StsPackedPolicyTooLargeError", - "StsValidationError", - "TokenRefreshRequired", - "TooManyBuckets", - "TooManyConfigurations", - "TooManyElements", - "TooManyTags", - "UnexpectedContent", - 
"UnresolvableGrantByEmailAddress", - "UnsupportedOperation", - "UnsupportedProtocol", - "UserKeyMustBeSpecified", - "ValidationError", - "ValidationException", + "AccessControlListNotSupported", # The bucket does not allow ACLs + "AccessGrantAlreadyExists", # The specified access grant already exists + "AccessGrantsInstanceAlreadyExists", # Access Grants Instance already exists + "AccessGrantsInstanceNotEmptyError", # Please clean up locations before deleting the access grants instance + "AccessGrantsInstanceNotExistsError", # Access Grants Instance does not exist + "AccessGrantsInstanceResourcePolicyNotExists", # Access Grants Instance Resource Policy does not exist + "AccessGrantsLocationAlreadyExistsError", # The specified access grants location already exists + "AccessGrantsLocationNotEmptyError", # Please clean up access grants before deleting access grants location + "AccessGrantsLocationsQuotaExceededError", # The access grants location quota has been exceeded + "AccessGrantsQuotaExceededError", # The access grants quota has been exceeded + "AmbiguousGrantByEmailAddress", # The email address that you provided is associated with more than one account + "BadDigest", # The Content-MD5 you specified did not match what we received + "BadRequest", # A bad request was made + "BucketAlreadyExists", # The requested bucket name is not available + "BucketAlreadyOwnedByYou", # The bucket that you tried to create already exists, and you own it + "CrossLocationLoggingProhibited", # Cross-location logging not allowed + "DryRunOperation", # Dry run operation was requested + "EntityTooLarge", # Your proposed upload is larger than the maximum allowed object size + "EntityTooSmall", # Your proposed upload is smaller than the minimum allowed object size + "ExpiredObjectDeleteMarker", # The object delete marker you specified has expired + "HTTPClientError", # HTTP client error occurred + "IllegalLocationConstraintException", # The unspecified location constraint is incompatible 
for the Region + "IllegalVersioningConfigurationException", # Indicates that the versioning configuration specified in the request is invalid + "IncompleteBody", # You did not provide the number of bytes specified by the Content-Length HTTP header + "IncorrectNumberOfFilesInPostRequest", # POST requires exactly one file upload per request + "InlineDataTooLarge", # Inline data exceeds the maximum allowed size + "InsufficientData", # The amount of data available to read is not known + "InvalidAccessGrant", # The specified Access Grant is invalid + "InvalidAccessGrantsLocation", # The specified Access Grants Location is invalid + "InvalidAddressingHeader", # You must specify the Anonymous role + "InvalidArgument", # Invalid Argument + "InvalidBucketName", # The specified bucket is not valid + "InvalidBucketState", # The request is not valid with the current state of the bucket + "InvalidClientTokenId", # The X.509 certificate or AWS access key ID provided does not exist in our records + "InvalidDigest", # The Content-MD5 you specified is not valid + "InvalidEncryptionAlgorithmError", # The encryption request you specified is not valid + "InvalidIamRole", # The specified IAM Role is invalid + "InvalidIdentityCenterInstance", # The specified identity center instance is invalid + "InvalidLocationConstraint", # The specified location constraint is not valid + "InvalidObjectState", # The operation is not valid for the current state of the object + "InvalidParameterCombination", # Parameters that must not be used together were used together + "InvalidParameterValue", # An invalid or out-of-range value was supplied for the input parameter + "InvalidPart", # One or more of the specified parts could not be found + "InvalidPartOrder", # The list of parts was not in ascending order + "InvalidPolicyDocument", # The content of the form does not meet the conditions specified in the policy document + "InvalidQueryParameter", # The AWS query parameter is malformed or does not adhere 
to AWS standards + "InvalidRange", # The requested range cannot be satisfied + "InvalidRequest", # Please use AWS4-HMAC-SHA256 + "InvalidResourcePolicy", # The specified Resource Policy is invalid + "InvalidStorageClass", # The storage class you specified is not valid + "InvalidTag", # The tag provided was not a valid tag + "InvalidTags", # Tag keys cannot start with AWS reserved prefix for system tags + "InvalidTargetBucketForLogging", # The target bucket for logging does not exist, is not owned by you, or does not have the appropriate grants for the log-delivery group + "InvalidToken", # The provided token is malformed or otherwise invalid + "InvalidURI", # Couldn't parse the specified URI + "KeyTooLongError", # Your key is too long + "LambdaInvalidResponse", # Lambda function returned an invalid response + "LambdaInvocationFailed", # Lambda function invocation failed + "LambdaNotFound", # The AWS Lambda function was not found + "LambdaPermissionError", # The caller is not authorized to invoke the Lambda function + "LambdaResponseNotReceived", # The Lambda function exited without successfully calling WriteGetObjectResponse + "LambdaRuntimeError", # The Lambda function failed during execution + "LambdaTimeout", # The Lambda function did not respond in the allowed time + "LimitExceededException", # A service limit was exceeded + "MalformedACLError", # The XML you provided was not well-formed or did not validate against our published schema + "MalformedPolicyDocument", # Policy document is malformed + "MalformedPOSTRequest", # The body of your POST request is not well-formed multipart/form-data + "MalformedQueryString", # The query string contains a malformed parameter + "MalformedXML", # This happens when the user sends malformed XML + "MaxMessageLengthExceeded", # Your request was too big + "MaxPostPreDataLengthExceededError", # Your POST request fields preceding the upload file were too large + "MetadataTooLarge", # Your metadata headers exceed the maximum 
allowed metadata size + "MethodNotAllowed", # The specified method is not allowed against this resource + "MissingAttachment", # A SOAP attachment was expected, but none were found + "MissingContentLength", # You must provide the Content-Length HTTP header + "MissingParameter", # A required parameter for the specified action is not supplied + "MissingRequestBodyError", # This happens when the user sends an empty XML document as a request + "MissingSecurityHeader", # Your request was missing a required header + "MultiRegionAccessPointAlreadyOwnedByYou", # You already have a Multi-Region Access Point with the same name + "MultiRegionAccessPointModifiedByAnotherRequest", # The action failed because another request is modifying the specified resource + "MultiRegionAccessPointNotReady", # The specified Multi-Region Access Point is not ready to be updated + "MultiRegionAccessPointSameBucketRegion", # The buckets used to create a Multi-Region Access Point cannot be in the same Region + "MultiRegionAccessPointUnsupportedRegion", # One of the buckets supplied to create the Multi-Region Access Point is in a Region that is not supported + "NoLoggingStatusForKey", # There is no such thing as a logging status subresource for a key + "NotImplemented", # A header you provided implies functionality that is not implemented + "NotSignedUp", # Your account is not signed up for the Amazon S3 service + "OperationAborted", # A conflicting conditional operation is currently in progress against this resource + "OptInRequired", # The AWS access key ID needs a subscription for the service + "PermanentRedirect", # The bucket you are attempting to access must be addressed using the specified endpoint + "PreconditionFailed", # At least one of the preconditions you specified did not hold + "Redirect", # Temporary redirect + "ReplicationParameterValueError", # Invalid parameter value in replication configuration + "RequestIsNotMultiPartContent", # Bucket POST must be of the enclosure-type 
multipart/form-data + "RequestTorrentOfBucketError", # Requesting the torrent file of a bucket is not permitted + "RestoreAlreadyInProgress", # Object restore is already in progress + "StsNotAuthorizedError", # An error occurred when calling the GetDataAccess operation: User is not authorized to perform sts:AssumeRole + "StsPackedPolicyTooLargeError", # An error occurred when calling the GetDataAccess operation: Serialized token too large for session + "StsValidationError", # STS validation error occurred + "TokenRefreshRequired", # The provided token must be refreshed + "TooManyBuckets", # You have attempted to create more buckets than allowed + "TooManyConfigurations", # You have attempted to create more Storage Lens group configurations than the 50 allowed + "TooManyElements", # The Element exceeds the maximum number of elements allowed within a logical operator + "TooManyTags", # The number of tags exceeds the limit of 50 tags + "UnexpectedContent", # This request does not support content + "UnresolvableGrantByEmailAddress", # The email address you provided does not match any account on record + "UnsupportedOperation", # A specified operation is not supported + "UnsupportedProtocol", # The specified protocol is not supported + "UserKeyMustBeSpecified", # The bucket POST must contain the specified field name + "ValidationError", # Validation errors might be returned and can occur for numerous reasons + "ValidationException", # A validation exception occurred } From 7115f337c6c04859e918b4977c3568092917d061 Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Mon, 23 Jun 2025 18:56:30 -0700 Subject: [PATCH 8/8] update --- metaflow/plugins/datatools/s3/s3op.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/metaflow/plugins/datatools/s3/s3op.py b/metaflow/plugins/datatools/s3/s3op.py index e82863736a9..42a9bb34161 100644 --- a/metaflow/plugins/datatools/s3/s3op.py +++ b/metaflow/plugins/datatools/s3/s3op.py @@ -133,41 +133,29 @@ 
"AccessGrantsQuotaExceededError", # The access grants quota has been exceeded "AmbiguousGrantByEmailAddress", # The email address that you provided is associated with more than one account "BadDigest", # The Content-MD5 you specified did not match what we received - "BadRequest", # A bad request was made "BucketAlreadyExists", # The requested bucket name is not available "BucketAlreadyOwnedByYou", # The bucket that you tried to create already exists, and you own it "CrossLocationLoggingProhibited", # Cross-location logging not allowed - "DryRunOperation", # Dry run operation was requested "EntityTooLarge", # Your proposed upload is larger than the maximum allowed object size "EntityTooSmall", # Your proposed upload is smaller than the minimum allowed object size - "ExpiredObjectDeleteMarker", # The object delete marker you specified has expired - "HTTPClientError", # HTTP client error occurred - "IllegalLocationConstraintException", # The unspecified location constraint is incompatible for the Region - "IllegalVersioningConfigurationException", # Indicates that the versioning configuration specified in the request is invalid "IncompleteBody", # You did not provide the number of bytes specified by the Content-Length HTTP header "IncorrectNumberOfFilesInPostRequest", # POST requires exactly one file upload per request "InlineDataTooLarge", # Inline data exceeds the maximum allowed size - "InsufficientData", # The amount of data available to read is not known "InvalidAccessGrant", # The specified Access Grant is invalid "InvalidAccessGrantsLocation", # The specified Access Grants Location is invalid "InvalidAddressingHeader", # You must specify the Anonymous role "InvalidArgument", # Invalid Argument "InvalidBucketName", # The specified bucket is not valid "InvalidBucketState", # The request is not valid with the current state of the bucket - "InvalidClientTokenId", # The X.509 certificate or AWS access key ID provided does not exist in our records "InvalidDigest", # 
The Content-MD5 you specified is not valid "InvalidEncryptionAlgorithmError", # The encryption request you specified is not valid "InvalidIamRole", # The specified IAM Role is invalid "InvalidIdentityCenterInstance", # The specified identity center instance is invalid "InvalidLocationConstraint", # The specified location constraint is not valid "InvalidObjectState", # The operation is not valid for the current state of the object - "InvalidParameterCombination", # Parameters that must not be used together were used together - "InvalidParameterValue", # An invalid or out-of-range value was supplied for the input parameter "InvalidPart", # One or more of the specified parts could not be found "InvalidPartOrder", # The list of parts was not in ascending order "InvalidPolicyDocument", # The content of the form does not meet the conditions specified in the policy document - "InvalidQueryParameter", # The AWS query parameter is malformed or does not adhere to AWS standards - "InvalidRange", # The requested range cannot be satisfied "InvalidRequest", # Please use AWS4-HMAC-SHA256 "InvalidResourcePolicy", # The specified Resource Policy is invalid "InvalidStorageClass", # The storage class you specified is not valid @@ -184,11 +172,9 @@ "LambdaResponseNotReceived", # The Lambda function exited without successfully calling WriteGetObjectResponse "LambdaRuntimeError", # The Lambda function failed during execution "LambdaTimeout", # The Lambda function did not respond in the allowed time - "LimitExceededException", # A service limit was exceeded "MalformedACLError", # The XML you provided was not well-formed or did not validate against our published schema "MalformedPolicyDocument", # Policy document is malformed "MalformedPOSTRequest", # The body of your POST request is not well-formed multipart/form-data - "MalformedQueryString", # The query string contains a malformed parameter "MalformedXML", # This happens when the user sends malformed XML "MaxMessageLengthExceeded", # 
Your request was too big "MaxPostPreDataLengthExceededError", # Your POST request fields preceding the upload file were too large @@ -196,7 +182,6 @@ "MethodNotAllowed", # The specified method is not allowed against this resource "MissingAttachment", # A SOAP attachment was expected, but none were found "MissingContentLength", # You must provide the Content-Length HTTP header - "MissingParameter", # A required parameter for the specified action is not supplied "MissingRequestBodyError", # This happens when the user sends an empty XML document as a request "MissingSecurityHeader", # Your request was missing a required header "MultiRegionAccessPointAlreadyOwnedByYou", # You already have a Multi-Region Access Point with the same name @@ -208,29 +193,23 @@ "NotImplemented", # A header you provided implies functionality that is not implemented "NotSignedUp", # Your account is not signed up for the Amazon S3 service "OperationAborted", # A conflicting conditional operation is currently in progress against this resource - "OptInRequired", # The AWS access key ID needs a subscription for the service "PermanentRedirect", # The bucket you are attempting to access must be addressed using the specified endpoint "PreconditionFailed", # At least one of the preconditions you specified did not hold "Redirect", # Temporary redirect - "ReplicationParameterValueError", # Invalid parameter value in replication configuration "RequestIsNotMultiPartContent", # Bucket POST must be of the enclosure-type multipart/form-data "RequestTorrentOfBucketError", # Requesting the torrent file of a bucket is not permitted "RestoreAlreadyInProgress", # Object restore is already in progress "StsNotAuthorizedError", # An error occurred when calling the GetDataAccess operation: User is not authorized to perform sts:AssumeRole "StsPackedPolicyTooLargeError", # An error occurred when calling the GetDataAccess operation: Serialized token too large for session "StsValidationError", # STS validation error 
occurred - "TokenRefreshRequired", # The provided token must be refreshed "TooManyBuckets", # You have attempted to create more buckets than allowed "TooManyConfigurations", # You have attempted to create more Storage Lens group configurations than the 50 allowed "TooManyElements", # The Element exceeds the maximum number of elements allowed within a logical operator "TooManyTags", # The number of tags exceeds the limit of 50 tags "UnexpectedContent", # This request does not support content "UnresolvableGrantByEmailAddress", # The email address you provided does not match any account on record - "UnsupportedOperation", # A specified operation is not supported - "UnsupportedProtocol", # The specified protocol is not supported "UserKeyMustBeSpecified", # The bucket POST must contain the specified field name "ValidationError", # Validation errors might be returned and can occur for numerous reasons - "ValidationException", # A validation exception occurred }