Skip to content

[S3] More comprehensive error handling #2451

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion metaflow/plugins/datatools/s3/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def __init__(
run: Optional[Union[FlowSpec, "metaflow.Run"]] = None,
s3root: Optional[str] = None,
encryption: Optional[str] = S3_SERVER_SIDE_ENCRYPTION,
**kwargs
**kwargs,
):
if run:
# 1. use a (current) run ID with optional customizations
Expand Down Expand Up @@ -853,6 +853,8 @@ def _head():
raise MetaflowS3NotFound()
elif info["error"] == s3op.ERROR_URL_ACCESS_DENIED:
raise MetaflowS3AccessDenied()
elif info["error"] == s3op.ERROR_INVALID_REQUEST:
raise MetaflowS3Exception("Invalid request for %s" % s3url)
else:
raise MetaflowS3Exception("Got error: %d" % info["error"])
else:
Expand Down Expand Up @@ -1379,6 +1381,8 @@ def _one_boto_op(self, op, url, create_tmp_file=True):
raise MetaflowS3AccessDenied(url)
elif error_code == 416:
raise MetaflowS3InvalidRange(err)
elif error_code == 400:
raise MetaflowS3Exception(f"Invalid request for {url}: {str(err)}")
elif error_code == "NoSuchBucket":
raise MetaflowS3URLException("Specified S3 bucket doesn't exist.")
error = str(err)
Expand Down
205 changes: 174 additions & 31 deletions metaflow/plugins/datatools/s3/s3op.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,154 @@

S3Config = namedtuple("S3Config", "role session_vars client_params")

# Error code mappings for AWS S3 and general AWS services
# - S3 Error Responses: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
# - Boto3 Retries: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html

# Error codes that mean "the caller is not allowed to do this" → 403 Forbidden.
# Grouped by cause; membership (not order) is what matters to lookups.
PERMISSION_ERRORS = {
    # --- Credentials, tokens and request signing ---
    "AuthFailure",  # AWS authentication failure
    "ExpiredToken",  # provided token has expired
    "InvalidAccessKeyId",  # access key ID does not exist in AWS records
    "InvalidSecurity",  # provided security credentials are not valid
    "InvalidToken",  # provided token is malformed or otherwise invalid
    "RequestTimeTooSkewed",  # request time differs too much from server time
    "SignatureDoesNotMatch",  # computed signature differs from the one provided
    "UnrecognizedClientException",  # AWS service error: unrecognized client
    # --- Authorization refused for the resource/operation ---
    "AccessDenied",  # access denied
    "AccessDeniedException",  # AWS service error: access denied
    "AllAccessDisabled",  # all access to this S3 resource has been disabled
    "InvalidPayer",  # all access to this object has been disabled
    "UnauthorizedOperation",  # AWS service error: unauthorized operation
    # --- Account-level problems ---
    "AccountProblem",  # a problem with the AWS account blocks the operation
    "NotSignedUp",  # account is not signed up for the Amazon S3 service
}

# Error codes that mean "the thing you asked for does not exist" → 404 Not Found.
NOT_FOUND_ERRORS = {
    # --- Buckets, objects, versions and uploads ---
    "NoSuchBucket",  # specified bucket does not exist
    "NoSuchKey",  # specified key does not exist
    "NoSuchUpload",  # specified multipart upload does not exist
    "NoSuchVersion",  # version ID in the request matches no existing version
    # --- Bucket-level configuration that has not been set ---
    "NoSuchBucketPolicy",  # bucket has no bucket policy
    "NoSuchLifecycleConfiguration",  # lifecycle configuration does not exist
    "NoSuchWebsiteConfiguration",  # bucket has no website configuration
    "ReplicationConfigurationNotFoundError",  # replication config not found
    "ServerSideEncryptionConfigurationNotFoundError",  # SSE config not found
    # --- Access grants and Multi-Region Access Points ---
    "NoSuchAccessGrantError",  # specified access grant does not exist
    "NoSuchAccessGrantsLocationError",  # access grants location does not exist
    "NoSuchMultiRegionAccessPoint",  # Multi-Region Access Point does not exist
}

# Byte-range errors → 416. Only one code today: "InvalidRange" (the requested
# range cannot be satisfied). Kept as a set so lookups match the other tables.
RANGE_ERRORS = {"InvalidRange"}

# Server-side throttling, timeout, or other transient errors → 503.
#
# Codes in this set are normalized to 503 by normalize_client_error so that
# callers treat them as retriable. The second group below restores the
# throttling/connection codes that the previous inline tuple in
# normalize_client_error already mapped to 503; without them those errors would
# come back as raw strings and no longer be recognized as transient. They are
# also considered transient by boto3, see:
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
TRANSIENT_ERRORS = {
    # --- S3 error responses ---
    "BandwidthLimitExceeded",  # request bandwidth limit has been exceeded
    "InternalError",  # S3 encountered an internal error; try again
    "PriorRequestNotComplete",  # an earlier request is still being processed
    "RequestTimeout",  # socket was not read from/written to within the timeout
    "ServiceUnavailable",  # service temporarily unavailable; reduce request rate
    "SlowDown",  # reduce your request rate
    "TemporaryRedirect",  # redirected to the bucket while DNS updates
    # --- Generic AWS throttling / connection errors (boto3 transient list) ---
    "ConnectionError",
    "EC2ThrottledException",
    "HTTPClientError",
    "LimitExceededException",
    "ProvisionedThroughputExceededException",
    "RequestLimitExceeded",
    "RequestThrottled",
    "RequestThrottledException",
    "RequestTimeoutException",
    "ThrottledException",
    "Throttling",
    "ThrottlingException",
    "TooManyRequestsException",
    "TransactionInProgressException",
}

# Fatal/unrecoverable request errors → 400.
#
# Retrying these cannot succeed without changing the request itself.
#
# "InvalidToken" and "NotSignedUp" are intentionally NOT listed here: both are
# already members of PERMISSION_ERRORS, which normalize_client_error checks
# first, so they always normalize to 403. Listing them here as well was dead
# and made the classification look ambiguous.
FATAL_ERRORS = {
    "AccessControlListNotSupported",  # the bucket does not allow ACLs
    "AccessGrantAlreadyExists",  # the specified access grant already exists
    "AccessGrantsInstanceAlreadyExists",  # Access Grants Instance already exists
    "AccessGrantsInstanceNotEmptyError",  # clean up locations before deleting the instance
    "AccessGrantsInstanceNotExistsError",  # Access Grants Instance does not exist
    "AccessGrantsInstanceResourcePolicyNotExists",  # instance resource policy does not exist
    "AccessGrantsLocationAlreadyExistsError",  # access grants location already exists
    "AccessGrantsLocationNotEmptyError",  # clean up access grants before deleting the location
    "AccessGrantsLocationsQuotaExceededError",  # access grants location quota exceeded
    "AccessGrantsQuotaExceededError",  # access grants quota exceeded
    "AmbiguousGrantByEmailAddress",  # email address is associated with more than one account
    "BadDigest",  # Content-MD5 specified did not match what was received
    "BucketAlreadyExists",  # the requested bucket name is not available
    "BucketAlreadyOwnedByYou",  # bucket already exists and you own it
    "CrossLocationLoggingProhibited",  # cross-location logging not allowed
    "EntityTooLarge",  # upload is larger than the maximum allowed object size
    "EntityTooSmall",  # upload is smaller than the minimum allowed object size
    "IncompleteBody",  # fewer bytes sent than the Content-Length header promised
    "IncorrectNumberOfFilesInPostRequest",  # POST requires exactly one file upload
    "InlineDataTooLarge",  # inline data exceeds the maximum allowed size
    "InvalidAccessGrant",  # the specified Access Grant is invalid
    "InvalidAccessGrantsLocation",  # the specified Access Grants Location is invalid
    "InvalidAddressingHeader",  # you must specify the Anonymous role
    "InvalidArgument",  # invalid argument
    "InvalidBucketName",  # the specified bucket is not valid
    "InvalidBucketState",  # request is not valid with the current bucket state
    "InvalidDigest",  # the Content-MD5 specified is not valid
    "InvalidEncryptionAlgorithmError",  # the encryption request is not valid
    "InvalidIamRole",  # the specified IAM Role is invalid
    "InvalidIdentityCenterInstance",  # the identity center instance is invalid
    "InvalidLocationConstraint",  # the location constraint is not valid
    "InvalidObjectState",  # operation is not valid for the object's current state
    "InvalidPart",  # one or more of the specified parts could not be found
    "InvalidPartOrder",  # the list of parts was not in ascending order
    "InvalidPolicyDocument",  # form content does not meet the policy document's conditions
    "InvalidRequest",  # e.g. "Please use AWS4-HMAC-SHA256"
    "InvalidResourcePolicy",  # the specified Resource Policy is invalid
    "InvalidStorageClass",  # the storage class specified is not valid
    "InvalidTag",  # the tag provided was not a valid tag
    "InvalidTags",  # tag keys cannot start with the AWS reserved system-tag prefix
    "InvalidTargetBucketForLogging",  # log target bucket missing, not owned, or lacks grants
    "InvalidURI",  # couldn't parse the specified URI
    "KeyTooLongError",  # your key is too long
    "LambdaInvalidResponse",  # Lambda function returned an invalid response
    "LambdaInvocationFailed",  # Lambda function invocation failed
    "LambdaNotFound",  # the AWS Lambda function was not found
    "LambdaPermissionError",  # caller is not authorized to invoke the Lambda function
    "LambdaResponseNotReceived",  # Lambda exited without calling WriteGetObjectResponse
    "LambdaRuntimeError",  # the Lambda function failed during execution
    "LambdaTimeout",  # the Lambda function did not respond in the allowed time
    "MalformedACLError",  # XML was not well-formed or failed schema validation
    "MalformedPolicyDocument",  # policy document is malformed
    "MalformedPOSTRequest",  # POST body is not well-formed multipart/form-data
    "MalformedXML",  # the user sent malformed XML
    "MaxMessageLengthExceeded",  # your request was too big
    "MaxPostPreDataLengthExceededError",  # POST fields preceding the file were too large
    "MetadataTooLarge",  # metadata headers exceed the maximum allowed size
    "MethodNotAllowed",  # the method is not allowed against this resource
    "MissingAttachment",  # a SOAP attachment was expected, but none were found
    "MissingContentLength",  # the Content-Length HTTP header is required
    "MissingRequestBodyError",  # the user sent an empty XML document as a request
    "MissingSecurityHeader",  # the request was missing a required header
    "MultiRegionAccessPointAlreadyOwnedByYou",  # you already have an MRAP with this name
    "MultiRegionAccessPointModifiedByAnotherRequest",  # another request is modifying the resource
    "MultiRegionAccessPointNotReady",  # the MRAP is not ready to be updated
    "MultiRegionAccessPointSameBucketRegion",  # MRAP buckets cannot be in the same Region
    "MultiRegionAccessPointUnsupportedRegion",  # a supplied bucket is in an unsupported Region
    "NoLoggingStatusForKey",  # there is no logging status subresource for a key
    "NotImplemented",  # a header implies functionality that is not implemented
    "OperationAborted",  # a conflicting conditional operation is in progress
    "PermanentRedirect",  # bucket must be addressed using the specified endpoint
    "PreconditionFailed",  # at least one specified precondition did not hold
    # NOTE(review): "Redirect" is described as a temporary redirect, yet the
    # similar "TemporaryRedirect" is classified in TRANSIENT_ERRORS - confirm
    # that treating this one as fatal is intended.
    "Redirect",  # temporary redirect
    "RequestIsNotMultiPartContent",  # bucket POST must be multipart/form-data
    "RequestTorrentOfBucketError",  # requesting the torrent file of a bucket is not permitted
    "RestoreAlreadyInProgress",  # object restore is already in progress
    "StsNotAuthorizedError",  # GetDataAccess: user not authorized to perform sts:AssumeRole
    "StsPackedPolicyTooLargeError",  # GetDataAccess: serialized token too large for session
    "StsValidationError",  # STS validation error occurred
    "TooManyBuckets",  # attempted to create more buckets than allowed
    "TooManyConfigurations",  # more than the 50 allowed Storage Lens group configurations
    "TooManyElements",  # too many elements within a logical operator
    "TooManyTags",  # the number of tags exceeds the limit of 50 tags
    "UnexpectedContent",  # this request does not support content
    "UnresolvableGrantByEmailAddress",  # email address matches no account on record
    "UserKeyMustBeSpecified",  # the bucket POST must contain the specified field name
    "ValidationError",  # validation errors can occur for numerous reasons
}


class S3Url(object):
def __init__(
Expand Down Expand Up @@ -107,6 +255,7 @@ def __str__(self):
ERROR_INVALID_RANGE = 11
ERROR_TRANSIENT = 12
ERROR_OUT_OF_DISK_SPACE = 13
ERROR_INVALID_REQUEST = 14


def format_result_line(idx, prefix, url="", local=""):
Expand All @@ -129,37 +278,20 @@ def normalize_client_error(err):
try:
return int(error_code)
except ValueError:
if error_code in ("AccessDenied", "AllAccessDisabled", "InvalidAccessKeyId"):
return 403
if error_code in ("NoSuchKey", "NoSuchBucket"):
return 404
if error_code == "InvalidRange":
return 416
# We "normalize" retriable server errors to 503. These are also considered
# transient by boto3 (see:
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html)
if error_code in (
"SlowDown",
"RequestTimeout",
"RequestTimeoutException",
"PriorRequestNotComplete",
"ConnectionError",
"HTTPClientError",
"Throttling",
"ThrottlingException",
"ThrottledException",
"RequestThrottledException",
"TooManyRequestsException",
"ProvisionedThroughputExceededException",
"TransactionInProgressException",
"RequestLimitExceeded",
"BandwidthLimitExceeded",
"LimitExceededException",
"RequestThrottled",
"EC2ThrottledException",
"InternalError",
):
return 503
pass

if error_code in PERMISSION_ERRORS:
return 403
elif error_code in NOT_FOUND_ERRORS:
return 404
elif error_code in RANGE_ERRORS:
return 416
elif error_code in FATAL_ERRORS:
return 400
elif error_code in TRANSIENT_ERRORS:
return 503

# Default: return original string code if unmapped
return error_code


Expand Down Expand Up @@ -199,6 +331,8 @@ def op_info(url):
to_return = {"error": ERROR_URL_ACCESS_DENIED, "raise_error": err}
elif error_code == 416:
to_return = {"error": ERROR_INVALID_RANGE, "raise_error": err}
elif error_code == 400:
to_return = {"error": ERROR_INVALID_REQUEST, "raise_error": err}
elif error_code in (500, 502, 503, 504):
to_return = {"error": ERROR_TRANSIENT, "raise_error": err}
else:
Expand Down Expand Up @@ -392,6 +526,9 @@ def handle_client_error(err, idx, result_file):
elif error_code == 403:
result_file.write("%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED))
result_file.flush()
elif error_code == 400:
result_file.write("%d %d\n" % (idx, -ERROR_INVALID_REQUEST))
result_file.flush()
elif error_code == 503:
result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
result_file.flush()
Expand Down Expand Up @@ -564,6 +701,8 @@ def get_info(self, url):
return False, url, ERROR_URL_NOT_FOUND
elif error_code == 403:
return False, url, ERROR_URL_ACCESS_DENIED
elif error_code == 400:
return False, url, ERROR_INVALID_REQUEST
# Transient errors are going to be retried by the aws_retry decorator
else:
raise
Expand Down Expand Up @@ -612,6 +751,8 @@ def list_prefix(self, prefix_url, delimiter=""):
return False, prefix_url, ERROR_URL_NOT_FOUND
elif error_code == 403:
return False, prefix_url, ERROR_URL_ACCESS_DENIED
elif error_code == 400:
return False, prefix_url, ERROR_INVALID_REQUEST
# Transient errors are going to be retried by the aws_retry decorator
else:
raise
Expand Down Expand Up @@ -655,6 +796,8 @@ def exit(exit_code, url):
msg = "Transient error for url: %s" % url
elif exit_code == ERROR_OUT_OF_DISK_SPACE:
msg = "Out of disk space when downloading URL: %s" % url
elif exit_code == ERROR_INVALID_REQUEST:
msg = "Invalid request for URL: %s" % url
else:
msg = "Unknown error"
print("s3op failed:\n%s" % msg, file=sys.stderr)
Expand Down
Loading