diff --git a/metaflow/plugins/datatools/s3/s3.py b/metaflow/plugins/datatools/s3/s3.py
index 20b5d5fc3cc..f564f86f997 100644
--- a/metaflow/plugins/datatools/s3/s3.py
+++ b/metaflow/plugins/datatools/s3/s3.py
@@ -525,7 +525,7 @@ def __init__(
         run: Optional[Union[FlowSpec, "metaflow.Run"]] = None,
         s3root: Optional[str] = None,
         encryption: Optional[str] = S3_SERVER_SIDE_ENCRYPTION,
-        **kwargs
+        **kwargs,
     ):
         if run:
             # 1. use a (current) run ID with optional customizations
@@ -853,6 +853,8 @@ def _head():
                                 raise MetaflowS3NotFound()
                         elif info["error"] == s3op.ERROR_URL_ACCESS_DENIED:
                             raise MetaflowS3AccessDenied()
+                        elif info["error"] == s3op.ERROR_INVALID_REQUEST:
+                            raise MetaflowS3Exception("Invalid request for %s" % s3url)
                         else:
                             raise MetaflowS3Exception("Got error: %d" % info["error"])
                     else:
@@ -1379,6 +1381,8 @@ def _one_boto_op(self, op, url, create_tmp_file=True):
                     raise MetaflowS3AccessDenied(url)
                 elif error_code == 416:
                     raise MetaflowS3InvalidRange(err)
+                elif error_code == 400:
+                    raise MetaflowS3Exception(f"Invalid request for {url}: {str(err)}")
                 elif error_code == "NoSuchBucket":
                     raise MetaflowS3URLException("Specified S3 bucket doesn't exist.")
                 error = str(err)
diff --git a/metaflow/plugins/datatools/s3/s3op.py b/metaflow/plugins/datatools/s3/s3op.py
index 44c5a1f1d3b..42a9bb34161 100644
--- a/metaflow/plugins/datatools/s3/s3op.py
+++ b/metaflow/plugins/datatools/s3/s3op.py
@@ -64,6 +64,166 @@
 
 S3Config = namedtuple("S3Config", "role session_vars client_params")
 
+# Error code mappings for AWS S3 and general AWS services
+# - S3 Error Responses: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
+# - Boto3 Retries: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
+
+# Permission or access-related errors → 403 Forbidden
+PERMISSION_ERRORS = {
+    "AccessDenied",  # Access Denied
+    "AccessDeniedException",  # AWS service error for access denied
+    "AccountProblem",  # There is a problem with your AWS account that prevents the operation from completing successfully
+    "AllAccessDisabled",  # All access to this Amazon S3 resource has been disabled
+    "AuthFailure",  # AWS authentication failure
+    "ExpiredToken",  # The provided token has expired
+    "InvalidAccessKeyId",  # The AWS access key ID you provided does not exist in our records
+    "InvalidPayer",  # All access to this object has been disabled
+    "InvalidSecurity",  # The provided security credentials are not valid
+    "InvalidToken",  # The provided token is malformed or otherwise invalid
+    "NotSignedUp",  # Your account is not signed up for the Amazon S3 service
+    "RequestTimeTooSkewed",  # The difference between the request time and the server's time is too large
+    "SignatureDoesNotMatch",  # The request signature we calculated does not match the signature you provided
+    "UnauthorizedOperation",  # AWS service error for unauthorized operation
+    "UnrecognizedClientException",  # AWS service error for unrecognized client
+}
+
+# Not found errors → 404 Not Found
+NOT_FOUND_ERRORS = {
+    "NoSuchAccessGrantsLocationError",  # The specified access grants location does not exist
+    "NoSuchAccessGrantError",  # The specified access grant does not exist
+    "NoSuchBucket",  # The specified bucket does not exist
+    "NoSuchBucketPolicy",  # The specified bucket does not have a bucket policy
+    "NoSuchKey",  # The specified key does not exist
+    "NoSuchLifecycleConfiguration",  # The lifecycle configuration does not exist
+    "NoSuchMultiRegionAccessPoint",  # The specified Multi-Region Access Point does not exist
+    "NoSuchUpload",  # The specified multipart upload does not exist
+    "NoSuchVersion",  # Indicates that the version ID specified in the request does not match an existing version
+    "NoSuchWebsiteConfiguration",  # The specified bucket does not have a website configuration
+    "ReplicationConfigurationNotFoundError",  # The replication configuration was not found
+    "ServerSideEncryptionConfigurationNotFoundError",  # The server-side encryption configuration was not found
+}
+
+# Range/invalid byte-range errors → 416
+RANGE_ERRORS = {
+    "InvalidRange",  # The requested range cannot be satisfied
+}
+
+# Server-side throttling, timeout, or transient errors → 503
+TRANSIENT_ERRORS = {
+    "BandwidthLimitExceeded",  # Request bandwidth limit has been exceeded
+    "ConnectionError",  # Connection-level failure reported by botocore
+    "EC2ThrottledException",  # EC2 API request was throttled
+    "HTTPClientError",  # HTTP client failure reported by botocore
+    "InternalError",  # We encountered an internal error. Please try again
+    "LimitExceededException",  # A service request limit was exceeded
+    "PriorRequestNotComplete",  # A previous request is still being processed; retry once it completes
+    "ProvisionedThroughputExceededException",  # Provisioned throughput was exceeded
+    "RequestLimitExceeded",  # Request rate limit was exceeded
+    "RequestThrottled",  # The request was throttled
+    "RequestThrottledException",  # The request was throttled
+    "RequestTimeout",  # Your socket connection to the server was not read from or written to within the timeout period
+    "RequestTimeoutException",  # The request timed out
+    "ServiceUnavailable",  # Reduce your request rate. Service temporarily unavailable
+    "SlowDown",  # Reduce your request rate
+    "TemporaryRedirect",  # You are being redirected to the bucket while DNS updates
+    "ThrottledException",  # The request was throttled
+    "Throttling",  # The request was throttled
+    "ThrottlingException",  # The request was throttled
+    "TooManyRequestsException",  # Too many requests were sent in a given period
+    "TransactionInProgressException",  # A conflicting transaction is still in progress
+}
+
+# Fatal/unrecoverable → 400
+FATAL_ERRORS = {
+    "AccessControlListNotSupported",  # The bucket does not allow ACLs
+    "AccessGrantAlreadyExists",  # The specified access grant already exists
+    "AccessGrantsInstanceAlreadyExists",  # Access Grants Instance already exists
+    "AccessGrantsInstanceNotEmptyError",  # Please clean up locations before deleting the access grants instance
+    "AccessGrantsInstanceNotExistsError",  # Access Grants Instance does not exist
+    "AccessGrantsInstanceResourcePolicyNotExists",  # Access Grants Instance Resource Policy does not exist
+    "AccessGrantsLocationAlreadyExistsError",  # The specified access grants location already exists
+    "AccessGrantsLocationNotEmptyError",  # Please clean up access grants before deleting access grants location
+    "AccessGrantsLocationsQuotaExceededError",  # The access grants location quota has been exceeded
+    "AccessGrantsQuotaExceededError",  # The access grants quota has been exceeded
+    "AmbiguousGrantByEmailAddress",  # The email address that you provided is associated with more than one account
+    "BadDigest",  # The Content-MD5 you specified did not match what we received
+    "BucketAlreadyExists",  # The requested bucket name is not available
+    "BucketAlreadyOwnedByYou",  # The bucket that you tried to create already exists, and you own it
+    "CrossLocationLoggingProhibited",  # Cross-location logging not allowed
+    "EntityTooLarge",  # Your proposed upload is larger than the maximum allowed object size
+    "EntityTooSmall",  # Your proposed upload is smaller than the minimum allowed object size
+    "IncompleteBody",  # You did not provide the number of bytes specified by the Content-Length HTTP header
+    "IncorrectNumberOfFilesInPostRequest",  # POST requires exactly one file upload per request
+    "InlineDataTooLarge",  # Inline data exceeds the maximum allowed size
+    "InvalidAccessGrant",  # The specified Access Grant is invalid
+    "InvalidAccessGrantsLocation",  # The specified Access Grants Location is invalid
+    "InvalidAddressingHeader",  # You must specify the Anonymous role
+    "InvalidArgument",  # Invalid Argument
+    "InvalidBucketName",  # The specified bucket is not valid
+    "InvalidBucketState",  # The request is not valid with the current state of the bucket
+    "InvalidDigest",  # The Content-MD5 you specified is not valid
+    "InvalidEncryptionAlgorithmError",  # The encryption request you specified is not valid
+    "InvalidIamRole",  # The specified IAM Role is invalid
+    "InvalidIdentityCenterInstance",  # The specified identity center instance is invalid
+    "InvalidLocationConstraint",  # The specified location constraint is not valid
+    "InvalidObjectState",  # The operation is not valid for the current state of the object
+    "InvalidPart",  # One or more of the specified parts could not be found
+    "InvalidPartOrder",  # The list of parts was not in ascending order
+    "InvalidPolicyDocument",  # The content of the form does not meet the conditions specified in the policy document
+    "InvalidRequest",  # Please use AWS4-HMAC-SHA256
+    "InvalidResourcePolicy",  # The specified Resource Policy is invalid
+    "InvalidStorageClass",  # The storage class you specified is not valid
+    "InvalidTag",  # The tag provided was not a valid tag
+    "InvalidTags",  # Tag keys cannot start with AWS reserved prefix for system tags
+    "InvalidTargetBucketForLogging",  # The target bucket for logging does not exist, is not owned by you, or does not have the appropriate grants for the log-delivery group
+    "InvalidURI",  # Couldn't parse the specified URI
+    "KeyTooLongError",  # Your key is too long
+    "LambdaInvalidResponse",  # Lambda function returned an invalid response
+    "LambdaInvocationFailed",  # Lambda function invocation failed
+    "LambdaNotFound",  # The AWS Lambda function was not found
+    "LambdaPermissionError",  # The caller is not authorized to invoke the Lambda function
+    "LambdaResponseNotReceived",  # The Lambda function exited without successfully calling WriteGetObjectResponse
+    "LambdaRuntimeError",  # The Lambda function failed during execution
+    "LambdaTimeout",  # The Lambda function did not respond in the allowed time
+    "MalformedACLError",  # The XML you provided was not well-formed or did not validate against our published schema
+    "MalformedPolicyDocument",  # Policy document is malformed
+    "MalformedPOSTRequest",  # The body of your POST request is not well-formed multipart/form-data
+    "MalformedXML",  # This happens when the user sends malformed XML
+    "MaxMessageLengthExceeded",  # Your request was too big
+    "MaxPostPreDataLengthExceededError",  # Your POST request fields preceding the upload file were too large
+    "MetadataTooLarge",  # Your metadata headers exceed the maximum allowed metadata size
+    "MethodNotAllowed",  # The specified method is not allowed against this resource
+    "MissingAttachment",  # A SOAP attachment was expected, but none were found
+    "MissingContentLength",  # You must provide the Content-Length HTTP header
+    "MissingRequestBodyError",  # This happens when the user sends an empty XML document as a request
+    "MissingSecurityHeader",  # Your request was missing a required header
+    "MultiRegionAccessPointAlreadyOwnedByYou",  # You already have a Multi-Region Access Point with the same name
+    "MultiRegionAccessPointModifiedByAnotherRequest",  # The action failed because another request is modifying the specified resource
+    "MultiRegionAccessPointNotReady",  # The specified Multi-Region Access Point is not ready to be updated
+    "MultiRegionAccessPointSameBucketRegion",  # The buckets used to create a Multi-Region Access Point cannot be in the same Region
+    "MultiRegionAccessPointUnsupportedRegion",  # One of the buckets supplied to create the Multi-Region Access Point is in a Region that is not supported
+    "NoLoggingStatusForKey",  # There is no such thing as a logging status subresource for a key
+    "NotImplemented",  # A header you provided implies functionality that is not implemented
+    "OperationAborted",  # A conflicting conditional operation is currently in progress against this resource
+    "PermanentRedirect",  # The bucket you are attempting to access must be addressed using the specified endpoint
+    "PreconditionFailed",  # At least one of the preconditions you specified did not hold
+    "Redirect",  # Temporary redirect
+    "RequestIsNotMultiPartContent",  # Bucket POST must be of the enclosure-type multipart/form-data
+    "RequestTorrentOfBucketError",  # Requesting the torrent file of a bucket is not permitted
+    "RestoreAlreadyInProgress",  # Object restore is already in progress
+    "StsNotAuthorizedError",  # An error occurred when calling the GetDataAccess operation: User is not authorized to perform sts:AssumeRole
+    "StsPackedPolicyTooLargeError",  # An error occurred when calling the GetDataAccess operation: Serialized token too large for session
+    "StsValidationError",  # STS validation error occurred
+    "TooManyBuckets",  # You have attempted to create more buckets than allowed
+    "TooManyConfigurations",  # You have attempted to create more Storage Lens group configurations than the 50 allowed
+    "TooManyElements",  # The Element exceeds the maximum number of elements allowed within a logical operator
+    "TooManyTags",  # The number of tags exceeds the limit of 50 tags
+    "UnexpectedContent",  # This request does not support content
+    "UnresolvableGrantByEmailAddress",  # The email address you provided does not match any account on record
+    "UserKeyMustBeSpecified",  # The bucket POST must contain the specified field name
+    "ValidationError",  # Validation errors might be returned and can occur for numerous reasons
+}
+
 
 class S3Url(object):
     def __init__(
@@ -107,6 +267,7 @@ def __str__(self):
 ERROR_INVALID_RANGE = 11
 ERROR_TRANSIENT = 12
 ERROR_OUT_OF_DISK_SPACE = 13
+ERROR_INVALID_REQUEST = 14
 
 
 def format_result_line(idx, prefix, url="", local=""):
@@ -129,37 +290,23 @@ def normalize_client_error(err):
     try:
         return int(error_code)
     except ValueError:
-        if error_code in ("AccessDenied", "AllAccessDisabled", "InvalidAccessKeyId"):
-            return 403
-        if error_code in ("NoSuchKey", "NoSuchBucket"):
-            return 404
-        if error_code == "InvalidRange":
-            return 416
-        # We "normalize" retriable server errors to 503. These are also considered
-        # transient by boto3 (see:
-        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html)
-        if error_code in (
-            "SlowDown",
-            "RequestTimeout",
-            "RequestTimeoutException",
-            "PriorRequestNotComplete",
-            "ConnectionError",
-            "HTTPClientError",
-            "Throttling",
-            "ThrottlingException",
-            "ThrottledException",
-            "RequestThrottledException",
-            "TooManyRequestsException",
-            "ProvisionedThroughputExceededException",
-            "TransactionInProgressException",
-            "RequestLimitExceeded",
-            "BandwidthLimitExceeded",
-            "LimitExceededException",
-            "RequestThrottled",
-            "EC2ThrottledException",
-            "InternalError",
-        ):
-            return 503
+        # Not a numeric status: fall through to the name-based mapping below.
+        pass
+
+    # Map well-known AWS error names onto the HTTP-style status codes that
+    # callers branch on; the first matching category wins.
+    if error_code in PERMISSION_ERRORS:
+        return 403
+    elif error_code in NOT_FOUND_ERRORS:
+        return 404
+    elif error_code in RANGE_ERRORS:
+        return 416
+    elif error_code in FATAL_ERRORS:
+        return 400
+    elif error_code in TRANSIENT_ERRORS:
+        return 503
+
+    # Default: return original string code if unmapped
     return error_code
 
 
@@ -199,6 +346,8 @@ def op_info(url):
                 to_return = {"error": ERROR_URL_ACCESS_DENIED, "raise_error": err}
             elif error_code == 416:
                 to_return = {"error": ERROR_INVALID_RANGE, "raise_error": err}
+            elif error_code == 400:
+                to_return = {"error": ERROR_INVALID_REQUEST, "raise_error": err}
             elif error_code in (500, 502, 503, 504):
                 to_return = {"error": ERROR_TRANSIENT, "raise_error": err}
             else:
@@ -392,6 +541,9 @@ def handle_client_error(err, idx, result_file):
     elif error_code == 403:
         result_file.write("%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED))
         result_file.flush()
+    elif error_code == 400:
+        result_file.write("%d %d\n" % (idx, -ERROR_INVALID_REQUEST))
+        result_file.flush()
     elif error_code == 503:
         result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
         result_file.flush()
@@ -564,6 +716,8 @@ def get_info(self, url):
                 return False, url, ERROR_URL_NOT_FOUND
             elif error_code == 403:
                 return False, url, ERROR_URL_ACCESS_DENIED
+            elif error_code == 400:
+                return False, url, ERROR_INVALID_REQUEST
             # Transient errors are going to be retried by the aws_retry decorator
             else:
                 raise
@@ -612,6 +766,8 @@ def list_prefix(self, prefix_url, delimiter=""):
                 return False, prefix_url, ERROR_URL_NOT_FOUND
             elif error_code == 403:
                 return False, prefix_url, ERROR_URL_ACCESS_DENIED
+            elif error_code == 400:
+                return False, prefix_url, ERROR_INVALID_REQUEST
             # Transient errors are going to be retried by the aws_retry decorator
             else:
                 raise
@@ -655,6 +811,8 @@ def exit(exit_code, url):
         msg = "Transient error for url: %s" % url
     elif exit_code == ERROR_OUT_OF_DISK_SPACE:
         msg = "Out of disk space when downloading URL: %s" % url
+    elif exit_code == ERROR_INVALID_REQUEST:
+        msg = "Invalid request for URL: %s" % url
     else:
         msg = "Unknown error"
     print("s3op failed:\n%s" % msg, file=sys.stderr)