@@ -120,22 +120,99 @@ def self.get_md5_from_s3(bucket, url, path, aws_access_key_id, aws_secret_access
120
120
get_digests_from_s3 ( bucket , url , path , aws_access_key_id , aws_secret_access_key , token , region ) [ "md5" ]
121
121
end
122
122
123
- def self . get_digests_from_s3 ( bucket , url , path , aws_access_key_id , aws_secret_access_key , token , region )
124
- response = do_request ( "HEAD" , url , bucket , path , aws_access_key_id , aws_secret_access_key , token , region )
125
123
126
- etag = response . headers [ :etag ] . gsub ( '"' , '' )
127
- digest = response . headers [ :x_amz_meta_digest ]
124
# Builds the digest map for an S3 object from its response headers.
#
# headers - Hash-like object indexed by symbol. :etag must be present;
#           :x_amz_meta_digest is optional and, when present, is parsed as
#           comma-separated "name=value" pairs.
#
# Returns a Hash whose "md5" entry is the ETag with double quotes removed,
# merged with (and overridden by) the pairs parsed from :x_amz_meta_digest.
def self.get_digests_from_headers(headers)
  etag = headers[:etag].gsub('"', '')
  digest = headers[:x_amz_meta_digest]

  # Split each pair on the first "=" only, so values that themselves contain
  # "=" (for example base64 padding) are kept intact instead of being
  # truncated or producing a malformed pair.
  digests = digest.nil? ? {} : Hash[digest.split(",").map { |pair| pair.split("=", 2) }]

  { "md5" => etag }.merge(digests)
end
132
130
133
- def self . get_from_s3 ( bucket , url , path , aws_access_key_id , aws_secret_access_key , token , region = nil )
131
# Issues a signed HEAD request for an S3 object and returns its digests.
#
# bucket                - S3 bucket name (used in the virtual-hosted URL).
# path                  - Object path appended to the bucket host.
# aws_access_key_id     - Access key passed to get_s3_auth.
# aws_secret_access_key - Secret key passed to get_s3_auth.
# token                 - Optional session token (may be nil).
# timeout               - Request timeout handed to RestClient.
# open_timeout          - Connection-open timeout handed to RestClient.
# retries               - Number of additional attempts after the first
#                         failure; negative values are treated as 0.
#
# Returns the Hash produced by get_digests_from_headers.
# Raises the last error encountered once all attempts are used up.
#
# NOTE(review): the call visible in get_md5_from_s3 still passes
# (bucket, url, path, ..., region), which does not line up with this
# parameter order -- confirm that caller.
def self.get_digests_from_s3(bucket, path, aws_access_key_id, aws_secret_access_key, token, timeout = 300, open_timeout = 10, retries = 5)
  now, auth_string = get_s3_auth("HEAD", bucket, path, aws_access_key_id, aws_secret_access_key, token)
  headers = build_headers(now, auth_string, token)
  url = 'https://%s.s3.amazonaws.com%s' % [bucket, path]

  # Always make at least one attempt, even if retries is zero or negative.
  attempts_left = [retries.to_i, 0].max + 1

  begin
    # Request.execute is what RestClient.head wraps; calling it directly lets
    # the timeout arguments actually take effect.
    response = RestClient::Request.execute(
      :method => :head,
      :url => url,
      :headers => headers,
      :timeout => timeout,
      :open_timeout => open_timeout
    )
    get_digests_from_headers(response.headers)
  rescue
    attempts_left -= 1
    retry if attempts_left > 0
    raise
  end
end
155
+
156
# Checks a downloaded object against the MD5 information S3 returned.
#
# Per http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html
# the ETag is not an MD5 digest for objects created by Multipart Upload or
# Part Copy, so an explicit x-amz-meta-digest-md5 header is preferred when
# the response carries one.
#
# response - Object exposing #headers (symbol-keyed) and #file (with #path).
#
# Returns the result of verify_md5_checksum when a usable checksum is
# available, or true when none can be trusted (SSE-C / SSE-KMS objects
# without the digest header).
def self.validate_download_checksum(response)
  headers = response.headers

  # Test the same header that is read below; checking a different header
  # would call gsub on nil whenever only that other header is set.
  digest_md5 = headers[:x_amz_meta_digest_md5]
  if digest_md5
    return self.verify_md5_checksum(digest_md5.gsub('"', ''), response.file.path)
  end

  sse_customer_algorithm = headers[:x_amz_server_side_encryption_customer_algorithm]
  sse = headers[:x_amz_server_side_encryption]
  if sse_customer_algorithm.nil? && sse != "aws:kms"
    return self.verify_md5_checksum(headers[:etag].gsub('"', ''), response.file.path)
  end

  # Without the x-amz-meta-digest-md5 header, objects encrypted with SSE-C
  # or SSE-KMS cannot be validated because their ETag is not the MD5
  # digest. Assume they are valid.
  true
end
179
+
180
+
181
+ def self . get_from_s3 ( bucket , url , path , aws_access_key_id , aws_secret_access_key , token , verify_md5 = false , region = nil )
134
182
response = nil
135
183
retries = 5
136
184
for attempts in 0 ..retries
137
185
begin
138
186
response = do_request ( "GET" , url , bucket , path , aws_access_key_id , aws_secret_access_key , token , region )
187
+
188
+ # check the length of the downloaded object,
189
+ # make sure we didn't get nailed by
190
+ # a quirk in Net::HTTP class from the Ruby standard library.
191
+ # Net::HTTP has the behavior (and I would call this a bug) that if the
192
+ # connection gets reset in the middle of transferring the response,
193
+ # it silently truncates the response back to the caller without throwing an exception.
194
+ # ** See https://github.com/ruby/ruby/blob/trunk/lib/net/http/response.rb#L291
195
+ # and https://github.com/ruby/ruby/blob/trunk/lib/net/protocol.rb#L99 .
196
+ # It attempts to read up to Content-Length worth of bytes, but if hits an early EOF,
197
+ # it just returns without throwing an exception (the ignore_eof flag).
198
+
199
+ length = response . headers [ :content_length ] . to_i ( )
200
+ if not length . nil? and response . file . size ( ) != length
201
+ raise "Downloaded object size (#{ response . file . size ( ) } ) does not match expected content_length (#{ length } )"
202
+ end
203
+
204
+ # default to not checking md5 sum of downloaded objects
205
+ # per http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html
206
+ # If an object is created by either the Multipart Upload or Part Copy operation,
207
+ # the ETag is not an MD5 digest, regardless of the method of encryption
208
+ # however, if present, x-amz-meta-digest will contain the digest, so
209
+ # try if we see enough information and verify_md5 is set.
210
+ if verify_md5
211
+ if not self . validate_download_checksum ( response )
212
+ raise "Downloaded object has an md5sum which differs from the expected value provided by S3"
213
+ end
214
+ end
215
+
139
216
return response
140
217
# break
141
218
rescue client ::MovedPermanently , client ::Found , client ::TemporaryRedirect => e
@@ -159,6 +236,25 @@ def self.get_from_s3(bucket, url, path, aws_access_key_id, aws_secret_access_key
159
236
end
160
237
end
161
238
239
# Builds the Date value and Authorization header value for an S3 request
# signed with HMAC-SHA1.
#
# method                - HTTP verb placed on the first line of the string
#                         to sign (e.g. "HEAD").
# bucket                - Bucket name, used in the signed resource.
# path                  - Object path, appended directly after the bucket.
# aws_access_key_id     - Access key placed in the Authorization value.
# aws_secret_access_key - Secret key used as the HMAC key.
# token                 - Optional session token; when truthy it is signed
#                         as an x-amz-security-token line.
#
# Returns a two-element Array: [date_string, "AWS <key id>:<signature>"].
def self.get_s3_auth(method, bucket, path, aws_access_key_id, aws_secret_access_key, token)
  now = Time.now.utc.strftime('%a, %d %b %Y %H:%M:%S GMT')

  # Verb, two empty lines (no Content-MD5 / Content-Type sent), then the date.
  string_to_sign = "#{method}\n\n\n#{now}\n"
  string_to_sign += "x-amz-security-token:#{token}\n" if token
  string_to_sign += "/#{bucket}#{path}"

  # OpenSSL::Digest::Digest is a deprecated alias that newer openssl
  # versions no longer define; OpenSSL::Digest is the class itself.
  sha1 = OpenSSL::Digest.new('sha1')
  signed = OpenSSL::HMAC.digest(sha1, aws_secret_access_key, string_to_sign)

  # strict_encode64 emits no line feeds, so the header value stays on one line
  # (encode64 would append a trailing "\n").
  signed_base64 = Base64.strict_encode64(signed)

  [now, "AWS #{aws_access_key_id}:#{signed_base64}"]
end
257
+
162
258
def self . aes256_decrypt ( key , file )
163
259
Chef ::Log . debug ( "Decrypting S3 file." )
164
260
key = key . strip
0 commit comments