Skip to content

Commit

Permalink
Merge pull request girder#901 from girder/range-headers-on-download
Browse files Browse the repository at this point in the history
Support range downloading
  • Loading branch information
zachmullen committed Jul 27, 2015
2 parents 2eb3d05 + 8aded0c commit 6a85d97
Show file tree
Hide file tree
Showing 13 changed files with 216 additions and 55 deletions.
8 changes: 7 additions & 1 deletion clients/web/src/views/body/AssetstoresView.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,13 @@ girder.views.AssetstoresView = girder.View.extend({
timeout: 4000
});
this.collection.fetch({}, true);
}, this).save();
}, this).off('g:error').on('g:error', function (err) {
girder.events.trigger('g:alert', {
icon: 'cancel',
text: err.responseJSON.message,
type: 'danger'
});
}).save();
},

deleteAssetstore: function (evt) {
Expand Down
5 changes: 5 additions & 0 deletions girder/api/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,16 @@ def endpointDecorator(self, *args, **kwargs):
try:
val = fun(self, args, kwargs)

# If this is a partial response, we set the status appropriately
if 'Content-Range' in cherrypy.response.headers:
cherrypy.response.status = 206

if isinstance(val, types.FunctionType):
# If the endpoint returned a function, we assume it's a
# generator function for a streaming response.
cherrypy.response.stream = True
return val()

if isinstance(val, cherrypy.lib.file_generator):
# Don't do any post-processing of static files
return val
Expand Down
25 changes: 23 additions & 2 deletions girder/api/v1/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,35 @@ def download(self, file, params):
Defers to the underlying assetstore adapter to stream a file out.
Requires read permission on the folder that contains the file's item.
"""
offset = int(params.get('offset', 0))
return self.model('file').download(file, offset)
rangeHeader = cherrypy.lib.httputil.get_ranges(
cherrypy.request.headers.get('Range'), file.get('size', 0))

# The HTTP Range header takes precedence over query params
if rangeHeader and len(rangeHeader):
# Currently we only support a single range.
offset, endByte = rangeHeader[0]
else:
offset = int(params.get('offset', 0))
endByte = params.get('endByte')

if endByte is not None:
endByte = int(endByte)

return self.model('file').download(file, offset, endByte=endByte)
# NOTE(review): presumably lets this endpoint authenticate via cookie;
# confirm against the REST layer that consumes the cookieAuth attribute.
download.cookieAuth = True
# API documentation for the download endpoint: the offset/endByte query
# parameters and the HTTP Range header both select a partial download.
download.description = (
    Description('Download a file.')
    .notes('This endpoint also accepts the HTTP "Range" header for partial '
           'file downloads.')
    .param('id', 'The ID of the file.', paramType='path')
    .param('offset', 'Start downloading at this offset in bytes within '
           'the file.', dataType='integer', required=False)
    .param('endByte', 'If you only wish to download part of the file, '
           'pass this as the index of the last byte to download. Unlike '
           'the HTTP Range header, the endByte parameter is non-inclusive, '
           'so you should set it to the index of the byte one past the '
           'final byte you wish to receive.', dataType='integer',
           required=False)
    .errorResponse('ID was invalid.')
    .errorResponse('Read access was denied on the parent folder.', 403))

Expand Down
12 changes: 10 additions & 2 deletions girder/models/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,24 @@ def remove(self, file, updateItemSize=True, **kwargs):

Model.remove(self, file)

def download(self, file, offset=0, headers=True):
def download(self, file, offset=0, headers=True, endByte=None):
"""
Use the appropriate assetstore adapter for whatever assetstore the
file is stored in, and call downloadFile on it. If the file is a link
file rather than a file in an assetstore, we redirect to it.
:param file: The file to download.
:param offset: The start byte within the file.
:type offset: int
:param headers: Whether to set headers (i.e. is this an HTTP request
for a single file, or something else).
:type headers: bool
"""
if file.get('assetstoreId'):
assetstore = self.model('assetstore').load(file['assetstoreId'])
adapter = assetstore_utilities.getAssetstoreAdapter(assetstore)
return adapter.downloadFile(file, offset=offset, headers=headers)
return adapter.downloadFile(
file, offset=offset, headers=headers, endByte=endByte)
elif file.get('linkUrl'):
if headers:
raise cherrypy.HTTPRedirect(file['linkUrl'])
Expand Down
27 changes: 26 additions & 1 deletion girder/utility/abstract_assetstore_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.
###############################################################################

import cherrypy
import os
import six

Expand Down Expand Up @@ -123,7 +124,7 @@ def deleteFile(self, file):
raise Exception('Must override deleteFile in %s.'
% self.__class__.__name__) # pragma: no cover

def downloadFile(self, file, offset=0, headers=True):
def downloadFile(self, file, offset=0, headers=True, endByte=None):
"""
This method is in charge of returning a value to the RESTful endpoint
that can be used to download the file. This can return a generator
Expand All @@ -136,6 +137,9 @@ def downloadFile(self, file, offset=0, headers=True):
:type offset: int
:param headers: Flag for whether headers should be sent on the response.
:type headers: bool
:param endByte: Final byte to download. If ``None``, downloads to the
end of the file.
:type endByte: int or None
"""
raise Exception('Must override downloadFile in %s.'
% self.__class__.__name__) # pragma: no cover
Expand Down Expand Up @@ -172,6 +176,27 @@ def getChunkSize(self, chunk):
else:
return len(chunk)

def setContentHeaders(self, file, offset, endByte):
    """
    Sets the Content-Length, Content-Disposition, and Content-Type headers
    on the current response, and also the Content-Range header if this is
    a non-empty partial download.

    :param file: The file being downloaded. Its ``name`` and ``size`` keys
        are read, along with the optional ``mimeType`` key.
    :type file: dict
    :param offset: The start byte of the download.
    :type offset: int
    :param endByte: The end byte of the download (non-inclusive).
    :type endByte: int
    """
    headers = cherrypy.response.headers
    size = file['size']
    # An inverted range (offset at or past endByte) transfers no bytes.
    length = max(endByte - offset, 0)

    # Inside a quoted header parameter, backslashes and double quotes must
    # be escaped; line breaks are dropped so that a file name can neither
    # end the parameter early nor start a new header line.
    name = file['name'].replace('\r', '').replace('\n', '')
    name = name.replace('\\', '\\\\').replace('"', '\\"')

    headers['Content-Type'] = \
        file.get('mimeType') or 'application/octet-stream'
    headers['Content-Disposition'] = 'attachment; filename="%s"' % name
    headers['Content-Length'] = length

    # Content-Range has to describe at least one byte. For an empty range
    # the last-byte position would fall below the first (for example
    # "bytes 10-4/5"), so the header is left off in that case.
    if length and size and (offset or endByte < size):
        headers['Content-Range'] = 'bytes %d-%d/%d' % (
            offset, endByte - 1, size)

def checkUploadSize(self, upload, chunkSize):
"""Check if the upload is valid based on the chunk size. If this
raises an exception, then the caller should clean up and reraise the
Expand Down
24 changes: 15 additions & 9 deletions girder/utility/filesystem_assetstore_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,11 +204,15 @@ def finalizeUpload(self, upload, file):

return file

def downloadFile(self, file, offset=0, headers=True):
def downloadFile(self, file, offset=0, headers=True, endByte=None,
**kwargs):
"""
Returns a generator function that will be used to stream the file from
disk to the response.
"""
if endByte is None or endByte > file['size']:
endByte = file['size']

path = os.path.join(self.assetstore['root'], file['path'])
if not os.path.isfile(path):
raise GirderException(
Expand All @@ -217,21 +221,23 @@ def downloadFile(self, file, offset=0, headers=True):
'file-does-not-exist')

if headers:
mimeType = file.get('mimeType', 'application/octet-stream')
if not mimeType:
mimeType = 'application/octet-stream'
cherrypy.response.headers['Content-Type'] = mimeType
cherrypy.response.headers['Content-Length'] = file['size'] - offset
cherrypy.response.headers['Content-Disposition'] = \
'attachment; filename="%s"' % file['name']
cherrypy.response.headers['Accept-Ranges'] = 'bytes'
self.setContentHeaders(file, offset, endByte)

def stream():
bytesRead = offset
with open(path, 'rb') as f:
if offset > 0:
f.seek(offset)

while True:
data = f.read(BUF_SIZE)
readLen = min(BUF_SIZE, endByte - bytesRead)
if readLen <= 0:
break

data = f.read(readLen)
bytesRead += readLen

if not data:
break
yield data
Expand Down
36 changes: 24 additions & 12 deletions girder/utility/gridfs_assetstore_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,22 +202,21 @@ def finalizeUpload(self, upload, file):

return file

def downloadFile(self, file, offset=0, headers=True):
def downloadFile(self, file, offset=0, headers=True, endByte=None,
**kwargs):
"""
Returns a generator function that will be used to stream the file from
the database to the response.
"""
if endByte is None or endByte > file['size']:
endByte = file['size']

if headers:
mimeType = file.get('mimeType', 'application/octet-stream')
if not mimeType:
mimeType = 'application/octet-stream'
cherrypy.response.headers['Content-Type'] = mimeType
cherrypy.response.headers['Content-Length'] = file['size'] - offset
cherrypy.response.headers['Content-Disposition'] = \
'attachment; filename="%s"' % file['name']
cherrypy.response.headers['Accept-Ranges'] = 'bytes'
self.setContentHeaders(file, offset, endByte)

# If the file is empty, we stop here
if file['size'] - offset <= 0:
if endByte - offset <= 0:
return lambda: ''

n = 0
Expand All @@ -235,12 +234,25 @@ def downloadFile(self, file, offset=0, headers=True):

def stream():
co = chunkOffset # Can't assign to outer scope without "nonlocal"
position = offset
shouldBreak = False

for chunk in cursor:
chunkLen = len(chunk['data'])

if position + chunkLen > endByte:
chunkLen = endByte - position + co
shouldBreak = True

yield chunk['data'][co:chunkLen]

if shouldBreak:
break

position += chunkLen - co

if co > 0:
yield chunk['data'][co:]
co = 0
else:
yield chunk['data']

return stream

Expand Down
13 changes: 6 additions & 7 deletions girder/utility/s3_assetstore_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
from girder.models.model_base import ValidationException
from girder import logger, events

BUF_LEN = 65536 # Buffer size for download stream


class S3AssetstoreAdapter(AbstractAssetstoreAdapter):
"""
Expand Down Expand Up @@ -300,7 +302,8 @@ def finalizeUpload(self, upload, file):
}
return file

def downloadFile(self, file, offset=0, headers=True):
def downloadFile(self, file, offset=0, headers=True, endByte=None,
**kwargs):
"""
When downloading a single file with HTTP, we redirect to S3. Otherwise,
e.g. when downloading as part of a zip stream, we connect to S3 and
Expand All @@ -316,11 +319,7 @@ def downloadFile(self, file, offset=0, headers=True):
url = urlFn(key=file['s3Key'])
raise cherrypy.HTTPRedirect(url)
else:
cherrypy.response.headers['Content-Length'] = '0'
cherrypy.response.headers['Content-Type'] = \
'application/octet-stream'
cherrypy.response.headers['Content-Disposition'] = \
'attachment; filename="{}"'.format(file['name'])
self.setContentHeaders(file, 0, 0)

def stream():
yield ''
Expand All @@ -329,7 +328,7 @@ def stream():
def stream():
if file['size'] > 0:
pipe = requests.get(urlFn(key=file['s3Key']), stream=True)
for chunk in pipe.iter_content(chunk_size=65536):
for chunk in pipe.iter_content(chunk_size=BUF_LEN):
if chunk:
yield chunk
else:
Expand Down
31 changes: 29 additions & 2 deletions plugins/hdfs_assetstore/plugin_tests/assetstore_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class MockSnakebiteClient(object):
def __init__(self, port=None, **kwargs):
    """
    Set up the mock client on top of the module-level mock root path.

    :param port: Stored on the instance as ``port``.
    :param kwargs: Only ``chunkSize`` is read here; it falls back to 3
        when absent and is stored on the instance as ``chunkSize``.
    """
    chunkSize = kwargs.get('chunkSize', 3)
    self.port = port
    self.root = _mockRoot
    self.chunkSize = chunkSize

def _convertPath(self, path):
if path[0] == '/':
Expand Down Expand Up @@ -109,8 +110,14 @@ def ls(self, paths, **kwargs):

def cat(self, paths, **kwargs):
for path in paths:
with open(self._convertPath(path), 'rb') as f:
yield f.read()
def stream():
with open(self._convertPath(path), 'rb') as f:
while True:
data = f.read(self.chunkSize)
if not data:
break
yield data
yield stream()

def touchz(self, paths, **kwargs):
for path in paths:
Expand Down Expand Up @@ -298,6 +305,26 @@ def testAssetstore(self):
self.assertStatusOk(resp)
self.assertEqual(resp.collapse_body().strip(), 'hello')

# Test download with range header
resp = self.request(path='/file/{}/download'.format(file['_id']),
user=self.admin, isJson=False,
additionalHeaders=[('Range', 'bytes=1-3')])
self.assertStatus(resp, 206)
self.assertEqual('ell', self.getBody(resp))
self.assertEqual(resp.headers['Accept-Ranges'], 'bytes')
self.assertEqual(resp.headers['Content-Length'], 3)
self.assertEqual(resp.headers['Content-Range'], 'bytes 1-3/6')

# Test download with range header with skipped chunk
resp = self.request(path='/file/{}/download'.format(file['_id']),
user=self.admin, isJson=False,
additionalHeaders=[('Range', 'bytes=4-')])
self.assertStatus(resp, 206)
self.assertEqual('o\n', self.getBody(resp))
self.assertEqual(resp.headers['Accept-Ranges'], 'bytes')
self.assertEqual(resp.headers['Content-Length'], 2)
self.assertEqual(resp.headers['Content-Range'], 'bytes 4-5/6')

helloTxtPath = os.path.join(_mockRoot, 'to_import', 'hello.txt')

# Deleting an imported file should not delete the backing HDFS file
Expand Down
Loading

0 comments on commit 6a85d97

Please sign in to comment.