Merge pull request girder#901 from girder/range-headers-on-download

Support range downloading
Xarthisius · Jul 27, 2015 · 6a85d97 · 6a85d97
2 parents 2eb3d05 + 8aded0c
commit 6a85d97
Show file tree

Hide file tree

Showing 13 changed files with 216 additions and 55 deletions.
diff --git a/clients/web/src/views/body/AssetstoresView.js b/clients/web/src/views/body/AssetstoresView.js
@@ -104,7 +104,13 @@ girder.views.AssetstoresView = girder.View.extend({
                 timeout: 4000
             });
             this.collection.fetch({}, true);
-        }, this).save();
+        }, this).off('g:error').on('g:error', function (err) {
+            girder.events.trigger('g:alert', {
+                icon: 'cancel',
+                text: err.responseJSON.message,
+                type: 'danger'
+            });
+        }).save();
     },
 
     deleteAssetstore: function (evt) {

diff --git a/girder/api/rest.py b/girder/api/rest.py
@@ -294,11 +294,16 @@ def endpointDecorator(self, *args, **kwargs):
         try:
             val = fun(self, args, kwargs)
 
+            # If this is a partial response, we set the status appropriately
+            if 'Content-Range' in cherrypy.response.headers:
+                cherrypy.response.status = 206
+
             if isinstance(val, types.FunctionType):
                 # If the endpoint returned a function, we assume it's a
                 # generator function for a streaming response.
                 cherrypy.response.stream = True
                 return val()
+
             if isinstance(val, cherrypy.lib.file_generator):
                 # Don't do any post-processing of static files
                 return val

diff --git a/girder/api/v1/file.py b/girder/api/v1/file.py
@@ -212,14 +212,35 @@ def download(self, file, params):
         Defers to the underlying assetstore adapter to stream a file out.
         Requires read permission on the folder that contains the file's item.
         """
-        offset = int(params.get('offset', 0))
-        return self.model('file').download(file, offset)
+        rangeHeader = cherrypy.lib.httputil.get_ranges(
+            cherrypy.request.headers.get('Range'), file.get('size', 0))
+
+        # The HTTP Range header takes precedence over query params
+        if rangeHeader and len(rangeHeader):
+            # Currently we only support a single range.
+            offset, endByte = rangeHeader[0]
+        else:
+            offset = int(params.get('offset', 0))
+            endByte = params.get('endByte')
+
+            if endByte is not None:
+                endByte = int(endByte)
+
+        return self.model('file').download(file, offset, endByte=endByte)
     download.cookieAuth = True
     download.description = (
         Description('Download a file.')
+        .notes('This endpoint also accepts the HTTP "Range" header for partial '
+               'file downloads.')
         .param('id', 'The ID of the file.', paramType='path')
         .param('offset', 'Start downloading at this offset in bytes within '
                'the file.', dataType='integer', required=False)
+        .param('endByte', 'If you only wish to download part of the file, '
+               'pass this as the index of the last byte to download. Unlike '
+               'the HTTP Range header, the endByte parameter is non-inclusive, '
+               'so you should set it to the index of the byte one past the '
+               'final byte you wish to receive.', dataType='integer',
+               required=False)
         .errorResponse('ID was invalid.')
         .errorResponse('Read access was denied on the parent folder.', 403))
 

diff --git a/girder/models/file.py b/girder/models/file.py
@@ -61,16 +61,24 @@ def remove(self, file, updateItemSize=True, **kwargs):
 
         Model.remove(self, file)
 
-    def download(self, file, offset=0, headers=True):
+    def download(self, file, offset=0, headers=True, endByte=None):
         """
         Use the appropriate assetstore adapter for whatever assetstore the
         file is stored in, and call downloadFile on it. If the file is a link
         file rather than a file in an assetstore, we redirect to it.
+
+        :param file: The file to download.
+        :param offset: The start byte within the file.
+        :type offset: int
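+        :param headers: Whether to set headers (i.e. is this an HTTP request
+            for a single file, or something else).
+        :type headers: bool
+        :param endByte: End of the byte range to download (non-inclusive).
+            If ``None``, downloads to the end of the file.
+        :type endByte: int or None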
         """
         if file.get('assetstoreId'):
             assetstore = self.model('assetstore').load(file['assetstoreId'])
             adapter = assetstore_utilities.getAssetstoreAdapter(assetstore)
-            return adapter.downloadFile(file, offset=offset, headers=headers)
+            return adapter.downloadFile(
+                file, offset=offset, headers=headers, endByte=endByte)
         elif file.get('linkUrl'):
             if headers:
                 raise cherrypy.HTTPRedirect(file['linkUrl'])

diff --git a/girder/utility/abstract_assetstore_adapter.py b/girder/utility/abstract_assetstore_adapter.py
@@ -14,6 +14,7 @@
 #  limitations under the License.
 ###############################################################################
 
+import cherrypy
 import os
 import six
 
@@ -123,7 +124,7 @@ def deleteFile(self, file):
         raise Exception('Must override deleteFile in %s.'
                         % self.__class__.__name__)  # pragma: no cover
 
-    def downloadFile(self, file, offset=0, headers=True):
+    def downloadFile(self, file, offset=0, headers=True, endByte=None):
         """
         This method is in charge of returning a value to the RESTful endpoint
         that can be used to download the file. This can return a generator
@@ -136,6 +137,9 @@ def downloadFile(self, file, offset=0, headers=True):
         :type offset: int
         :param headers: Flag for whether headers should be sent on the response.
         :type headers: bool
+        :param endByte: End of the byte range to download (non-inclusive).
+            If ``None``, downloads to the end of the file.
+        :type endByte: int or None
         """
         raise Exception('Must override downloadFile in %s.'
                         % self.__class__.__name__)  # pragma: no cover
@@ -172,6 +176,27 @@ def getChunkSize(self, chunk):
         else:
             return len(chunk)
 
+    def setContentHeaders(self, file, offset, endByte):
+        """
+        Sets the Content-Length, Content-Disposition, Content-Type, and also
+        the Content-Range header if this is a partial download. The headers
+        are written to ``cherrypy.response.headers``.
+
+        :param file: The file being downloaded. Its ``name`` and ``size`` keys
+            are read; ``mimeType`` is optional and falls back to
+            ``application/octet-stream``.
+        :param offset: The start byte of the download.
+        :type offset: int
+        :param endByte: The end byte of the download (non-inclusive).
+        :type endByte: int
+        """
+        cherrypy.response.headers['Content-Type'] = \
+            file.get('mimeType') or 'application/octet-stream'
+        cherrypy.response.headers['Content-Disposition'] = \
+            'attachment; filename="%s"' % file['name']
+        # Clamp to zero in case offset lies at or past endByte.
+        cherrypy.response.headers['Content-Length'] = max(endByte - offset, 0)
+
+        # Content-Range is only sent for a partial download of a non-empty
+        # file; its last-byte position is inclusive, hence endByte - 1.
+        if (offset or endByte < file['size']) and file['size']:
+            cherrypy.response.headers['Content-Range'] = 'bytes %d-%d/%d' % (
+                offset, endByte - 1, file['size'])
+
     def checkUploadSize(self, upload, chunkSize):
         """Check if the upload is valid based on the chunk size.  If this
         raises an exception, then the caller should clean up and reraise the

diff --git a/girder/utility/filesystem_assetstore_adapter.py b/girder/utility/filesystem_assetstore_adapter.py
@@ -204,11 +204,15 @@ def finalizeUpload(self, upload, file):
 
         return file
 
-    def downloadFile(self, file, offset=0, headers=True):
+    def downloadFile(self, file, offset=0, headers=True, endByte=None,
+                     **kwargs):
         """
         Returns a generator function that will be used to stream the file from
         disk to the response.
         """
+        if endByte is None or endByte > file['size']:
+            endByte = file['size']
+
         path = os.path.join(self.assetstore['root'], file['path'])
         if not os.path.isfile(path):
             raise GirderException(
@@ -217,21 +221,23 @@ def downloadFile(self, file, offset=0, headers=True):
                 'file-does-not-exist')
 
         if headers:
-            mimeType = file.get('mimeType', 'application/octet-stream')
-            if not mimeType:
-                mimeType = 'application/octet-stream'
-            cherrypy.response.headers['Content-Type'] = mimeType
-            cherrypy.response.headers['Content-Length'] = file['size'] - offset
-            cherrypy.response.headers['Content-Disposition'] = \
-                'attachment; filename="%s"' % file['name']
+            cherrypy.response.headers['Accept-Ranges'] = 'bytes'
+            self.setContentHeaders(file, offset, endByte)
 
         def stream():
+            # Absolute file position of the next read; starts at offset.
+            bytesRead = offset
             with open(path, 'rb') as f:
                 if offset > 0:
                     f.seek(offset)
 
                 while True:
-                    data = f.read(BUF_SIZE)
+                    # Cap each read so nothing at or past endByte is sent.
+                    readLen = min(BUF_SIZE, endByte - bytesRead)
+                    if readLen <= 0:
+                        break
+
+                    data = f.read(readLen)
+                    bytesRead += readLen
+
                     if not data:
                         break
                     yield data

diff --git a/girder/utility/gridfs_assetstore_adapter.py b/girder/utility/gridfs_assetstore_adapter.py
@@ -202,22 +202,21 @@ def finalizeUpload(self, upload, file):
 
         return file
 
-    def downloadFile(self, file, offset=0, headers=True):
+    def downloadFile(self, file, offset=0, headers=True, endByte=None,
+                     **kwargs):
         """
         Returns a generator function that will be used to stream the file from
         the database to the response.
         """
+        if endByte is None or endByte > file['size']:
+            endByte = file['size']
+
         if headers:
-            mimeType = file.get('mimeType', 'application/octet-stream')
-            if not mimeType:
-                mimeType = 'application/octet-stream'
-            cherrypy.response.headers['Content-Type'] = mimeType
-            cherrypy.response.headers['Content-Length'] = file['size'] - offset
-            cherrypy.response.headers['Content-Disposition'] = \
-                'attachment; filename="%s"' % file['name']
+            cherrypy.response.headers['Accept-Ranges'] = 'bytes'
+            self.setContentHeaders(file, offset, endByte)
 
         # If the file is empty, we stop here
-        if file['size'] - offset <= 0:
+        if endByte - offset <= 0:
             return lambda: ''
 
         n = 0
@@ -235,12 +234,25 @@ def downloadFile(self, file, offset=0, headers=True):
 
         def stream():
             co = chunkOffset  # Can't assign to outer scope without "nonlocal"
+            position = offset
+            shouldBreak = False
+
             for chunk in cursor:
+                chunkLen = len(chunk['data'])
+
+                # Only the bytes past ``co`` are served from this chunk, so
+                # that count (not the full chunk length) decides whether the
+                # chunk crosses endByte. Ignoring ``co`` here ends the stream
+                # one chunk early when the download starts mid-chunk.
+                if position + chunkLen - co > endByte:
+                    chunkLen = endByte - position + co
+                    shouldBreak = True
+
+                yield chunk['data'][co:chunkLen]
+
+                if shouldBreak:
+                    break
+
+                position += chunkLen - co
+
                 if co > 0:
-                    yield chunk['data'][co:]
                     co = 0
-                else:
-                    yield chunk['data']
 
         return stream
 

diff --git a/girder/utility/s3_assetstore_adapter.py b/girder/utility/s3_assetstore_adapter.py
@@ -30,6 +30,8 @@
 from girder.models.model_base import ValidationException
 from girder import logger, events
 
+BUF_LEN = 65536  # Buffer size for download stream
+
 
 class S3AssetstoreAdapter(AbstractAssetstoreAdapter):
     """
@@ -300,7 +302,8 @@ def finalizeUpload(self, upload, file):
                 }
         return file
 
-    def downloadFile(self, file, offset=0, headers=True):
+    def downloadFile(self, file, offset=0, headers=True, endByte=None,
+                     **kwargs):
         """
         When downloading a single file with HTTP, we redirect to S3. Otherwise,
         e.g. when downloading as part of a zip stream, we connect to S3 and
@@ -316,11 +319,7 @@ def downloadFile(self, file, offset=0, headers=True):
                 url = urlFn(key=file['s3Key'])
                 raise cherrypy.HTTPRedirect(url)
             else:
-                cherrypy.response.headers['Content-Length'] = '0'
-                cherrypy.response.headers['Content-Type'] = \
-                    'application/octet-stream'
-                cherrypy.response.headers['Content-Disposition'] = \
-                    'attachment; filename="{}"'.format(file['name'])
+                self.setContentHeaders(file, 0, 0)
 
                 def stream():
                     yield ''
@@ -329,7 +328,7 @@ def stream():
             def stream():
                 if file['size'] > 0:
                     pipe = requests.get(urlFn(key=file['s3Key']), stream=True)
-                    for chunk in pipe.iter_content(chunk_size=65536):
+                    for chunk in pipe.iter_content(chunk_size=BUF_LEN):
                         if chunk:
                             yield chunk
                 else:

diff --git a/plugins/hdfs_assetstore/plugin_tests/assetstore_test.py b/plugins/hdfs_assetstore/plugin_tests/assetstore_test.py
@@ -40,6 +40,7 @@ class MockSnakebiteClient(object):
     def __init__(self, port=None, **kwargs):
         self.root = _mockRoot
         self.port = port
+        self.chunkSize = kwargs.get('chunkSize', 3)  # bytes per read in cat()
 
     def _convertPath(self, path):
         if path[0] == '/':
@@ -109,8 +110,14 @@ def ls(self, paths, **kwargs):
 
     def cat(self, paths, **kwargs):
         for path in paths:
-            with open(self._convertPath(path), 'rb') as f:
-                yield f.read()
+            # Pass the path in explicitly: a generator body runs lazily, so
+            # closing over the loop variable would read whatever ``path`` is
+            # by the time the caller starts consuming the stream.
+            def stream(path):
+                with open(self._convertPath(path), 'rb') as f:
+                    while True:
+                        data = f.read(self.chunkSize)
+                        if not data:
+                            break
+                        yield data
+            yield stream(path)
 
     def touchz(self, paths, **kwargs):
         for path in paths:
@@ -298,6 +305,26 @@ def testAssetstore(self):
         self.assertStatusOk(resp)
         self.assertEqual(resp.collapse_body().strip(), 'hello')
 
+        # Test download with range header
+        resp = self.request(path='/file/{}/download'.format(file['_id']),
+                            user=self.admin, isJson=False,
+                            additionalHeaders=[('Range', 'bytes=1-3')])
+        self.assertStatus(resp, 206)
+        self.assertEqual('ell', self.getBody(resp))
+        self.assertEqual(resp.headers['Accept-Ranges'], 'bytes')
+        self.assertEqual(resp.headers['Content-Length'], 3)
+        self.assertEqual(resp.headers['Content-Range'], 'bytes 1-3/6')
+
+        # Test download with range header with skipped chunk
+        resp = self.request(path='/file/{}/download'.format(file['_id']),
+                            user=self.admin, isJson=False,
+                            additionalHeaders=[('Range', 'bytes=4-')])
+        self.assertStatus(resp, 206)
+        self.assertEqual('o\n', self.getBody(resp))
+        self.assertEqual(resp.headers['Accept-Ranges'], 'bytes')
+        self.assertEqual(resp.headers['Content-Length'], 2)
+        self.assertEqual(resp.headers['Content-Range'], 'bytes 4-5/6')
+
         helloTxtPath = os.path.join(_mockRoot, 'to_import', 'hello.txt')
 
         # Deleting an imported file should not delete the backing HDFS file