Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

avoid reading in whole blob during NamedBlobFile validation (slows TUS uploads) #155

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/155.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Speed up uploads by not reading blob into memory during NamedBlobFile field validation @djay75
106 changes: 65 additions & 41 deletions plone/namedfile/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from plone.namedfile.file import NamedBlobImage as BlobImageValueType
from plone.namedfile.file import NamedFile as FileValueType
from plone.namedfile.file import NamedImage as ImageValueType
from plone.namedfile.interfaces import INamedBlobFile
from plone.namedfile.interfaces import INamedBlobFile, INamedTyped
from plone.namedfile.interfaces import INamedBlobFileField
from plone.namedfile.interfaces import INamedBlobImage
from plone.namedfile.interfaces import INamedBlobImageField
Expand All @@ -20,7 +20,10 @@
from zope.interface import Interface
from zope.schema import Object
from zope.schema import ValidationError

from zope.schema import Field
from zope.schema._bootstrapinterfaces import SchemaNotProvided
from zope.schema._bootstrapinterfaces import SchemaNotCorrectlyImplemented
from zope.schema._bootstrapfields import get_validation_errors

_ = MessageFactory("plone")

Expand Down Expand Up @@ -59,69 +62,90 @@ def validate_file_field(field, value):
validate_binary_field(IPluggableFileFieldValidation, field, value)


@implementer(INamedFileField)
class NamedFile(Object):
"""A NamedFile field"""

_type = FileValueType
schema = INamedFile
class CustomValidator(object):
""" Mixin that overrides Object._validate with the same code except it will
use self.validation_schema instead of self.schema. It will also execute
self.extra_validator if it exists.
"""

def __init__(self, **kw):
if "schema" in kw:
self.schema = kw.pop("schema")
super().__init__(schema=self.schema, **kw)

def _validate(self, value):
super()._validate(value)
validate_file_field(self, value)
super(Object, self)._validate(value)

# schema has to be provided by value
if not self.validation_schema.providedBy(value):
raise SchemaNotProvided(self.validation_schema, value).with_field_and_value(
self, value)

# check the value against schema
schema_error_dict, invariant_errors = get_validation_errors(
self.validation_schema,
value,
self.validate_invariants
)

if schema_error_dict or invariant_errors:
errors = list(schema_error_dict.values()) + invariant_errors
exception = SchemaNotCorrectlyImplemented(
errors,
self.__name__,
schema_error_dict,
invariant_errors
).with_field_and_value(self, value)

try:
raise exception
finally:
# Break cycles
del exception
del invariant_errors
del schema_error_dict
del errors

if hasattr(self, 'extra_validator'):
self.extra_validator(value)


@implementer(INamedFileField)
class NamedFile(CustomValidator, Object):
"""A NamedFile field"""

_type = FileValueType
schema = INamedFile
validation_schema = INamedFile
extra_validator = validate_file_field


@implementer(INamedImageField)
class NamedImage(Object):
class NamedImage(CustomValidator, Object):
"""A NamedImage field"""

_type = ImageValueType
schema = INamedImage

def __init__(self, **kw):
if "schema" in kw:
self.schema = kw.pop("schema")
super().__init__(schema=self.schema, **kw)

def _validate(self, value):
super()._validate(value)
validate_image_field(self, value)
validation_schema = INamedImage
extra_validator = validate_image_field


@implementer(INamedBlobFileField)
class NamedBlobFile(Object):
class NamedBlobFile(CustomValidator, Object):
"""A NamedBlobFile field"""

_type = BlobFileValueType
schema = INamedBlobFile

def __init__(self, **kw):
if "schema" in kw:
self.schema = kw.pop("schema")
super().__init__(schema=self.schema, **kw)

def _validate(self, value):
super()._validate(value)
validate_file_field(self, value)
schema = INamedFile
validation_schema = INamedTyped # Note: Don't validate data as will read in whole file
extra_validator = validate_file_field


@implementer(INamedBlobImageField)
class NamedBlobImage(Object):
class NamedBlobImage(CustomValidator, Object):
"""A NamedBlobImage field"""

_type = BlobImageValueType
schema = INamedBlobImage

def __init__(self, **kw):
if "schema" in kw:
self.schema = kw.pop("schema")
super().__init__(schema=self.schema, **kw)
schema = INamedImage
validation_schema = INamedTyped # Note: Don't validate data as will read in whole file
extra_validator = validate_image_field

def _validate(self, value):
super()._validate(value)
validate_image_field(self, value)
12 changes: 9 additions & 3 deletions plone/namedfile/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
HAVE_BLOBS = True


class IFile(Interface):

class ITyped(Interface):
contentType = schema.NativeStringLine(
title="Content Type",
description="The content type identifies the type of data.",
Expand All @@ -16,6 +16,8 @@ class IFile(Interface):
missing_value="",
)

class IFile(Interface):

data = schema.Bytes(
title="Data",
description="The actual content of the object.",
Expand Down Expand Up @@ -79,11 +81,15 @@ class INamed(Interface):
filename = schema.TextLine(title="Filename", required=False, default=None)


class INamedFile(INamed, IFile):
class INamedTyped(INamed, ITyped):
"""An item with a filename and contentType"""


class INamedFile(INamedTyped, IFile):
"""A non-BLOB file with a filename"""


class INamedImage(INamed, IImage):
class INamedImage(INamedTyped, IImage):
"""A non-BLOB image with a filename"""


Expand Down
43 changes: 43 additions & 0 deletions plone/namedfile/tests/test_storable.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from OFS.Image import Pdata
from plone.namedfile.file import FileChunk
from plone.namedfile.file import NamedBlobImage
from plone.namedfile.file import NamedBlobFile
from plone.namedfile import field
from plone.namedfile.testing import PLONE_NAMEDFILE_FUNCTIONAL_TESTING
from plone.namedfile.tests import getFile

Expand Down Expand Up @@ -56,3 +58,44 @@ def test_opened_file_storable(self):
if os.path.exists(path):
os.remove(path)
self.assertEqual(303, fi.getSize())

def test_upload_no_read(self):
# ensure we don't read the whole file into memory

import ZODB.blob

old_open = ZODB.blob.Blob.open
blob_read = 0
blob_write = 0

def count_open(self, mode="r"):
nonlocal blob_read, blob_write
blob_read += 1 if "r" in mode else 0
blob_write += 1 if "w" in mode else 0
return old_open(self, mode)

with unittest.mock.patch.object(ZODB.blob.Blob, 'open', count_open):
data = getFile("image.gif")
f = tempfile.NamedTemporaryFile(delete=False)
try:
path = f.name
f.write(data)
f.close()
with open(path, "rb") as f:
fi = NamedBlobFile(f, filename="image.gif")
finally:
if os.path.exists(path):
os.remove(path)
self.assertEqual(303, fi.getSize())
self.assertEqual(blob_read, 1, "blob should have only been opened to get size")
self.assertEqual(
blob_write,
1,
"Slow write to blob instead of os rename. Should be only 1 on init",
)
blob_read = 0

blob_field = field.NamedBlobFile()
blob_field.validate(fi)

self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory")
Loading