Skip to content
This repository has been archived by the owner on Jan 29, 2022. It is now read-only.

Add user-based quota enforcement #133

Merged
merged 4 commits into from
Feb 2, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dkc/core/admin/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .file import FileAdmin
from .folder import FolderAdmin
from .quota import QuotaAdmin
from .terms import TermsAdmin

__all__ = ['FileAdmin', 'FolderAdmin', 'TermsAdmin']
__all__ = ['FileAdmin', 'FolderAdmin', 'QuotaAdmin', 'TermsAdmin']
39 changes: 39 additions & 0 deletions dkc/core/admin/quota.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from django.contrib import admin
from django.db import models
from django_admin_display import admin_display
import humanize

from dkc.core.models import Quota


@admin.register(Quota)
class QuotaAdmin(admin.ModelAdmin):
brianhelba marked this conversation as resolved.
Show resolved Hide resolved
list_display = ['id', 'user', 'human_used', 'human_allowed', 'usage_percent']
list_select_related = ['user']

search_fields = ['user__username']

readonly_fields = ['used', 'usage_percent']

@admin_display(short_description='Used', admin_order_field='used')
def human_used(self, obj: Quota) -> str:
return humanize.naturalsize(obj.used)
brianhelba marked this conversation as resolved.
Show resolved Hide resolved

@admin_display(short_description='Allowed', admin_order_field='used')
def human_allowed(self, obj: Quota) -> str:
return humanize.naturalsize(obj.allowed)
brianhelba marked this conversation as resolved.
Show resolved Hide resolved

@admin_display(
short_description='% Used',
admin_order_field=models.Case(
# Prevent division by zero, and sort the result as effectively 100%
models.When(allowed=0, then=1),
# Multiply by 1.0 to force floating-point division
default=(models.F('used') * 1.0 / models.F('allowed')),
output_field=models.FloatField(),
),
)
def usage_percent(self, obj: Quota) -> str:
if obj.allowed == 0:
return '--'
return '{:.1%}'.format(obj.used / obj.allowed)
62 changes: 62 additions & 0 deletions dkc/core/migrations/0008_quota.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Generated by Django 3.1.5 on 2021-01-31 23:46

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.db.models.expressions

import dkc.core.models.quota


class Migration(migrations.Migration):

dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('core', '0007_non_nullable_creator'),
]

operations = [
migrations.RemoveField(
model_name='quota',
name='allocation',
),
migrations.AddField(
model_name='quota',
name='allowed',
field=models.PositiveBigIntegerField(default=dkc.core.models.quota._default_user_quota),
),
migrations.AddField(
model_name='quota',
name='used',
field=models.PositiveBigIntegerField(default=0),
),
migrations.AddField(
model_name='tree',
name='quota',
field=models.ForeignKey(
default=1,
on_delete=django.db.models.deletion.PROTECT,
related_name='trees',
to='core.quota',
),
preserve_default=False,
),
migrations.AlterField(
model_name='quota',
name='user',
field=models.OneToOneField(
default=1,
on_delete=django.db.models.deletion.CASCADE,
related_name='quota',
to='auth.user',
),
preserve_default=False,
),
migrations.AddConstraint(
model_name='quota',
constraint=models.CheckConstraint(
check=models.Q(used__lte=django.db.models.expressions.F('allowed')),
name='used_lte_allowed',
),
),
]
10 changes: 7 additions & 3 deletions dkc/core/models/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.contrib.auth.models import User
from django.core import validators
from django.core.exceptions import ValidationError
from django.db import models
from django.db import models, transaction
from django.dispatch import receiver
from django_extensions.db.models import TimeStampedModel
from girder_utils.models import JSONObjectField
Expand Down Expand Up @@ -101,16 +101,20 @@ def abs_path(self) -> str:
def public(self) -> bool:
return self.tree.public

@transaction.atomic
def increment_size(self, amount: int) -> None:
"""
Increment or decrement the Folder size.

This Folder and all of its ancestors' sizes will be updated atomically. ``amount`` may be
negative, but an operation resulting in a negative final size is illegal.
This Folder, all of its ancestors' sizes, and its quota will be updated atomically.
``amount`` may be negative, but an operation resulting in a negative final size is illegal.
"""
if amount == 0:
return

# Do this first, in case it fails
self.tree.quota.increment(amount)

ancestor_pks = [folder.pk for folder in self.ancestors]
Folder.objects.filter(pk__in=ancestor_pks).update(size=(models.F('size') + amount))

Expand Down
60 changes: 48 additions & 12 deletions dkc/core/models/quota.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,62 @@
from typing import Type

from django.conf import settings
from django.contrib.auth.models import User
from django.db import models
from django.core.exceptions import ValidationError
from django.db import IntegrityError, models, transaction
from django.db.models.signals import post_save
from django.dispatch import receiver
from girder_utils.models import SelectRelatedManager


def _default_user_quota() -> int:
return settings.DKC_DEFAULT_QUOTA


class Quota(models.Model):
allocation = models.BigIntegerField(default=0)
user = models.OneToOneField(User, null=True, on_delete=models.CASCADE)
# root_set
class Meta:
constraints = [
models.CheckConstraint(
check=models.Q(used__lte=models.F('allowed')),
name='used_lte_allowed',
)
]

used = models.PositiveBigIntegerField(default=0)
allowed = models.PositiveBigIntegerField(default=_default_user_quota)

# OneToOneField ensures that only one Quota per User exists
user = models.OneToOneField(User, on_delete=models.CASCADE, related_name='quota')
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually, with "project-based" quotas, this can be nullable.

Ideally, the relationship would point the other way, but since User is practically not directly extensible, this is a reasonable substitute. Given the _create_user_quota signal handler, we can rely on user.quota always existing for now.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we pull this out of here and have a separate associative table to map users to quotas? Then, every user could have an entry in that table, but not every quota would have to have an entry there (i.e. for any non-user associated quotas).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that would be a Profile model (Option 2 here), which would also store any other user-related info that we wanted to extend.

Do you foresee any upcoming features where we want to store additional information (beyond just foreign key relationships) with the user? If so, I think it's worth a detour to add the Profile now.

If not, I think it's worth merging this approach as-is, to start giving us working quota support. As we extend to non-user-associated ("project") quotas, we can rework the implementation internally.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add the profile model, it's not much code and should be straightforward. Just make it a commit on this same topic.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started to create a Profile model. This is something that's supported by Factory Boy: https://factoryboy.readthedocs.io/en/latest/recipes.html#example-django-s-profile , but a significant complication is the fact that we'll have to annotate all of our factories (not just UserFactory and ProfileFactory) with @factory.django.mute_signals(post_save), which will disable any post_save handlers during essentially all tests. Eventually, we might need this, but it will introduce some drag.

IMO, the current implementation is adequate. At runtime, the reverse relation user.quota is adequate to support this feature and any lookup failures will immediately log 500 errors. If we can't depend on signals to support some behavior (instead requiring DB-level guarantees), I think we'd be giving up an important measure of power that Django provides.

In some ways, attaching the ForeignKey to Quota is superior, particularly once that field is nullable (to support project quotas) in the future, since we'll be able to easily filter user quotas from project quotas (I'm not seeing how to do this if the ForeignKey reference points to Quota).

Given the downside of suppressing all post_save signals, I think we should only proceed with Profile if we are going to need it to store additional information on each user. Otherwise, I'd advocate to merge this as-is, and proceed with developing more advanced features on top.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just want to be clear that putting the user field here is a relational hack. We're misusing nullability to model what should be a relation. We can merge this as is, but I'd like to create an issue to address this. Insofar as this repository is meant to model best practices, one of the things I really want us to exemplify is proper use of relational databases with Django, which means we'll need to work through the warts. As nice as FactoryBoy is, we can't let it be the library driving our design choices.

Let's make the ticket, I'll take a crack at this myself later on.


@transaction.atomic
def increment(self, amount: int) -> None:
"""
Increment or decrement the quota.

This function increments (or decrements, if ``amount`` is negative) the usage
values associated with this quota. If this increment would exceed the max
allotment for this quota, a ``ValidationError`` is raised and the transaction
is rolled back.
"""
if amount == 0:
return

# The user is needed to stringify, so join it when directly querying for a quota
objects = SelectRelatedManager('user')
try:
# Use an .update query instead of a .save, to avoid assigning an F-expression on the
# local instance, which might need to be rolled back on a failure
Quota.objects.filter(pk=self.pk).update(used=(models.F('used') + amount))
except IntegrityError as e:
if '"used_lte_allowed"' in str(e):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish we didn't have to do this -- do you think we'd have any luck upstreaming a feature request to psycopg2 to provide more structured info on their IntegrityErrors?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Not to hold up this PR or anything, just an idle thought.)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's where the IntegrityError is generated.

The PostgreSQL docs state:

For some types of errors, the server reports the name of a database object (a table, table column, data type, or constraint) associated with the error; for example, the name of the unique constraint that caused a unique_violation error. Such names are supplied in separate fields of the error report message so that applications need not try to extract them from the possibly-localized human-readable text of the message. As of PostgreSQL 9.3, complete coverage for this feature exists only for errors in SQLSTATE class 23 (integrity constraint violation), but this is likely to be expanded in future.

So, it looks like it's possible in principle for only this particular case. Psycopg 2 supports >= PostgreSQL 7.4, so support would need to be conditional.

@zachmullen I would start by filing an issue in Psycopg 2, but if they decline it there, perhaps they'd consider it in (the apparently actively-developed) Psycopg 3. Let me know if you do so, so I can subscribe to it.

raise ValidationError(
'Root folder size quota would be exceeded: ' f'{self.used}B > {self.allowed}B.'
)
else:
raise

def __str__(self):
return f'Quota for <{self.user}>; {self.allocation} bytes'
# Update with the new value
self.refresh_from_db(fields=['used'])


@receiver(post_save, sender=User)
def create_user_quota(sender: Type[User], instance: User, created: bool, **kwargs):
def _create_user_quota(sender: Type[User], instance: User, created: bool, **kwargs):
if created:
quota = Quota(user=instance)
quota.save()
Quota.objects.create(user=instance)
4 changes: 4 additions & 0 deletions dkc/core/models/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@

from dkc.core.permissions import Permission, PermissionGrant

from .quota import Quota

if TYPE_CHECKING:
# Prevent circular import
from .folder import Folder


class Tree(models.Model):
public: bool = models.BooleanField(default=False)
# Prevent deletion of a Quota if it has Trees referencing it
quota = models.ForeignKey(Quota, on_delete=models.PROTECT, related_name='trees')

@property
def root_folder(self) -> Folder:
Expand Down
3 changes: 2 additions & 1 deletion dkc/core/rest/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ def perform_create(self, serializer: FolderSerializer):
if not tree.has_permission(user, permission=Permission.write):
raise PermissionDenied()
else:
tree = Tree.objects.create()
# Use the user's quota for the new tree
tree = Tree.objects.create(quota=user.quota)
zachmullen marked this conversation as resolved.
Show resolved Hide resolved
tree.grant_permission(PermissionGrant(user_or_group=user, permission=Permission.admin))
serializer.save(tree=tree, creator=user)

Expand Down
18 changes: 12 additions & 6 deletions dkc/core/tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,15 @@ class Meta:

class TreeFactory(factory.django.DjangoModelFactory):
public = False
# No need to instantiate a quota, just fetch from the user creating this
quota = factory.SelfAttribute('creator.quota')

class Meta:
model = Tree

class Params:
creator = factory.SubFactory(UserFactory)


class FolderFactory(factory.django.DjangoModelFactory):
class Meta:
Expand All @@ -34,7 +39,10 @@ class Meta:
user_metadata = _metadata_faker
parent = None
tree = factory.Maybe(
'parent', factory.SelfAttribute('parent.tree'), factory.SubFactory(TreeFactory)
'parent',
factory.SelfAttribute('parent.tree'),
# Make the new tree be created by (and use the quota of) this folder's creator
factory.SubFactory(TreeFactory, creator=factory.SelfAttribute('..creator')),
)
creator = factory.SubFactory(UserFactory)

Expand All @@ -51,13 +59,11 @@ class Meta:
creator = factory.SubFactory(UserFactory)


class TreeWithRootFactory(factory.django.DjangoModelFactory):
class Meta:
model = Tree

class TreeWithRootFactory(TreeFactory):
@factory.post_generation
def root_folder(self, create, *args, **kwargs):
return FolderFactory(tree=self)
# Make the new folder be created by the same user as it's tree's quota
return FolderFactory(tree=self, creator=self.quota.user)


class TermsFactory(factory.django.DjangoModelFactory):
Expand Down
5 changes: 2 additions & 3 deletions dkc/core/tests/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ def test_file_abs_path(folder, folder_factory, file_factory):
assert grandchild.abs_path == f'/{folder.name}/{child.name}/{grandchild.name}'


def test_file_checksum(file_factory):
# Use "build" strategy, so database is not required
file = file_factory.build()
@pytest.mark.django_db
def test_file_checksum(file):
file.compute_sha512()
assert len(file.sha512) == 128

Expand Down
32 changes: 17 additions & 15 deletions dkc/core/tests/test_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,22 @@
from dkc.core.models.folder import MAX_DEPTH


@pytest.mark.django_db
def test_folder_name_invalid(folder_factory):
folder = folder_factory.build(name='name / withslash')
folder = folder_factory(name='name / withslash')

# Since the folder is not saved and added to a tree, other validation errors are also present,
# so it's critical to match the error by string content
with pytest.raises(ValidationError, match='Name may not contain forward slashes'):
folder.full_clean()


def test_is_root_root(folder_factory):
folder = folder_factory.build()
@pytest.mark.django_db
def test_is_root_root(folder):
assert folder.is_root is True


def test_is_root_child(folder_factory):
folder = folder_factory.build()
child = folder_factory.build(parent=folder)
assert child.is_root is False
@pytest.mark.django_db
def test_is_root_child(child_folder):
assert child_folder.is_root is False


@pytest.mark.django_db
Expand Down Expand Up @@ -87,11 +85,9 @@ def test_folder_sibling_names_unique_files(file, folder_factory):

@pytest.mark.django_db
def test_root_folder_names_unique(folder, folder_factory):
other_root = folder_factory.build(name=folder.name)
zachmullen marked this conversation as resolved.
Show resolved Hide resolved
# Make sure foreign key references exist in database first
other_root.creator.save()
other_root.tree.save()
with pytest.raises(IntegrityError):
other_root = folder_factory()
other_root.name = folder.name
with pytest.raises(IntegrityError, match=r'folder_name'):
other_root.save()


Expand All @@ -107,6 +103,8 @@ def test_folder_names_not_globally_unique(folder_factory):
def test_increment_size(folder_factory, amount):
initial_size = 100
root = folder_factory(size=initial_size)
root.tree.quota.used = initial_size
root.tree.quota.save()
child = folder_factory(parent=root, size=initial_size)
grandchild = folder_factory(parent=child, size=initial_size)

Expand All @@ -118,14 +116,18 @@ def test_increment_size(folder_factory, amount):
assert grandchild.size == new_size
assert grandchild.parent.size == new_size
assert grandchild.parent.parent.size == new_size
assert grandchild.parent.parent.tree.quota.used == new_size


@pytest.mark.django_db
def test_increment_size_negative(folder_factory):
# Make the root too small
root = folder_factory(size=5)
root.tree.quota.used = 10
root.tree.quota.save()
child = folder_factory(parent=root, size=10)

# Increment the child, which tests enforcement across propagation
with pytest.raises(IntegrityError, match=r'size'):
# An IntegrityError is expected, since this should cause a 500 if it actually happens
with pytest.raises(IntegrityError, match=r'folder_size'):
child.increment_size(-10)
Loading