diff --git a/api/caching/tasks.py b/api/caching/tasks.py
index 0b7a4b6670f..fa1b87be843 100644
--- a/api/caching/tasks.py
+++ b/api/caching/tasks.py
@@ -1,11 +1,12 @@
+import logging
from urllib.parse import urlparse
+
+from django.apps import apps
+from django.contrib.contenttypes.models import ContentType
from django.db import connection
from django.db.models import Sum
-
import requests
-import logging
-from django.apps import apps
from api.caching.utils import storage_usage_cache
from framework.postcommit_tasks.handlers import enqueue_postcommit_task
@@ -16,6 +17,9 @@
logger = logging.getLogger(__name__)

# Default page size (rows per SQL LIMIT) used by the storage-usage
# functions in this module when the caller does not pass ``per_page``.
_DEFAULT_FILEVERSION_PAGE_SIZE = 500_000
+
+
def get_varnish_servers():
    """Return the Varnish servers configured in ``settings.VARNISH_SERVERS``."""
    # TODO: this should get the varnish servers from HAProxy or a setting
    varnish_servers = settings.VARNISH_SERVERS
    return varnish_servers
@@ -111,35 +115,59 @@ def ban_url(instance):
@app.task(max_retries=5, default_retry_delay=10)
def update_storage_usage_cache(target_id, target_guid, per_page=_DEFAULT_FILEVERSION_PAGE_SIZE):
    """Recompute the storage usage total for one target and store it in the cache.

    Does nothing when ``settings.ENABLE_STORAGE_USAGE_CACHE`` is false.

    :param target_id: not used by this implementation; kept in the signature
        so existing callers that pass it keep working.
    :param target_guid: guid used to load the target object and to build the
        cache key.
    :param per_page: page size forwarded to ``compute_storage_usage_total``.
    """
    if not settings.ENABLE_STORAGE_USAGE_CACHE:
        return

    # Function-scope import, as in the original -- presumably to avoid a
    # circular import with ``osf.models`` (TODO confirm).
    from osf.models import Guid

    # Guard the lookup: if the guid cannot be loaded, or has no referent,
    # there is nothing to measure. Skip instead of raising AttributeError.
    target_obj = getattr(Guid.load(target_guid), 'referent', None)
    if target_obj is None:
        logger.warning(
            'Skipping storage usage cache update: no referent found for guid %s',
            target_guid,
        )
        return

    storage_usage_total = compute_storage_usage_total(target_obj, per_page=per_page)
    key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid)
    storage_usage_cache.set(key, storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)
+
+
def compute_storage_usage_total(target_obj, per_page=_DEFAULT_FILEVERSION_PAGE_SIZE):
    """Sum the sizes of the file versions stored in osfstorage for ``target_obj``.

    Only files with ``provider = 'osfstorage'`` and no ``deleted_on`` value are
    counted, matched to the target by primary key and content type. The rows
    are aggregated one page at a time so no single query scans more than
    ``per_page`` rows.

    :param target_obj: model instance whose files are measured; its ``pk`` and
        its Django content type identify the files.
    :param per_page: maximum number of rows aggregated per query.
    :return: total size as an ``int`` (``0`` when there are no matching rows).
    """
    # ``count(*)`` rather than ``count(size)``: the LEFT JOIN can yield rows
    # whose size is NULL, and ``count(size)`` would not count them. The offset
    # would then advance by fewer rows than were read, re-reading (and double
    # counting) rows on the next page, or stopping early on an all-NULL page.
    sql = """
        SELECT count(*), sum(size) from
        (SELECT size FROM osf_basefileversionsthrough AS obfnv
        LEFT JOIN osf_basefilenode file ON obfnv.basefilenode_id = file.id
        LEFT JOIN osf_fileversion version ON obfnv.fileversion_id = version.id
        WHERE file.provider = 'osfstorage'
        AND file.deleted_on IS NULL
        AND file.target_object_id=%(target_pk)s
        AND file.target_content_type_id=%(target_content_type_pk)s
        ORDER BY version.id
        LIMIT %(per_page)s OFFSET %(offset)s
        ) file_page
    """
    query_params = {
        'target_pk': target_obj.pk,
        'target_content_type_pk': ContentType.objects.get_for_model(target_obj).pk,
        'per_page': per_page,
        'offset': 0,
    }
    storage_usage_total = 0
    with connection.cursor() as cursor:
        while True:
            cursor.execute(sql, query_params)
            # An aggregate query without GROUP BY always returns exactly one row.
            row_count, size_sum = cursor.fetchone()
            # ``sum`` is NULL for an empty page (or a page of NULL sizes).
            storage_usage_total += int(size_sum or 0)
            if not row_count:
                # An empty page means every row has been visited.
                break
            query_params['offset'] += row_count
    return storage_usage_total
+
+
def get_storage_usage_total(target_obj):
    """Return the storage usage total for ``target_obj``, using the cache when enabled.

    With ``settings.ENABLE_STORAGE_USAGE_CACHE`` off the total is computed on
    every call. With it on, a cached value is returned when present; otherwise
    the total is computed, written to the cache with
    ``settings.STORAGE_USAGE_CACHE_TIMEOUT``, and returned.
    """
    if settings.ENABLE_STORAGE_USAGE_CACHE:
        cache_key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_obj._id)
        cached_total = storage_usage_cache.get(cache_key)
        if cached_total is not None:
            return cached_total
        # Cache miss: compute once and remember the result.
        fresh_total = compute_storage_usage_total(target_obj)
        storage_usage_cache.set(cache_key, fresh_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)
        return fresh_total
    return compute_storage_usage_total(target_obj)
def update_storage_usage(target):
diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py
index ba0da33fec5..617e22b237d 100644
--- a/osf/metadata/osf_gathering.py
+++ b/osf/metadata/osf_gathering.py
@@ -8,6 +8,7 @@
from django import db
import rdflib
+from api.caching.tasks import get_storage_usage_total
from osf import models as osfdb
from osf.metadata import gather
from osf.metadata.rdfutils import (
@@ -218,19 +219,24 @@ def pls_get_magic_metadata_basket(osf_item) -> gather.Basket:
OSFMAP_SUPPLEMENT = {
OSF.Project: {
OSF.hasOsfAddon: None,
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.ProjectComponent: {
OSF.hasOsfAddon: None,
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Registration: {
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.RegistrationComponent: {
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Preprint: {
+ OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.File: {
@@ -1172,3 +1178,13 @@ def gather_storage_region(focus):
_region_ref = rdflib.URIRef(_region.absolute_api_v2_url)
yield (OSF.storageRegion, _region_ref)
yield (_region_ref, SKOS.prefLabel, rdflib.Literal(_region.name, lang='en'))
+
+
@gather.er(
    OSF.storageByteCount,
    focustype_iris=[
        OSF.Project,
        OSF.ProjectComponent,
        OSF.Registration,
        OSF.RegistrationComponent,
        OSF.Preprint,
    ],
)
def gather_storage_byte_count(focus):
    """Yield ``(OSF.storageByteCount, total)`` for the focus item's storage usage.

    The total comes from ``get_storage_usage_total(focus.dbmodel)``; nothing is
    yielded when that value is ``None``.
    """
    _byte_count = get_storage_usage_total(focus.dbmodel)
    if _byte_count is None:
        return
    yield (OSF.storageByteCount, _byte_count)
diff --git a/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle b/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle
index 8ac4aa1b988..9ff0732a509 100644
--- a/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle
+++ b/osf_tests/metadata/expected_metadata_files/preprint_supplement.turtle
@@ -1,6 +1,7 @@
@prefix osf: .
@prefix skos: .
- osf:storageRegion .
+ osf:storageByteCount 1337 ;
+ osf:storageRegion .
skos:prefLabel "United States"@en .
diff --git a/osf_tests/metadata/expected_metadata_files/project_supplement.turtle b/osf_tests/metadata/expected_metadata_files/project_supplement.turtle
index 70363ed33a3..d055e97554f 100644
--- a/osf_tests/metadata/expected_metadata_files/project_supplement.turtle
+++ b/osf_tests/metadata/expected_metadata_files/project_supplement.turtle
@@ -3,6 +3,7 @@
@prefix skos: .
osf:hasOsfAddon ;
+ osf:storageByteCount 7 ;
osf:storageRegion .
a osf:AddonImplementation ;
diff --git a/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle b/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle
index 9c2599245e7..9e8201b7915 100644
--- a/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle
+++ b/osf_tests/metadata/expected_metadata_files/registration_supplement.turtle
@@ -1,6 +1,7 @@
@prefix osf: .
@prefix skos: .
- osf:storageRegion .
+ osf:storageByteCount 17 ;
+ osf:storageRegion .
skos:prefLabel "United States"@en .
diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py
index 016b2578295..73355562e68 100644
--- a/osf_tests/metadata/test_osf_gathering.py
+++ b/osf_tests/metadata/test_osf_gathering.py
@@ -871,3 +871,14 @@ def test_gather_qualified_attributions(self):
(_attribution_readonly, PROV.agent, self.userfocus__readonly),
(_attribution_readonly, DCAT.hadRole, OSF['readonly-contributor']),
})
+
def test_gather_storage_byte_count(self):
    """Check the storageByteCount triple gathered for the project, registration and preprint focuses."""
    _expected_byte_counts = (
        (self.projectfocus, 123456),
        (self.registrationfocus, 0),
        (self.preprintfocus, 1337),
    )
    for _focus, _byte_count in _expected_byte_counts:
        assert_triples(osf_gathering.gather_storage_byte_count(_focus), {
            (_focus.iri, OSF.storageByteCount, Literal(_byte_count)),
        })