Skip to content

Commit

Permalink
osf:storageByteCount supplementary metadata [ENG-6187]
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Oct 24, 2024
1 parent cfcd19a commit c373c67
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 19 deletions.
58 changes: 41 additions & 17 deletions api/caching/tasks.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
from urllib.parse import urlparse

from django.apps import apps
from django.contrib.contenttypes.models import ContentType
from django.db import connection
from django.db.models import Sum

import requests
import logging

from django.apps import apps
from api.caching.utils import storage_usage_cache
from framework.postcommit_tasks.handlers import enqueue_postcommit_task

Expand Down Expand Up @@ -114,32 +115,55 @@ def ban_url(instance):
def update_storage_usage_cache(target_id, target_guid, per_page=500000):
    """Recompute the storage usage total for a target and store it in the cache.

    Does nothing when ``settings.ENABLE_STORAGE_USAGE_CACHE`` is falsy.

    :param target_id: unused by this function; kept so existing callers
        (which pass it positionally) continue to work.
    :param target_guid: guid string used both to load the target object and
        to build the cache key.
    :param per_page: page size forwarded to ``compute_storage_usage_total``.
    """
    if not settings.ENABLE_STORAGE_USAGE_CACHE:
        return
    # function-scope import, as in the original
    # NOTE(review): presumably avoids a circular import -- confirm
    from osf.models import Guid
    storage_usage_total = compute_storage_usage_total(
        Guid.load(target_guid).referent,
        # previously accepted but silently dropped; forward it so the
        # caller-chosen page size actually takes effect
        per_page=per_page,
    )
    key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid)
    storage_usage_cache.set(key, storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)


def compute_storage_usage_total(target_obj, per_page=500000):
    """Return the summed size of the osfstorage file versions attached to ``target_obj``.

    Only files with ``provider = 'osfstorage'`` and no ``deleted_on`` value are
    considered. The sum is accumulated page by page (``LIMIT``/``OFFSET``) so a
    single query never aggregates more than ``per_page`` rows.

    :param target_obj: model instance whose ``pk`` and content type identify
        the files to sum.
    :param per_page: maximum number of file-version rows aggregated per query.
    :return: total size as an ``int`` (0 when there are no matching rows).
    """
    # The target is matched by primary key *and* content type id, so no join
    # on django_content_type is needed and the query is not restricted to a
    # single model name.
    #
    # count(*) (rather than count(size)) is deliberate: the count is used to
    # advance OFFSET, so it must equal the number of rows in the page even
    # when some rows have a NULL size.
    sql = """
        SELECT count(*), sum(size) FROM
        (SELECT size FROM osf_basefileversionsthrough AS obfnv
        LEFT JOIN osf_basefilenode file ON obfnv.basefilenode_id = file.id
        LEFT JOIN osf_fileversion version ON obfnv.fileversion_id = version.id
        WHERE file.provider = 'osfstorage'
        AND file.deleted_on IS NULL
        AND file.target_object_id=%(target_pk)s
        AND file.target_content_type_id=%(target_content_type_pk)s
        ORDER BY version.id
        LIMIT %(per_page)s OFFSET %(offset)s
        ) file_page
    """
    # everything except 'offset' is loop-invariant, so build it once
    params = {
        'target_pk': target_obj.pk,
        'target_content_type_pk': ContentType.objects.get_for_model(target_obj).pk,
        'per_page': per_page,
        'offset': 0,
    }
    storage_usage_total = 0
    with connection.cursor() as cursor:
        while True:
            cursor.execute(sql, params)
            # an aggregate without GROUP BY always yields exactly one row
            page_count, size_sum = cursor.fetchall()[0]
            storage_usage_total += int(size_sum or 0)  # sum() is NULL on an empty page
            if not page_count or page_count < per_page:
                # empty or short page: nothing can follow it
                break
            params['offset'] += page_count
    return storage_usage_total


def get_storage_usage_total(target_obj):
    """Return the storage usage total for ``target_obj``, using the cache when enabled.

    With ``settings.ENABLE_STORAGE_USAGE_CACHE`` off, the total is always
    computed directly. Otherwise the cached value (keyed on ``target_obj._id``)
    is returned when present; on a miss the total is computed, written to the
    cache with ``settings.STORAGE_USAGE_CACHE_TIMEOUT``, and returned.
    """
    if settings.ENABLE_STORAGE_USAGE_CACHE:
        cache_key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_obj._id)
        cached_total = storage_usage_cache.get(cache_key)
        if cached_total is not None:
            return cached_total
        # cache miss: compute once and remember the result
        fresh_total = compute_storage_usage_total(target_obj)
        storage_usage_cache.set(cache_key, fresh_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)
        return fresh_total
    return compute_storage_usage_total(target_obj)


def update_storage_usage(target):
Expand Down
16 changes: 16 additions & 0 deletions osf/metadata/osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django import db
import rdflib

from api.caching.tasks import get_storage_usage_total
from osf import models as osfdb
from osf.metadata import gather
from osf.metadata.rdfutils import (
Expand Down Expand Up @@ -213,19 +214,24 @@ def pls_get_magic_metadata_basket(osf_item) -> gather.Basket:
OSFMAP_SUPPLEMENT = {
OSF.Project: {
OSF.hasOsfAddon: None,
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.ProjectComponent: {
OSF.hasOsfAddon: None,
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Registration: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.RegistrationComponent: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Preprint: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.File: {
Expand Down Expand Up @@ -1149,3 +1155,13 @@ def gather_storage_region(focus):
_region_ref = rdflib.URIRef(_region.absolute_api_v2_url)
yield (OSF.storageRegion, _region_ref)
yield (_region_ref, SKOS.prefLabel, rdflib.Literal(_region.name, lang='en'))


@gather.er(
    OSF.storageByteCount,
    focustype_iris=[
        OSF.Project,
        OSF.ProjectComponent,
        OSF.Registration,
        OSF.RegistrationComponent,
        OSF.Preprint,
    ],
)
def gather_storage_byte_count(focus):
    """Yield an ``OSF.storageByteCount`` pair for the focus's storage usage total.

    The total comes from ``get_storage_usage_total(focus.dbmodel)``; nothing
    is yielded when that total is ``None``.
    """
    byte_count = get_storage_usage_total(focus.dbmodel)
    if byte_count is None:
        return
    yield (OSF.storageByteCount, byte_count)
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w4ibb> osf:storageRegion <http://localhost:8000/v2/regions/us/> .
<http://localhost:5000/w4ibb> osf:storageByteCount 1337 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<http://localhost:8000/v2/regions/us/> skos:prefLabel "United States"@en .
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w2ibb> osf:hasOsfAddon <urn:osf.io:addons:gitlab> ;
osf:storageByteCount 7 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<urn:osf.io:addons:gitlab> a osf:AddonImplementation ;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w5ibb> osf:storageRegion <http://localhost:8000/v2/regions/us/> .
<http://localhost:5000/w5ibb> osf:storageByteCount 17 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<http://localhost:8000/v2/regions/us/> skos:prefLabel "United States"@en .
11 changes: 11 additions & 0 deletions osf_tests/metadata/test_osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,14 @@ def test_gather_storage_region(self):
(self.preprintfocus.iri, OSF.storageRegion, _default_region_ref),
(_default_region_ref, SKOS.prefLabel, Literal('United States', lang='en')),
})

def test_gather_storage_byte_count(self):
    # each focus should gather exactly one storageByteCount triple
    _expected_byte_counts = (
        (self.projectfocus, 123456),
        (self.registrationfocus, 0),
        (self.preprintfocus, 1337),
    )
    for _focus, _byte_count in _expected_byte_counts:
        assert_triples(
            osf_gathering.gather_storage_byte_count(_focus),
            {(_focus.iri, OSF.storageByteCount, Literal(_byte_count))},
        )

0 comments on commit c373c67

Please sign in to comment.