Skip to content

Commit

Permalink
fix: handle surprising file metadata states
Browse files Browse the repository at this point in the history
- handle FileVersion with null creator
- do not index osfstorage file without versions
  • Loading branch information
aaxelb committed Aug 25, 2023
1 parent d1b2b46 commit 2fccd10
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 13 deletions.
14 changes: 8 additions & 6 deletions addons/osfstorage/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,9 @@ class OsfStorageFile(OsfStorageFileNode, File):

@property
def _hashes(self):
latest_version = self.versions.latest('identifier')
if not latest_version:
try:
latest_version = self.versions.latest('identifier')
except FileVersion.DoesNotExist:
return None
return {
'sha1': latest_version.metadata.get('sha1', ''),
Expand All @@ -243,8 +244,9 @@ def _hashes(self):

@property
def last_known_metadata(self):
latest_version = self.versions.latest('identifier')
if not latest_version:
try:
latest_version = self.versions.latest('identifier')
except FileVersion.DoesNotExist:
size = None
else:
size = latest_version.size
Expand All @@ -265,10 +267,10 @@ def should_update_search(self):
return False
if any(qa_substring in target.title for qa_substring in website_settings.DO_NOT_INDEX_LIST['titles']):
return False

if not self.name or self.is_deleted:
return False

if not self.versions.exists():
return False
spam_check_field = (
'is_spammy'
if website_settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH
Expand Down
3 changes: 2 additions & 1 deletion osf/metadata/osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,8 @@ def gather_versions(focus):

def _gather_fileversion(fileversion, fileversion_iri):
yield (fileversion_iri, RDF.type, OSF.FileVersion)
yield (fileversion_iri, DCTERMS.creator, OsfFocus(fileversion.creator))
if fileversion.creator is not None:
yield (fileversion_iri, DCTERMS.creator, OsfFocus(fileversion.creator))
yield (fileversion_iri, DCTERMS.created, fileversion.created)
yield (fileversion_iri, DCTERMS.modified, fileversion.modified)
yield (fileversion_iri, DCTERMS['format'], fileversion.content_type)
Expand Down
36 changes: 30 additions & 6 deletions osf_tests/test_elastic_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from addons.wiki.models import WikiPage
from addons.osfstorage.models import OsfStorageFile
from addons.osfstorage import settings as osfstorage_settings

from scripts.populate_institutions import main as populate_institutions

Expand Down Expand Up @@ -75,6 +76,16 @@ def wrapped(*args, **kwargs):
return wrapped
return test_wrapper

def create_file_version(file, user):
file.create_version(user, {
'object': '06d80e',
'service': 'cloud',
osfstorage_settings.WATERBUTLER_RESOURCE: 'osf',
}, {
'size': 7,
'contentType': 'img/png'
}).save()

@pytest.mark.enable_search
@pytest.mark.enable_enqueue_task
class TestCollectionsSearch(OsfTestCase):
Expand Down Expand Up @@ -531,6 +542,7 @@ def setUp(self):
name='panda.txt',
materialized_path='/panda.txt')
self.file.save()
create_file_version(self.file, self.user)
self.published_preprint = factories.PreprintFactory(
creator=self.user,
title='My Fairy King',
Expand Down Expand Up @@ -607,6 +619,7 @@ def test_set_primary_file(self):
name='panda.txt',
materialized_path='/panda.txt')
self.file.save()
create_file_version(self.file, self.user)
self.published_preprint.set_primary_file(self.file, auth=Auth(self.user), save=True)
docs = query(self.published_preprint.title)['results']
assert_equal(len(docs), 2)
Expand Down Expand Up @@ -1445,22 +1458,26 @@ class TestSearchFiles(OsfTestCase):

def setUp(self):
super(TestSearchFiles, self).setUp()
self.node = factories.ProjectFactory(is_public=True, title='Otis')
self.user = factories.UserFactory(fullname='David Bowie')
self.node = factories.ProjectFactory(is_public=True, title='Otis', creator=self.user)
self.osf_storage = self.node.get_addon('osfstorage')
self.root = self.osf_storage.get_root()

def test_search_file(self):
self.root.append_file('Shake.wav')
file_ = self.root.append_file('Shake.wav')
create_file_version(file_, self.user)
find = query_file('Shake.wav')['results']
assert_equal(len(find), 1)

def test_search_file_name_without_separator(self):
self.root.append_file('Shake.wav')
file_ = self.root.append_file('Shake.wav')
create_file_version(file_, self.user)
find = query_file('Shake')['results']
assert_equal(len(find), 1)

def test_delete_file(self):
file_ = self.root.append_file('I\'ve Got Dreams To Remember.wav')
create_file_version(file_, self.user)
find = query_file('I\'ve Got Dreams To Remember.wav')['results']
assert_equal(len(find), 1)
file_.delete()
Expand All @@ -1469,6 +1486,7 @@ def test_delete_file(self):

def test_add_tag(self):
file_ = self.root.append_file('That\'s How Strong My Love Is.mp3')
create_file_version(file_, self.user)
tag = Tag(name='Redding')
tag.save()
file_.tags.add(tag)
Expand All @@ -1478,6 +1496,7 @@ def test_add_tag(self):

def test_remove_tag(self):
file_ = self.root.append_file('I\'ve Been Loving You Too Long.mp3')
create_file_version(file_, self.user)
tag = Tag(name='Blue')
tag.save()
file_.tags.add(tag)
Expand All @@ -1490,7 +1509,8 @@ def test_remove_tag(self):
assert_equal(len(find), 0)

def test_make_node_private(self):
self.root.append_file('Change_Gonna_Come.wav')
file_ = self.root.append_file('Change_Gonna_Come.wav')
create_file_version(file_, self.user)
find = query_file('Change_Gonna_Come.wav')['results']
assert_equal(len(find), 1)
self.node.is_public = False
Expand All @@ -1502,7 +1522,8 @@ def test_make_node_private(self):
def test_make_private_node_public(self):
self.node.is_public = False
self.node.save()
self.root.append_file('Try a Little Tenderness.flac')
file_ = self.root.append_file('Try a Little Tenderness.flac')
create_file_version(file_, self.user)
find = query_file('Try a Little Tenderness.flac')['results']
assert_equal(len(find), 0)
self.node.is_public = True
Expand All @@ -1515,7 +1536,8 @@ def test_delete_node(self):
node = factories.ProjectFactory(is_public=True, title='The Soul Album')
osf_storage = node.get_addon('osfstorage')
root = osf_storage.get_root()
root.append_file('The Dock of the Bay.mp3')
file_ = root.append_file('The Dock of the Bay.mp3')
create_file_version(file_, self.user)
find = query_file('The Dock of the Bay.mp3')['results']
assert_equal(len(find), 1)
node.is_deleted = True
Expand All @@ -1526,13 +1548,15 @@ def test_delete_node(self):

def test_file_download_url_guid(self):
file_ = self.root.append_file('Timber.mp3')
create_file_version(file_, self.user)
file_guid = file_.get_guid(create=True)
file_.save()
find = query_file('Timber.mp3')['results']
assert_equal(find[0]['guid_url'], '/' + file_guid._id + '/')

def test_file_download_url_no_guid(self):
file_ = self.root.append_file('Timber.mp3')
create_file_version(file_, self.user)
path = file_.path
deep_url = '/' + file_.target._id + '/files/osfstorage' + path + '/'
find = query_file('Timber.mp3')['results']
Expand Down

0 comments on commit 2fccd10

Please sign in to comment.