-
Notifications
You must be signed in to change notification settings - Fork 76
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Look for Dataverse renamed files on upload
Dataverse 'ingests' certain file types, and ingested files get renamed. On upload, when WaterButler tries to find the correct metadata to return, it responds with a 500 error because it was not looking for the renamed file.
- Loading branch information
1 parent
473191c
commit 3ada43b
Showing
7 changed files
with
192 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import pytest | ||
|
||
from waterbutler.providers.dataverse import utils as dv_utils | ||
|
||
|
||
@pytest.fixture
def format_dict():
    """Build sample raw metadata records keyed by their original extension.

    Each value carries the three raw metadata fields that
    ``original_ext_from_raw_metadata`` inspects: ``originalFileFormat``,
    ``originalFormatLabel`` and ``contentType``.  A fresh mapping is built on
    every call, so tests may mutate the records freely.
    """
    tsv = 'text/tab-separated-values'
    # (extension, originalFileFormat, originalFormatLabel)
    rows = (
        ('xlsx',
         'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
         'MS Excel (XLSX)'),
        ('RData', 'application/x-rlang-transport', 'R Data'),
        ('sav', 'application/x-spss-sav', 'SPSS SAV'),
        ('dta', 'application/x-stata', 'Stata Binary'),
        ('por', 'application/x-spss-por', 'SPSS Portable'),
        ('csv', 'text/csv', 'Comma Separated Values'),
    )
    return {
        ext: {
            'originalFileFormat': file_format,
            'originalFormatLabel': label,
            'contentType': tsv,
        }
        for ext, file_format, label in rows
    }
|
||
|
||
class TestUtils:
    """Tests for the Dataverse provider's original-extension lookup."""

    def test_original_ext_from_raw_metadata(self, format_dict):
        """Every sample record resolves to the extension it is keyed by."""
        for expected_ext, raw_metadata in format_dict.items():
            assert expected_ext == dv_utils.original_ext_from_raw_metadata(raw_metadata)

    def test_original_ext_from_raw_metadata_none_case(self, format_dict):
        """A record with an unrecognised format label resolves to ``None``."""
        for raw_metadata in format_dict.values():
            # Corrupt only the label; format and content type stay valid.
            raw_metadata['originalFormatLabel'] = 'blarg'
            assert dv_utils.original_ext_from_raw_metadata(raw_metadata) is None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Every format listed below shares this content type.
_TSV_CONTENT_TYPE = 'text/tab-separated-values'

# Maps an original file extension to the label / format / content type that
# identify it.  Entry order is significant: lookups that scan this mapping
# return the first matching key.
ORIGINAL_FORMATS = {
    ext: {
        'original_format': original_format,
        'original_label': original_label,
        'content_type': _TSV_CONTENT_TYPE,
    }
    for ext, original_format, original_label in (
        ('xlsx',
         'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
         'MS Excel (XLSX)'),
        # Rdata can come in a few different forms, so just list all of them here
        ('RData', 'application/x-rlang-transport', 'R Data'),
        ('rdata', 'application/x-rlang-transport', 'R Data'),
        ('Rdata', 'application/x-rlang-transport', 'R Data'),
        ('sav', 'application/x-spss-sav', 'SPSS SAV'),
        ('dta', 'application/x-stata', 'Stata Binary'),
        ('por', 'application/x-spss-por', 'SPSS Portable'),
        ('csv', 'text/csv', 'Comma Separated Values'),
    )
}
|
||
|
||
def original_ext_from_raw_metadata(data):
    """Work out a file's original extension from its raw metadata.

    Reads ``originalFormatLabel``, ``originalFileFormat`` and ``contentType``
    from ``data`` and returns the first key of ``ORIGINAL_FORMATS`` whose
    entry matches all three.  Returns ``None`` when any of the three fields
    is missing or empty, or when no entry matches.
    """
    label = data.get('originalFormatLabel')
    file_format = data.get('originalFileFormat')
    content_type = data.get('contentType')

    # All three fields are required to identify a format.
    if not all((label, file_format, content_type)):
        return None

    return next(
        (
            ext
            for ext, known in ORIGINAL_FORMATS.items()
            if label == known['original_label']
            and file_format == known['original_format']
            and content_type == known['content_type']
        ),
        None,
    )