Skip to content

Commit

Permalink
Merge pull request #105 from pangaea-data-publisher/robbranch2
Browse files Browse the repository at this point in the history
robbranch2
  • Loading branch information
huberrob authored Dec 17, 2020
2 parents dee895c + 2ba571b commit fc88682
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 11 deletions.
34 changes: 26 additions & 8 deletions fuji_server/controllers/fair_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class FAIRCheck:
FILES_LIMIT = None
LOG_SUCCESS = 25
VALID_RESOURCE_TYPES = []
FUJI_VERSION = 'v1.0.1'

def __init__(self, uid, test_debug=False, oaipmh=None, use_datacite=True):
uid_bytes = uid.encode('utf-8')
Expand All @@ -105,7 +106,7 @@ def __init__(self, uid, test_debug=False, oaipmh=None, use_datacite=True):
self.landing_url = None # url of the landing page of self.pid_url
self.landing_html = None
self.landing_origin = None # schema + authority of the landing page e.g. https://www.pangaea.de
self.signposting_header_links = dict()
self.signposting_header_links = []
self.pid_scheme = None
self.id_scheme= None
self.logger = logging.getLogger(self.__class__.__name__)
Expand Down Expand Up @@ -369,7 +370,7 @@ def retrieve_metadata_embedded(self, extruct_metadata):
'FsF-F3-01M : Found data links in HTML head (link rel=item) : ' + str(len(links)))
if self.metadata_merged.get('object_content_identifier') is None:
self.metadata_merged['object_content_identifier'] = links
self.metadata_sources.append((MetaDataCollector.Sources.SIGN_POSTING.value,'linked'))
self.metadata_sources.append((MetaDataCollector.Sources.TYPED_LINK.value,'linked'))

# If an identifier has been detected in the metadata, the persistent identifier check may have to be repeated.
if self.metadata_merged.get('object_identifier'):
Expand All @@ -380,7 +381,8 @@ def retrieve_metadata_embedded(self, extruct_metadata):
found_pids_in_metadata.remove('url')
found_id = found_pids_in_metadata[0]
if found_id in Mapper.VALID_PIDS.value:
self.logger.info('FsF-F1-02D : Found object identifier in metadata, repeating PID check')
self.logger.info('FsF-F2-01M : Found object identifier in metadata, repeating PID check for FsF-F1-02D')
self.logger.log(self.LOG_SUCCESS, 'FsF-F1-02D : Found object identifier in metadata during FsF-F2-01M, PID check was repeated')
self.repeat_pid_check = True
self.pid_scheme = found_id
self.id = self.metadata_merged.get('object_identifier')
Expand All @@ -405,6 +407,13 @@ def get_html_typed_links(self, rel="item"):
datalinks.append({'url': href, 'type': l.attrib.get('type'), 'rel': l.attrib.get('rel'), 'profile': l.attrib.get('format')})
return datalinks

def get_signposting_links(self, rel="item"):
    """Return the signposting header links whose relation type equals *rel*.

    Walks ``self.signposting_header_links`` and keeps, in their original
    order, the entries whose ``'rel'`` value equals ``rel``. A new list is
    returned; it is empty when no entry matches.
    """
    return [
        header_link
        for header_link in self.signposting_header_links
        if header_link.get('rel') == rel
    ]

def get_guessed_xml_link(self):
# in case object landing page URL ends with '.html' or '/html'
# try to find out whether there is some XML content if the suffix is replaced by 'xml'
Expand Down Expand Up @@ -506,16 +515,25 @@ def retrieve_metadata_external(self):
self.logger.info('FsF-F2-01M : Datacite metadata UNAVAILABLE')
else:
self.logger.info('FsF-F2-01M : Not a PID, therefore Datacite metadata (json) not requested.')
#dcat style
sign_header_links = []
#signposting header links
if self.get_signposting_links('describedby'):
sign_header_links = self.get_signposting_links('describedby')
self.metadata_sources.append((MetaDataCollector.Sources.SIGN_POSTING.value, 'signposting'))
#dcat style meta links
typed_metadata_links = self.get_html_typed_links(rel='alternate')
#ddi style
#ddi style meta links
rel_meta_links = self.get_html_typed_links(rel='meta')
#signposting style
sign_metadata_links = self.get_html_typed_links(rel='describedby')
#signposting style meta links
sign_meta_links = self.get_html_typed_links(rel='describedby')


typed_metadata_links.extend(sign_metadata_links)

typed_metadata_links.extend(sign_meta_links)
typed_metadata_links.extend(rel_meta_links)
typed_metadata_links.extend(sign_header_links)
guessed_metadata_link = self.get_guessed_xml_link()

if guessed_metadata_link is not None:
typed_metadata_links.append(guessed_metadata_link)

Expand Down
2 changes: 1 addition & 1 deletion fuji_server/controllers/fair_object_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,6 @@ def assess_by_id(body): # noqa: E501
metric_spec = Preprocessor.metric_specification
metric_version = os.path.basename(Preprocessor.METRIC_YML_PATH)
totalmetrics = len(results)
final_response = FAIRResults(timestamp= timestmp, test_id= ft.test_id, metric_version=metric_version, metric_specification=metric_spec, total_metrics=totalmetrics, results=results)
final_response = FAIRResults(timestamp= timestmp, software_version=ft.FUJI_VERSION,test_id= ft.test_id, metric_version=metric_version, metric_specification=metric_spec, total_metrics=totalmetrics, results=results)
return final_response

3 changes: 3 additions & 0 deletions fuji_server/evaluators/fair_evaluator_minimal_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def evaluate(self):
if source_mechanism == 'linked':
self.setEvaluationCriteriumScore('FsF-F2-01M-1c', 0,'pass')
self.setEvaluationCriteriumScore('FsF-F2-01M-1', 0, 'pass')
if source_mechanism == 'signposting':
self.setEvaluationCriteriumScore('FsF-F2-01M-1d', 0,'pass')
self.setEvaluationCriteriumScore('FsF-F2-01M-1', 0, 'pass')
self.result.test_status = test_status
self.result.metric_tests = self.metric_tests
self.result.score = self.score
Expand Down
24 changes: 22 additions & 2 deletions fuji_server/evaluators/fair_evaluator_persistent_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,39 @@ def evaluate(self):
# identify signposting links in header
header_link_string = requestHelper.getHTTPResponse().getheader('Link')
if header_link_string is not None:
self.logger.info('FsF-F1-02D : Found signposting links in response header of landingpage')

for preparsed_link in header_link_string.split(','):
found_link = None
found_type, type_match = None, None
found_rel, rel_match = None, None
parsed_link = preparsed_link.strip().split(';')
found_link = parsed_link[0].strip()
found_rel = re.search('rel=\"([a-z-]+)\"', parsed_link[1])
for link_prop in parsed_link[1:]:
if str(link_prop).startswith('rel="'):
rel_match = re.search('rel=\"(.*?)\"', link_prop)
elif str(link_prop).startswith('type="'):
type_match = re.search('type=\"(.*?)\"', link_prop)
if type_match:
found_type = type_match[1]
if rel_match:
found_rel = rel_match[1]
signposting_link_dict = {'url': found_link[1:-1], 'type': found_type, 'rel': found_rel}
if found_link:
self.fuji.signposting_header_links.append(signposting_link_dict)
'''
if found_rel:
if self.fuji.signposting_header_links.get(found_rel[1]):
self.fuji.signposting_header_links[found_rel[1]].append(found_link[1:-1])
else:
self.fuji.signposting_header_links[found_rel[1]]=[found_link[1:-1]]
'''

#check if there is a cite-as signposting link
if self.fuji.pid_scheme is None:
signposting_pid = self.fuji.signposting_header_links.get('cite-as')
signposting_pid_link = self.fuji.get_signposting_links('cite-as')
if signposting_pid_link:
signposting_pid = signposting_pid_link[0].get('url')
if signposting_pid:
found_ids = idutils.detect_identifier_schemes(signposting_pid[0])
if len(found_ids) > 1:
Expand Down

0 comments on commit fc88682

Please sign in to comment.