Skip to content

Commit

Permalink
Merge pull request #136 from pangaea-data-publisher/robbranch2
Browse files Browse the repository at this point in the history
v1.0.6
  • Loading branch information
huberrob authored Feb 23, 2021
2 parents 1e87eaa + 27d0aa4 commit 978ad88
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 18 deletions.
22 changes: 15 additions & 7 deletions fuji_server/client/ex_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
debug = True

muchotestpids=[
'10.15493/DEFF.10000003','https://phaidra.cab.unipd.it/view/o:267291',
'10.15493/DEFF.10000003','https://phaidra.cab.unipd.it/view/o:267291','https://jyx.jyu.fi/handle/123456789/39205',
'doi:10.1038/nphys1170','doi:10.17882/42182','https://deims.org/sites/default/files/data/elter_va_fruska_gora_temperature_0.xls',
'10.25504/FAIRsharing.2bdvmk','http://bio2rdf.org/affymetrix:1415765_at','doi:10.18129/B9.bioc.BiocGenerics',
'https://data.noaa.gov/dataset/dataset/w00411-nos-hydrographic-survey-2015-08-15','10.6075/J0513WJD','10.7280/D1P075',
Expand Down Expand Up @@ -146,13 +146,15 @@
#testpids=['http://doi.org/10.1007/s10531-013-0468-6']
#rdf
#testpids=['http://tun.fi/JX.1099769']
testpids=['https://ortus.rtu.lv/science/en/datamodule/3']
#testpids=['https://ortus.rtu.lv/science/en/datamodule/3']
#rdf
#testpids=['https://databank.ora.ox.ac.uk/UniversityCollege/datasets/04156fde-dabb-48fd-baf6-533182f74b5b']
#testpids=['https://data.gov.lv/dati/lv/dataset/maksatnespejas-procesi']
testpids=['http://doi.org/10.17882/42182']
#testpids = muchotestpids
testpids =['https://datadoi.ee/handle/33/48']
#testpids=['http://doi.org/10.17882/42182']
testpids = muchotestpids
#testpids =['https://repo.clarino.uib.no/xmlui/handle/11509/103']
#testpids=['https://data.aussda.at/dataset.xhtml?persistentId=doi:10.11587/QQ7HTL']
testpids =['https://www.proteinatlas.org/ENSG00000180739-S1PR5/tissue/primary+data']
startpid=''
def effectivehandlers(logger):
handlers = logger.handlers
Expand Down Expand Up @@ -195,19 +197,25 @@ def main():
start=False
usedatacite = True
tracemalloc.start()
n=1
for identifier in testpids:

print (identifier)
print(n)
n+=1
if identifier==startpid or not startpid:
start=True
if start:
ft = FAIRCheck(uid=identifier, test_debug=True, use_datacite=usedatacite)

# print(effectivehandlers(ft.logger))
uid_result, pid_result = ft.check_unique_persistent()
core_metadata_result = ft.check_minimal_metatadata()
ft.retrieve_metadata_embedded(ft.extruct_result)
include_embedded= True
if ft.repeat_pid_check:
uid_result, pid_result = ft.check_unique_persistent()
ft.retrieve_metadata_external()

core_metadata_result = ft.check_minimal_metatadata()
content_identifier_included_result = ft.check_content_identifier_included()
access_level_result=ft.check_data_access_level()
license_result = ft.check_license()
Expand Down
19 changes: 11 additions & 8 deletions fuji_server/controllers/fair_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,13 @@ class FAIRCheck:
FILES_LIMIT = None
LOG_SUCCESS = 25
VALID_RESOURCE_TYPES = []
FUJI_VERSION = 'v1.0.5d'
FUJI_VERSION = 'v1.0.6'

def __init__(self, uid, test_debug=False, oaipmh=None, use_datacite=True):
uid_bytes = uid.encode('utf-8')
self.test_id = hashlib.sha1(uid_bytes).hexdigest()
#str(base64.urlsafe_b64encode(uid_bytes), "utf-8") # an id we can use for caching etc
self.id = uid
self.id = self.input_id = uid
self.oaipmh_endpoint = oaipmh
self.pid_url = None # full pid # e.g., "https://doi.org/10.1594/pangaea.906092 or url (non-pid)
self.landing_url = None # url of the landing page of self.pid_url
Expand Down Expand Up @@ -141,7 +141,7 @@ def __init__(self, uid, test_debug=False, oaipmh=None, use_datacite=True):
self.embedded_retrieved = False
FAIRCheck.load_predata()
self.extruct = None
self.extruct_result = None
self.extruct_result = {}
self.tika_content_types_list = []


Expand Down Expand Up @@ -199,9 +199,10 @@ def retrieve_metadata(self, extruct_metadata):
else:
self.logger.warning('FsF-F2-01M : NO structured metadata embedded in HTML')
'''
if self.reference_elements: # this will be always true as we need datacite client id
self.retrieve_metadata_embedded(embedded_exists)
self.retrieve_metadata_external()
#if self.reference_elements: # this will be always true as we need datacite client id
# if include_embedded ==True:
# self.retrieve_metadata_embedded(embedded_exists)
# self.retrieve_metadata_external()

# ========= clean merged metadata, delete all entries which are None or ''
data_objects = self.metadata_merged.get('object_content_identifier')
Expand Down Expand Up @@ -268,7 +269,7 @@ def retrieve_apis_standards(self):
else:
self.logger.warning('{} : Skipped external ressources (OAI, re3data) checks since landing page could not be resolved'.format('FsF-R1.3-01M'))

def retrieve_metadata_embedded(self, extruct_metadata):
def retrieve_metadata_embedded(self, extruct_metadata ={}):
isPid = False
if self.pid_scheme:
isPid = True
Expand Down Expand Up @@ -611,6 +612,8 @@ def retrieve_metadata_external(self):

if typed_metadata_links is not None:
typed_rdf_collector = None
#unique entries for typed links
typed_metadata_links = [dict(t) for t in {tuple(d.items()) for d in typed_metadata_links}]
for metadata_link in typed_metadata_links:
if metadata_link['type'] in ['application/rdf+xml','text/n3','text/ttl','application/ld+json']:
self.logger.info('FsF-F2-01M : Found e.g. Typed Links in HTML Header linking to RDF Metadata -: ('+str(metadata_link['type'])+' '+str(metadata_link['url'])+')')
Expand Down Expand Up @@ -682,7 +685,7 @@ def check_persistent_identifier(self):
def check_unique_persistent(self):
    """Run the identifier-uniqueness and identifier-persistence checks together.

    Returns:
        tuple: (result of ``check_unique_identifier()``,
                result of ``check_persistent_identifier()``).
    """
    # Evaluate in the same order as a left-to-right tuple expression would:
    # uniqueness first, then persistence.
    unique_result = self.check_unique_identifier()
    persistent_result = self.check_persistent_identifier()
    return unique_result, persistent_result

def check_minimal_metatadata(self):
def check_minimal_metatadata(self,include_embedded = True):
    # Evaluate metric FsF-F2-01M (minimal/core descriptive metadata) via the
    # dedicated evaluator and return its result object.
    # NOTE(review): the name keeps the historical typo 'metatadata'; external
    # callers shown in this commit use that spelling, so it must not be renamed.
    # NOTE(review): `include_embedded` is accepted but not used in this body —
    # presumably reserved for callers that retrieve embedded metadata
    # separately beforehand; confirm intended semantics.
    core_metadata_check = FAIREvaluatorCoreMetadata(self)
    core_metadata_check.set_metric('FsF-F2-01M', metrics=FAIRCheck.METRICS)
    return core_metadata_check.getResult()
Expand Down
6 changes: 5 additions & 1 deletion fuji_server/controllers/fair_object_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,13 @@ def assess_by_id(body): # noqa: E501
ft = FAIRCheck(uid=identifier, test_debug=debug, oaipmh=oai, use_datacite=usedatacite)

uid_result, pid_result = ft.check_unique_persistent()
core_metadata_result = ft.check_minimal_metatadata()
ft.retrieve_metadata_embedded(ft.extruct_result)
include_embedded = True
if ft.repeat_pid_check:
uid_result, pid_result = ft.check_unique_persistent()
ft.retrieve_metadata_external()

core_metadata_result = ft.check_minimal_metatadata()
content_identifier_included_result = ft.check_content_identifier_included()
access_level_result = ft.check_data_access_level()
license_result = ft.check_license()
Expand Down
1 change: 1 addition & 0 deletions fuji_server/evaluators/fair_evaluator_minimal_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from fuji_server.helper.metadata_mapper import Mapper

class FAIREvaluatorCoreMetadata(FAIREvaluator):

def evaluate(self):
if self.fuji.landing_url is None:
self.logger.warning('FsF-F2-01M : Metadata checks probably unreliable: landing page URL could not be determined')
Expand Down
15 changes: 13 additions & 2 deletions fuji_server/evaluators/fair_evaluator_persistent_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
class FAIREvaluatorPersistentIdentifier(FAIREvaluator):

def evaluate(self):

self.result = Persistence(id=self.metric_number, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
self.output = PersistenceOutput()
# ======= CHECK IDENTIFIER PERSISTENCE =======
Expand All @@ -50,10 +49,17 @@ def evaluate(self):
requestHelper = RequestHelper(check_url, self.logger)
requestHelper.setAcceptType(AcceptTypes.html) # request
neg_source, self.fuji.extruct_result = requestHelper.content_negotiate('FsF-F1-02D', ignore_html = False)
if type(self.fuji.extruct_result) != dict:
self.fuji.extruct_result ={}
r = requestHelper.getHTTPResponse()

if r:
self.fuji.landing_url = requestHelper.redirect_url
#in case the test has been repeated because a PID has been found in metadata
if self.fuji.repeat_pid_check == True:
if self.fuji.landing_url != self.fuji.input_id:
self.logger.warning('FsF-F1-02D : Landing page URL resolved from PID found in metadata does not match with input URL')

if r.status == 200:
# identify signposting links in header
header_link_string = requestHelper.getHTTPResponse().getheader('Link')
Expand All @@ -64,18 +70,23 @@ def evaluate(self):
found_link = None
found_type, type_match = None, None
found_rel, rel_match = None, None
found_formats, formats_match = None, None
parsed_link = preparsed_link.strip().split(';')
found_link = parsed_link[0].strip()
for link_prop in parsed_link[1:]:
if str(link_prop).startswith('rel="'):
rel_match = re.search('rel=\"(.*?)\"', link_prop)
elif str(link_prop).startswith('type="'):
type_match = re.search('type=\"(.*?)\"', link_prop)
elif str(link_prop).startswith('formats="'):
formats_match = re.search('formats=\"(.*?)\"', link_prop)
if type_match:
found_type = type_match[1]
if rel_match:
found_rel = rel_match[1]
signposting_link_dict = {'url': found_link[1:-1], 'type': found_type, 'rel': found_rel}
if formats_match:
found_formats = formats_match[1]
signposting_link_dict = {'url': found_link[1:-1], 'type': found_type, 'rel': found_rel, 'profile':found_formats}
if found_link:
self.fuji.signposting_header_links.append(signposting_link_dict)

Expand Down

0 comments on commit 978ad88

Please sign in to comment.