Skip to content

Commit

Permalink
Merge pull request #134 from pangaea-data-publisher/robbranch2
Browse files: browse the repository at this point in the history
robbranch2
  • Loading branch information
huberrob authored Feb 15, 2021
2 parents 2610b85 + 56d2c48 commit 1e87eaa
Show file tree
Hide file tree
Showing 33 changed files with 283 additions and 215 deletions.
2 changes: 1 addition & 1 deletion fuji_server/client/ex_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@
#testpids=['https://data.gov.lv/dati/lv/dataset/maksatnespejas-procesi']
testpids=['http://doi.org/10.17882/42182']
#testpids = muchotestpids
testpids =['10.15152/QDB.121']
testpids =['https://datadoi.ee/handle/33/48']
startpid=''
def effectivehandlers(logger):
handlers = logger.handlers
Expand Down
137 changes: 92 additions & 45 deletions fuji_server/controllers/fair_check.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions fuji_server/evaluators/fair_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self, fuji_instance):
self.fuji=fuji_instance
self.metric_identifier = None
self.metrics = None
self.metric_number = None
self.result = None
self.metric_tests = dict()
self.isDebug=self.fuji.isDebug
Expand All @@ -47,6 +48,7 @@ def set_metric(self, metric_identifier, metrics):
self.total_score = int(self.metrics.get(metric_identifier).get('total_score'))
self.score = FAIRResultCommonScore(total=self.total_score)
self.metric_name = self.metrics.get(metric_identifier).get('metric_name')
self.metric_number = self.metrics.get(metric_identifier).get('metric_number')
self.initializeEvaluationCriteria()


Expand Down
14 changes: 7 additions & 7 deletions fuji_server/evaluators/fair_evaluator_community_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

class FAIREvaluatorCommunityMetadata(FAIREvaluator):
def evaluate(self):
self.result = CommunityEndorsedStandard(id=self.fuji.count, metric_identifier=self.metric_identifier,
self.result = CommunityEndorsedStandard(id=self.metric_number, metric_identifier=self.metric_identifier,
metric_name=self.metric_name)

standards_detected: List[CommunityEndorsedStandardOutputInner] = []
Expand All @@ -39,7 +39,7 @@ def evaluate(self):
# ============== retrieve community standards by collected namespace uris
if len(self.fuji.namespace_uri) > 0:
no_match = []
self.logger.info('FsF-R1.3-01M : Namespaces included in the metadata - {}'.format(self.fuji.namespace_uri))
self.logger.info('FsF-R1.3-01M : Namespaces included in the metadata -: {}'.format(self.fuji.namespace_uri))
for std_ns in self.fuji.namespace_uri:
std_ns_temp = self.fuji.lookup_metadatastandard_by_uri(std_ns)
# if std_ns_temp in FAIRCheck.COMMUNITY_METADATA_STANDARDS_URIS:
Expand All @@ -48,11 +48,11 @@ def evaluate(self):
std_name = self.fuji.COMMUNITY_METADATA_STANDARDS_URIS.get(std_ns_temp).get('title')
if subject and all(elem == "Multidisciplinary" for elem in subject):
self.logger.info(
'FsF-R1.3-01M : Skipped non-disciplinary standard found through namespaces - {}'.format(
'FsF-R1.3-01M : Skipped non-disciplinary standard found through namespaces -: {}'.format(
std_ns))
else:
self.logger.log(self.fuji.LOG_SUCCESS,
'FsF-R1.3-01M : Found disciplinary standard through namespaces - {}'.format(
'FsF-R1.3-01M : Found disciplinary standard through namespaces -: {}'.format(
std_ns))
nsout = CommunityEndorsedStandardOutputInner()
nsout.metadata_standard = std_name # use here original standard uri detected
Expand All @@ -63,7 +63,7 @@ def evaluate(self):
no_match.append(std_ns)
if len(no_match) > 0:
self.logger.info(
'FsF-R1.3-01M : The following standards found through namespaces are excluded as they are not listed in RDA metadata catalog - {}'.format(
'FsF-R1.3-01M : The following standards found through namespaces are excluded as they are not listed in RDA metadata catalog -: {}'.format(
no_match))
if standards_detected:
self.setEvaluationCriteriumScore('FsF-R1.3-01M-1a', 1, 'pass')
Expand All @@ -78,11 +78,11 @@ def evaluate(self):
if standard_found:
subject = self.fuji.COMMUNITY_STANDARDS.get(standard_found).get('subject_areas')
if subject and all(elem == "Multidisciplinary" for elem in subject):
self.logger.info('FsF-R1.3-01M : Skipped non-disciplinary standard - {}'.format(s))
self.logger.info('FsF-R1.3-01M : Skipped non-disciplinary standard -: {}'.format(s))
else:
self.setEvaluationCriteriumScore('FsF-R1.3-01M-1b', 1, 'pass')
self.logger.log(self.fuji.LOG_SUCCESS,
'FsF-R1.3-01M : Found disciplinary standard through re3data - {}'.format(
'FsF-R1.3-01M : Found disciplinary standard through re3data -: {}'.format(
s))
out = CommunityEndorsedStandardOutputInner()
out.metadata_standard = s
Expand Down
19 changes: 9 additions & 10 deletions fuji_server/evaluators/fair_evaluator_content_included.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,26 @@

class FAIREvaluatorContentIncluded(FAIREvaluator):
def evaluate(self):
self.result = IdentifierIncluded(id=self.fuji.count, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
self.result = IdentifierIncluded(id=self.metric_number, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
self.output = IdentifierIncludedOutput()

id_object = self.fuji.metadata_merged.get('object_identifier')
self.output.object_identifier_included = id_object
contents = self.fuji.metadata_merged.get('object_content_identifier')
if id_object is not None:
self.logger.info('FsF-F3-01M : Object identifier specified {}'.format(id_object))
self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
score = 0
content_list = []
if contents:
if isinstance(contents, dict):
contents = [contents]
contents = [c for c in contents if c]
number_of_contents = len(contents)
self.logger.log(self.fuji.LOG_SUCCESS,'FsF-F3-01M : Number of object content identifier found - {}'.format(number_of_contents))
self.logger.log(self.fuji.LOG_SUCCESS,'FsF-F3-01M : Number of object content identifier found -: {}'.format(number_of_contents))

if number_of_contents >= self.fuji.FILES_LIMIT:
self.logger.info(
'FsF-F3-01M : The total number of object (content) specified is above threshold, so use the first {} content identifiers'.format(
'FsF-F3-01M : The total number of object (content) specified is above threshold, so use the first -: {} content identifiers'.format(
self.fuji.FILES_LIMIT))
contents = contents[:self.fuji.FILES_LIMIT]

Expand All @@ -65,11 +65,11 @@ def evaluate(self):
content_link['header_content_type'] = str(content_link['header_content_type']).split(';')[0]
content_link['header_content_length'] = response.getheader('Content-Length')
if content_link['header_content_type'] != content_link.get('type'):
self.logger.warning('FsF-F3-01M : Content type given in metadata (' + str(content_link.get(
'type')) + ') differs from content type given in Header response (' + str(
self.logger.warning('FsF-F3-01M : Content type given in metadata differs from content type given in Header response -: (' + str(content_link.get(
'type')) + ') vs. (' + str(
content_link['header_content_type']) + ')')
self.logger.info(
'FsF-F3-01M : Replacing metadata content type with content type from Header response: ' + str(
'FsF-F3-01M : Replacing metadata content type with content type from Header response -: ' + str(
content_link['header_content_type']))
content_link['type'] = content_link['header_content_type']
# will pass even if the url cannot be accessed which is OK
Expand All @@ -79,8 +79,7 @@ def evaluate(self):
did_output_content.content_identifier_active = False
#content_list.append(did_output_content)
except urllib.error.HTTPError as e:
self.logger.warning(
'FsF-F3-01M : Content identifier {0} inaccessible, HTTPError code {1} '.format(
self.logger.warning('FsF-F3-01M : Content identifier inaccessible -: {0} , HTTPError code {1} '.format(
content_link.get('url'), e.code))
except urllib.error.URLError as e:
self.logger.exception(e.reason)
Expand All @@ -91,7 +90,7 @@ def evaluate(self):
did_output_content.content_identifier_active = True
content_list.append(did_output_content)
else:
self.logger.warning('FsF-F3-01M : Object (content) url is empty - {}'.format(content_link))
self.logger.warning('FsF-F3-01M : Object (content) url is empty -: {}'.format(content_link))
else:
self.logger.warning('FsF-F3-01M : Data (content) identifier is missing.')

Expand Down
29 changes: 22 additions & 7 deletions fuji_server/evaluators/fair_evaluator_data_access_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def evaluate(self):
#2) Eprints AccessRights Vocabulary: check for http://purl.org/eprint/accessRights/
#3) EU publications access rights check for http://publications.europa.eu/resource/authority/access-right/NON_PUBLIC
#4) Openaire Guidelines <dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
self.result = DataAccessLevel(self.fuji.count, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
self.result = DataAccessLevel(self.metric_number, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
self.output = DataAccessOutput()
licence_evaluator = FAIREvaluatorLicense(self.fuji)
#rights_regex = r'((\/licenses|purl.org\/coar\/access_right|purl\.org\/eprint\/accessRights|europa\.eu\/resource\/authority\/access-right)\/{1}(\S*))'
Expand All @@ -52,11 +52,15 @@ def evaluate(self):
#access_rights can be None or []
if access_rights:
self.logger.info('FsF-A1-01M : Found access rights information in dedicated metadata element')

if isinstance(access_rights, str):
access_rights = [access_rights]
for access_right in access_rights:
self.logger.info('FsF-A1-01M : Access information specified - {}'.format(access_right))
#TODO: remove new lines also from other logger messages or handle this elsewhere
access_right = re.sub(r"[\r\n]+", ' ', access_right)
self.logger.info('FsF-A1-01M : Access information specified -: {}'.format(access_right.replace('\n', ' ')))
if not licence_evaluator.isLicense(value=access_right, metric_id=self.metric_identifier): # exclude license-based text from access_rights

rights_match = re.search(rights_regex, access_right, re.IGNORECASE)
if rights_match is not None:
last_group = len(rights_match.groups())
Expand All @@ -65,14 +69,25 @@ def evaluate(self):
if re.search(right_code, filtered_rights, re.IGNORECASE):
access_level = right_status
access_details['access_condition'] = rights_match[1] #overwrite existing condition
self.logger.info('FsF-A1-01M : Access level recognized as ' + str(right_status))
self.logger.info('FsF-A1-01M : Standardized actionable access level recognized as -:' + str(right_status))
break
break
else:
self.logger.info('FsF-A1-01M : Not a standardized access level')
self.logger.info('FsF-A1-01M : Not a standardized, actionable access level')
else:
self.logger.warning('FsF-A1-01M : Access condition looks like license, therefore the following is ignored - {}'.format(access_right))
self.logger.warning('FsF-A1-01M : Access condition looks like license, therefore the following is ignored -: {}'.format(access_right))
exclude.append(access_right)

if not access_level:
lower_case_access_dict = dict((k.lower(), v) for k, v in Mapper.ACCESS_RIGHT_CODES.value.items())
for access_right in access_rights:
if access_right.lower() in lower_case_access_dict:
self.logger.info('FsF-A1-01M : Non-actionable (term only) standard access level recognized as -:' + str(
lower_case_access_dict.get(access_right.lower())))
access_level = lower_case_access_dict.get(access_right.lower())
access_details['access_condition'] = access_right
break

if not access_details and access_rights:
access_rights = set(access_rights) - set(exclude)
if access_rights :
Expand All @@ -97,7 +112,7 @@ def evaluate(self):
if access_level == 'embargoed':
available_date = self.fuji.metadata_merged.get('publication_date')
if available_date:
self.logger.info('FsF-A1-01M : Embargoed access, available date - {}'.format(available_date))
self.logger.info('FsF-A1-01M : Embargoed access, available date -: {}'.format(available_date))
access_details['available_date'] = available_date
else:
self.logger.warning('FsF-A1-01M : Embargoed access, available date NOT found')
Expand All @@ -112,7 +127,7 @@ def evaluate(self):
if access_level: #must be one of ['public', 'embargoed', 'restricted', 'closed_metadataonly']
self.output.access_level = access_level
self.setEvaluationCriteriumScore('FsF-A1-01M-1', 1, 'pass')
self.logger.log(self.fuji.LOG_SUCCESS, 'FsF-A1-01M : Access level to data could successfully be determined: '+access_level)
self.logger.log(self.fuji.LOG_SUCCESS, 'FsF-A1-01M : Access level to data could successfully be determined -: '+access_level)
else:
self.logger.warning('FsF-A1-01M : Unable to determine the access level')
self.output.access_details = access_details
Expand Down
Loading

0 comments on commit 1e87eaa

Please sign in to comment.