Merge pull request #134 from pangaea-data-publisher/robbranch2

robbranch2
pangaea-data-publisher · Feb 15, 2021 · 1e87eaa · 1e87eaa
2 parents 2610b85 + 56d2c48
commit 1e87eaa
Show file tree

Hide file tree

Showing 33 changed files with 283 additions and 215 deletions.
diff --git a/fuji_server/client/ex_evaluate.py b/fuji_server/client/ex_evaluate.py
@@ -152,7 +152,7 @@
 #testpids=['https://data.gov.lv/dati/lv/dataset/maksatnespejas-procesi']
 testpids=['http://doi.org/10.17882/42182']
 #testpids = muchotestpids
-testpids =['10.15152/QDB.121']
+testpids =['https://datadoi.ee/handle/33/48']
 startpid=''
 def effectivehandlers(logger):
     handlers = logger.handlers

diff --git a/fuji_server/controllers/fair_check.py b/fuji_server/controllers/fair_check.py
diff --git a/fuji_server/evaluators/fair_evaluator.py b/fuji_server/evaluators/fair_evaluator.py
@@ -33,6 +33,7 @@ def __init__(self, fuji_instance):
         self.fuji=fuji_instance
         self.metric_identifier = None
         self.metrics = None
+        self.metric_number = None
         self.result = None
         self.metric_tests = dict()
         self.isDebug=self.fuji.isDebug
@@ -47,6 +48,7 @@ def set_metric(self, metric_identifier, metrics):
             self.total_score = int(self.metrics.get(metric_identifier).get('total_score'))
             self.score = FAIRResultCommonScore(total=self.total_score)
             self.metric_name = self.metrics.get(metric_identifier).get('metric_name')
+            self.metric_number = self.metrics.get(metric_identifier).get('metric_number')
             self.initializeEvaluationCriteria()
 
 

diff --git a/fuji_server/evaluators/fair_evaluator_community_metadata.py b/fuji_server/evaluators/fair_evaluator_community_metadata.py
@@ -30,7 +30,7 @@
 
 class FAIREvaluatorCommunityMetadata(FAIREvaluator):
     def evaluate(self):
-        self.result = CommunityEndorsedStandard(id=self.fuji.count, metric_identifier=self.metric_identifier,
+        self.result = CommunityEndorsedStandard(id=self.metric_number, metric_identifier=self.metric_identifier,
                                          metric_name=self.metric_name)
 
         standards_detected: List[CommunityEndorsedStandardOutputInner] = []
@@ -39,7 +39,7 @@ def evaluate(self):
         # ============== retrieve community standards by collected namespace uris
         if len(self.fuji.namespace_uri) > 0:
             no_match = []
-            self.logger.info('FsF-R1.3-01M : Namespaces included in the metadata - {}'.format(self.fuji.namespace_uri))
+            self.logger.info('FsF-R1.3-01M : Namespaces included in the metadata -: {}'.format(self.fuji.namespace_uri))
             for std_ns in self.fuji.namespace_uri:
                 std_ns_temp = self.fuji.lookup_metadatastandard_by_uri(std_ns)
                 # if std_ns_temp in FAIRCheck.COMMUNITY_METADATA_STANDARDS_URIS:
@@ -48,11 +48,11 @@ def evaluate(self):
                     std_name = self.fuji.COMMUNITY_METADATA_STANDARDS_URIS.get(std_ns_temp).get('title')
                     if subject and all(elem == "Multidisciplinary" for elem in subject):
                         self.logger.info(
-                            'FsF-R1.3-01M : Skipped non-disciplinary standard found through namespaces - {}'.format(
+                            'FsF-R1.3-01M : Skipped non-disciplinary standard found through namespaces -: {}'.format(
                                 std_ns))
                     else:
                         self.logger.log(self.fuji.LOG_SUCCESS,
-                            'FsF-R1.3-01M : Found disciplinary standard through namespaces - {}'.format(
+                            'FsF-R1.3-01M : Found disciplinary standard through namespaces -: {}'.format(
                                 std_ns))
                         nsout = CommunityEndorsedStandardOutputInner()
                         nsout.metadata_standard = std_name  # use here original standard uri detected
@@ -63,7 +63,7 @@ def evaluate(self):
                     no_match.append(std_ns)
             if len(no_match) > 0:
                 self.logger.info(
-                    'FsF-R1.3-01M : The following standards found through namespaces are excluded as they are not listed in RDA metadata catalog - {}'.format(
+                    'FsF-R1.3-01M : The following standards found through namespaces are excluded as they are not listed in RDA metadata catalog -: {}'.format(
                         no_match))
         if standards_detected:
             self.setEvaluationCriteriumScore('FsF-R1.3-01M-1a', 1, 'pass')
@@ -78,11 +78,11 @@ def evaluate(self):
                     if standard_found:
                         subject = self.fuji.COMMUNITY_STANDARDS.get(standard_found).get('subject_areas')
                         if subject and all(elem == "Multidisciplinary" for elem in subject):
-                            self.logger.info('FsF-R1.3-01M : Skipped non-disciplinary standard - {}'.format(s))
+                            self.logger.info('FsF-R1.3-01M : Skipped non-disciplinary standard -: {}'.format(s))
                         else:
                             self.setEvaluationCriteriumScore('FsF-R1.3-01M-1b', 1, 'pass')
                             self.logger.log(self.fuji.LOG_SUCCESS,
-                                            'FsF-R1.3-01M : Found disciplinary standard through re3data - {}'.format(
+                                            'FsF-R1.3-01M : Found disciplinary standard through re3data -: {}'.format(
                                                 s))
                             out = CommunityEndorsedStandardOutputInner()
                             out.metadata_standard = s

diff --git a/fuji_server/evaluators/fair_evaluator_content_included.py b/fuji_server/evaluators/fair_evaluator_content_included.py
@@ -29,26 +29,26 @@
 
 class FAIREvaluatorContentIncluded(FAIREvaluator):
     def evaluate(self):
-        self.result = IdentifierIncluded(id=self.fuji.count, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
+        self.result = IdentifierIncluded(id=self.metric_number, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
         self.output = IdentifierIncludedOutput()
 
         id_object = self.fuji.metadata_merged.get('object_identifier')
         self.output.object_identifier_included = id_object
         contents = self.fuji.metadata_merged.get('object_content_identifier')
         if id_object is not None:
-            self.logger.info('FsF-F3-01M : Object identifier specified {}'.format(id_object))
+            self.logger.info('FsF-F3-01M : Object identifier specified -: {}'.format(id_object))
         score = 0
         content_list = []
         if contents:
             if isinstance(contents, dict):
                 contents = [contents]
             contents = [c for c in contents if c]
             number_of_contents = len(contents)
-            self.logger.log(self.fuji.LOG_SUCCESS,'FsF-F3-01M : Number of object content identifier found - {}'.format(number_of_contents))
+            self.logger.log(self.fuji.LOG_SUCCESS,'FsF-F3-01M : Number of object content identifier found -: {}'.format(number_of_contents))
 
             if number_of_contents >= self.fuji.FILES_LIMIT:
                 self.logger.info(
-                    'FsF-F3-01M : The total number of object (content) specified is above threshold, so use the first {} content identifiers'.format(
+                    'FsF-F3-01M : The total number of object (content) specified is above threshold, so use the first -: {} content identifiers'.format(
                         self.fuji.FILES_LIMIT))
                 contents = contents[:self.fuji.FILES_LIMIT]
 
@@ -65,11 +65,11 @@ def evaluate(self):
                         content_link['header_content_type'] = str(content_link['header_content_type']).split(';')[0]
                         content_link['header_content_length'] = response.getheader('Content-Length')
                         if content_link['header_content_type'] != content_link.get('type'):
-                            self.logger.warning('FsF-F3-01M : Content type given in metadata (' + str(content_link.get(
-                                'type')) + ') differs from content type given in Header response (' + str(
+                            self.logger.warning('FsF-F3-01M : Content type given in metadata differs from content type given in Header response -: (' + str(content_link.get(
+                                'type')) + ') vs. (' + str(
                                 content_link['header_content_type']) + ')')
                             self.logger.info(
-                                'FsF-F3-01M : Replacing metadata content type with content type from Header response: ' + str(
+                                'FsF-F3-01M : Replacing metadata content type with content type from Header response -: ' + str(
                                     content_link['header_content_type']))
                             content_link['type'] = content_link['header_content_type']
                         # will pass even if the url cannot be accessed which is OK
@@ -79,8 +79,7 @@ def evaluate(self):
                         did_output_content.content_identifier_active = False
                         #content_list.append(did_output_content)
                     except urllib.error.HTTPError as e:
-                        self.logger.warning(
-                            'FsF-F3-01M : Content identifier {0} inaccessible, HTTPError code {1} '.format(
+                        self.logger.warning('FsF-F3-01M : Content identifier inaccessible -: {0} , HTTPError code {1} '.format(
                                 content_link.get('url'), e.code))
                     except urllib.error.URLError as e:
                         self.logger.exception(e.reason)
@@ -91,7 +90,7 @@ def evaluate(self):
                         did_output_content.content_identifier_active = True
                     content_list.append(did_output_content)
                 else:
-                    self.logger.warning('FsF-F3-01M : Object (content) url is empty - {}'.format(content_link))
+                    self.logger.warning('FsF-F3-01M : Object (content) url is empty -: {}'.format(content_link))
         else:
             self.logger.warning('FsF-F3-01M : Data (content) identifier is missing.')
 

diff --git a/fuji_server/evaluators/fair_evaluator_data_access_level.py b/fuji_server/evaluators/fair_evaluator_data_access_level.py
@@ -36,7 +36,7 @@ def evaluate(self):
         #2) Eprints AccessRights Vocabulary: check for http://purl.org/eprint/accessRights/
         #3) EU publications access rights check for http://publications.europa.eu/resource/authority/access-right/NON_PUBLIC
         #4) Openaire Guidelines <dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
-        self.result = DataAccessLevel(self.fuji.count, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
+        self.result = DataAccessLevel(self.metric_number, metric_identifier=self.metric_identifier, metric_name=self.metric_name)
         self.output = DataAccessOutput()
         licence_evaluator = FAIREvaluatorLicense(self.fuji)
         #rights_regex = r'((\/licenses|purl.org\/coar\/access_right|purl\.org\/eprint\/accessRights|europa\.eu\/resource\/authority\/access-right)\/{1}(\S*))'
@@ -52,11 +52,15 @@ def evaluate(self):
         #access_rights can be None or []
         if access_rights:
             self.logger.info('FsF-A1-01M : Found access rights information in dedicated metadata element')
+
             if isinstance(access_rights, str):
                 access_rights = [access_rights]
             for access_right in access_rights:
-                self.logger.info('FsF-A1-01M : Access information specified - {}'.format(access_right))
+                #TODO: remove new lines also from other logger messages or handle this elsewhere
+                access_right = re.sub(r"[\r\n]+", ' ', access_right)
+                self.logger.info('FsF-A1-01M : Access information specified -: {}'.format(access_right.replace('\n', ' ')))
                 if not licence_evaluator.isLicense(value=access_right, metric_id=self.metric_identifier):  # exclude license-based text from access_rights
+
                     rights_match = re.search(rights_regex, access_right, re.IGNORECASE)
                     if rights_match is not None:
                         last_group = len(rights_match.groups())
@@ -65,14 +69,25 @@ def evaluate(self):
                             if re.search(right_code, filtered_rights, re.IGNORECASE):
                                 access_level = right_status
                                 access_details['access_condition'] = rights_match[1] #overwrite existing condition
-                                self.logger.info('FsF-A1-01M : Access level recognized as ' + str(right_status))
+                                self.logger.info('FsF-A1-01M : Standardized actionable access level recognized as -:' + str(right_status))
                                 break
                         break
                     else:
-                        self.logger.info('FsF-A1-01M : Not a standardized access level')
+                        self.logger.info('FsF-A1-01M : Not a standardized, actionable access level')
                 else:
-                    self.logger.warning('FsF-A1-01M : Access condition looks like license, therefore the following is ignored - {}'.format(access_right))
+                    self.logger.warning('FsF-A1-01M : Access condition looks like license, therefore the following is ignored -: {}'.format(access_right))
                     exclude.append(access_right)
+
+            if not access_level:
+                lower_case_access_dict = dict((k.lower(), v) for k, v in Mapper.ACCESS_RIGHT_CODES.value.items())
+                for access_right in access_rights:
+                    if access_right.lower() in lower_case_access_dict:
+                        self.logger.info('FsF-A1-01M : Non-actionable (term only) standard access level recognized as -:' + str(
+                            lower_case_access_dict.get(access_right.lower())))
+                        access_level = lower_case_access_dict.get(access_right.lower())
+                        access_details['access_condition'] = access_right
+                        break
+
             if not access_details and access_rights:
                 access_rights = set(access_rights) - set(exclude)
                 if access_rights :
@@ -97,7 +112,7 @@ def evaluate(self):
         if access_level == 'embargoed':
             available_date = self.fuji.metadata_merged.get('publication_date')
             if available_date:
-                self.logger.info('FsF-A1-01M : Embargoed access, available date - {}'.format(available_date))
+                self.logger.info('FsF-A1-01M : Embargoed access, available date -: {}'.format(available_date))
                 access_details['available_date'] = available_date
             else:
                 self.logger.warning('FsF-A1-01M : Embargoed access, available date NOT found')
@@ -112,7 +127,7 @@ def evaluate(self):
         if access_level: #must be one of ['public', 'embargoed', 'restricted', 'closed_metadataonly']
             self.output.access_level = access_level
             self.setEvaluationCriteriumScore('FsF-A1-01M-1', 1, 'pass')
-            self.logger.log(self.fuji.LOG_SUCCESS, 'FsF-A1-01M : Access level to data could successfully be determined: '+access_level)
+            self.logger.log(self.fuji.LOG_SUCCESS, 'FsF-A1-01M : Access level to data could successfully be determined -: '+access_level)
         else:
             self.logger.warning('FsF-A1-01M : Unable to determine the access level')
         self.output.access_details = access_details