Merge pull request #673 from ramanakumars/feedback_fix
Added functionality to the question extractor to process survey tasks (for user skill calculation)
CKrawczyk authored Jan 23, 2023
2 parents 25e97f4 + 1c18269 commit 90153a9
Showing 4 changed files with 47 additions and 11 deletions.
10 changes: 9 additions & 1 deletion panoptes_aggregation/extractors/question_extractor.py
@@ -55,7 +55,15 @@ def question_extractor(classification, **kwargs):
     annotation = classification['annotations'][0]
     if isinstance(annotation['value'], list):
         for answer in annotation['value']:
-            answers[slugify_or_null(answer)] += 1
+            # allow survey tasks to be extracted using the
+            # question extractor for the purpose of
+            # calculating user skill
+            if isinstance(answer, dict):
+                answers[slugify_or_null(answer['choice'])] += 1
+            else:
+                # if extracting from a question task
+                # use the traditional option
+                answers[slugify_or_null(answer)] += 1
     else:
         answers[slugify_or_null(annotation['value'])] += 1
     return dict(answers)
13 changes: 8 additions & 5 deletions panoptes_aggregation/reducers/user_skill_reducer.py
@@ -110,8 +110,10 @@ def get_confusion_matrix(extracts, relevant_reduction, binary, null_class):
     # user selects that class
     for extracti in extracts:
         user_classifications += [key for key in extracti.keys() if isinstance(extracti[key], int) & (extracti[key] == 1)]
-        classes += [key for key in extracti.keys() if isinstance(extracti[key], int)]
-        true_values.extend(extracti['feedback'][true_key])
+
+        # convert all answers to lower case to be consistent across both lists
+        classes += [key.lower() for key in extracti.keys() if isinstance(extracti[key], int)]
+        true_values.extend(list(map(lambda e: e.lower(), extracti['feedback'][true_key])))
 
     # get a full list of classes as the union of the two sets of labels
     classes = np.sort(np.unique([*np.unique(classes), *np.unique(true_values)]))
@@ -144,10 +146,11 @@ def get_confusion_matrix(extracts, relevant_reduction, binary, null_class):
     for j, extract in enumerate(extracts):
 
         # find a list of user classified labels in this extract
-        user_class_i = [key for key in extract.keys() if isinstance(extract[key], int) & (extract[key] == 1)]
+        user_class_i = [key.lower() for key in extract.keys() if isinstance(extract[key], int) & (extract[key] == 1)]
+        true_keys = [key.lower() for key in extract['feedback'][true_key]]
 
         # get a full list of classifications
-        classi = np.sort(np.unique([*np.unique(extract['feedback'][true_key]),
+        classi = np.sort(np.unique([*np.unique(true_keys),
                                     *np.unique(user_class_i)]))
         classi = classi.tolist()
 
@@ -158,7 +161,7 @@ def get_confusion_matrix(extracts, relevant_reduction, binary, null_class):
 
         # loop through the true classes and populate the corresponding
         # indices in the list
-        for value in extract['feedback'][true_key]:
+        for value in true_keys:
             true_count_i[classi.index(value)] = value
 
         # do the same for the user classifications
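
A small sketch of why the lower-casing matters here: without it, a volunteer label such as 'CHEETAH' and a feedback value 'cheetah' are treated as two different classes when the union of labels is built with numpy. The label lists below are simplified stand-ins, not the reducer's full extract structure.

import numpy as np

user_labels = ['CHEETAH', 'zebra']        # keys the volunteer selected
true_labels = ['cheetah', 'wildebeest']   # gold-standard feedback values

# without normalisation the union contains duplicates that differ only in case
raw_classes = np.sort(np.unique([*np.unique(user_labels), *np.unique(true_labels)]))
print(raw_classes)  # ['CHEETAH' 'cheetah' 'wildebeest' 'zebra']

# lower-casing both lists first keeps the class axis of the confusion matrix consistent
classes = np.sort(np.unique([
    *np.unique([label.lower() for label in user_labels]),
    *np.unique([label.lower() for label in true_labels]),
]))
print(classes)  # ['cheetah' 'wildebeest' 'zebra']
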
@@ -115,12 +115,14 @@
     "pluck.true_value": "4"
 }
 
-TestSinglePluck = ExtractorTest(extractors.question_extractor,
+TestSinglePluck = ExtractorTest(
+    extractors.question_extractor,
     single_pluck_classification,
     single_pluck_expected,
     "Test pluck field functionality with a question extractor",
     kwargs={'pluck': single_pluck_keys},
-    test_name='TestSinglePluck')
+    test_name='TestSinglePluck'
+)
 
 
 feedback_pluck_classification = {
@@ -210,3 +212,26 @@
     kwargs={'pluck': feedback_empty_pluck_keys},
     test_name='TestFeedbackEmptyPluck'
 )
+
+# Test the additional functionality to use
+# survey task classifications using question extractor
+single_survey_classification = {
+    'annotations': [{
+        "task": "T0",
+        "task_label": "A single survey question",
+        "value": [
+            {"choice": "cheetah"},
+            {"choice": "leopard"}
+        ]
+    }]
+}
+
+single_survey_expected = {'cheetah': 1, "leopard": 1}
+
+TestSingle = ExtractorTest(
+    extractors.question_extractor,
+    single_survey_classification,
+    single_survey_expected,
+    'Test single survey question',
+    test_name='TestSingleSurvey'
+)
@@ -56,9 +56,9 @@ def process(data):
 
 reduced_data = {
     "classes": [
-        "CHEETAH",
-        "WILDEBEEST",
-        "ZEBRA",
+        "cheetah",
+        "wildebeest",
+        "zebra",
         "NONE"
     ],
     "confusion_simple": [
