feat: implement grading strategies for peer evaluations #2196
Changes from all commits: 85bf487, eb4b911, 9b4d8e2, c3c73c2, a4fe366, cde0f5d, 09e7b9f, d804bdb, fcc3c8b, 3936e2d, 9351c35, 3a3f841, 0cd23e1, a31f654, ced373a, a955bde, 99a8bc1, 4101928, f4da665, d323dc9, 0d8975b, 7c97379, b1df638, e507ab5, 61f952c, 9ba4f80, 4c6f474, 97ff348, da58dc8, 69bc17a
@@ -2,4 +2,4 @@
 Initialization Information for Open Assessment Module
 """

-__version__ = '6.9.0'
+__version__ = '6.10.0'
@@ -36,6 +36,12 @@
 KEY_SEPARATOR = '/'


+class PeerGradingStrategy:
+    """Grading strategies for peer assessments."""
+    MEAN = "mean"
+    MEDIAN = "median"
+
+
 class InvalidRubricSelection(Exception):
     """
     The specified criterion/option do not exist in the rubric.
@@ -488,6 +494,27 @@ def create(cls, rubric, scorer_id, submission_uuid, score_type, feedback=None, s

         return cls.objects.create(**assessment_params)

+    @classmethod
+    def get_score_dict(cls, scores_dict, grading_strategy):
+        """Determine the score in a dictionary of lists of scores based on the
+        grading strategy calculation configuration.
+
+        Args:
+            scores_dict (dict): A dictionary of lists of int values. These int values
+                are reduced to a single value that represents the median.
+            grading_strategy (str): The type of score to calculate. Defaults to "median".
+
+        Returns:
+            (dict): A dictionary with criterion name keys and median score
+                values.
+        """
+        assert grading_strategy in [
+            PeerGradingStrategy.MEDIAN,
+            PeerGradingStrategy.MEAN,
+        ], "Invalid grading strategy."
+
+        return getattr(cls, f"get_{grading_strategy}_score_dict")(scores_dict)
+
     @classmethod
     def get_median_score_dict(cls, scores_dict):
         """Determine the median score in a dictionary of lists of scores
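A note on the dispatch in get_score_dict above (editorial illustration, not part of the diff): the strategy string is used to build the target method name, so "median" resolves to get_median_score_dict and "mean" to get_mean_score_dict, with the assert rejecting anything that is not one of the PeerGradingStrategy values. A minimal standalone sketch of the same getattr pattern, with toy reducers standing in for the real Assessment methods (ToyScoreCalculator and its per-criterion logic are illustrative only):

import math

class ToyScoreCalculator:
    """Toy stand-in illustrating the getattr-based strategy dispatch."""

    @classmethod
    def get_mean_score_dict(cls, scores_dict):
        # Ceil-rounded mean per criterion, mirroring the rounding in the diff above.
        return {k: math.ceil(sum(v) / float(len(v))) for k, v in scores_dict.items()}

    @classmethod
    def get_median_score_dict(cls, scores_dict):
        # Simplified median (middle element of the sorted list) per criterion.
        return {k: sorted(v)[len(v) // 2] for k, v in scores_dict.items()}

    @classmethod
    def get_score_dict(cls, scores_dict, grading_strategy="median"):
        # The strategy string selects which per-criterion reducer runs.
        assert grading_strategy in ("median", "mean"), "Invalid grading strategy."
        return getattr(cls, f"get_{grading_strategy}_score_dict")(scores_dict)

print(ToyScoreCalculator.get_score_dict({"ideas": [0, 1, 3, 1, 5]}, "mean"))    # {'ideas': 2}
print(ToyScoreCalculator.get_score_dict({"ideas": [0, 1, 3, 1, 5]}, "median"))  # {'ideas': 1}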
@@ -518,6 +545,36 @@ def get_median_score_dict(cls, scores_dict):
             median_scores[criterion] = criterion_score
         return median_scores

+    @classmethod
+    def get_mean_score_dict(cls, scores_dict):
+        """Determine the mean score in a dictionary of lists of scores
+
+        For a dictionary of lists, where each list contains a set of scores,
+        determine the mean value in each list.
+
+        Args:
+            scores_dict (dict): A dictionary of lists of int values. These int
+                values are reduced to a single value that represents the mean.
+
+        Returns:
+            (dict): A dictionary with criterion name keys and mean score
+                values.
+
+        Examples:
+            >>> scores = {
+            >>>     "foo": [5, 6, 12, 16, 22, 53],
+            >>>     "bar": [5, 6, 12, 16, 22, 53, 102]
+            >>> }
+            >>> Assessment.get_mean_score_dict(scores)
+            {"foo": 19, "bar": 31}
+
+        """
+        mean_scores = {}
+        for criterion, criterion_scores in scores_dict.items():
+            criterion_score = Assessment.get_mean_score(criterion_scores)
+            mean_scores[criterion] = criterion_score
+        return mean_scores
+
     @staticmethod
     def get_median_score(scores):
         """Determine the median score in a list of scores
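For context (editorial aside, not part of the diff): get_mean_score_dict differs from the existing get_median_score_dict only in the per-criterion reducer, and the mean is rounded up to an integer with math.ceil. A quick standalone check of how the two reductions can diverge on the same scores, using the standard library as an approximation (the real get_median_score may round differently, so statistics.median here is illustrative only):

import math
import statistics

criterion_scores = [5, 6, 12, 16, 22, 53]

# Ceil-rounded mean, as in the new get_mean_score: 114 / 6 = 19.0 -> 19
mean_score = math.ceil(sum(criterion_scores) / float(len(criterion_scores)))

# Plain median for comparison: average of the middle values 12 and 16 -> 14.0
median_score = statistics.median(criterion_scores)

print(mean_score)    # 19
print(median_score)  # 14.0

So switching a criterion from the median strategy to the mean strategy can change the awarded points even when the underlying peer scores are identical.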
@@ -552,6 +609,28 @@ def get_median_score(scores):
         )
         return median_score

+    @staticmethod
+    def get_mean_score(scores):
+        """Calculate the mean score from a list of scores
+
+        Args:
+            scores (list): A list of int values. These int values
+                are reduced to a single value that represents the mean.
+
+        Returns:
+            (int): The mean score.
+
+        Examples:
+            >>> scores = [5, 6, 12, 16, 22, 53]
+            >>> Assessment.get_mean_score(scores)
+            19
+
+        """
+        total_criterion_scores = len(scores)
+        if total_criterion_scores == 0:
+            return 0
+        return math.ceil(sum(scores) / float(total_criterion_scores))
Inline review thread on get_mean_score (mariajgrimaldi marked this conversation as resolved):

Comment: This threw me for a loop, so I tried replicating the same scenario. Here's the criteria I used: [screenshot]. While researching, I found that the assessments are retrieved, ordered, and then truncated by the number of peer grades needed. Could you confirm that the number of peer grades configured while testing was 5?

Comment: I also added an extra test here with the exact values: https://github.com/openedx/edx-ora2/pull/2196/files#diff-7fe9b2681acaf897f2dd62a13553badf288fd6144832ce0562c4a58bd2640e7fR2381

Comment: I also reproduced the same scenario with the graded-by-3 configuration and got 2, since the first three peers graded 1, 0, and 3 in that order. Please let me know if the submission order also affected your tests.

Reply: Oh wow, submission order does affect the final score! That's annoying. Thanks for that -- I also ran this test with the numbers shuffled in a different order, to confirm that it's not the "mean score" logic that's causing this issue. It must be something done after that.

--- a/openassessment/assessment/test/test_peer.py
+++ b/openassessment/assessment/test/test_peer.py
@@ -2375,6 +2375,7 @@ class TestPeerApi(CacheResetTest):
         self.assertEqual(31, Assessment.get_mean_score([5, 6, 12, 16, 22, 53, 102]))
         self.assertEqual(31, Assessment.get_mean_score([16, 6, 12, 102, 22, 53, 5]))
         self.assertEqual(2, Assessment.get_mean_score([0, 1, 3, 1, 5]))
+        self.assertEqual(2, Assessment.get_mean_score([5, 3, 1, 1, 0]))

Yes, 5 peer grades were required, and only 5 were received. I used the default graded-by-5 configuration. The Audit user received the same "Content" peer assessments as the Honor user, but in a different order, and so they got different scores. (Apologies about the "Ideas" assessments, I thought I did those all the same too, but apparently not.)
+
     @classmethod
     def scores_by_criterion(cls, assessments):
         """Create a dictionary of lists for scores associated with criterion