Average grades in export #2366
base: main
Changes from 2 commits
@@ -112,24 +112,29 @@ def filter_text_and_heading_questions(questions: Iterable[Question]) -> list[Question]:
         return filtered_questions

     @staticmethod
-    def filter_evaluations(
-        semesters: Iterable[Semester],
-        evaluation_states: Iterable[Evaluation.State],
-        program_ids: Iterable[int],
-        course_type_ids: Iterable[int],
-        contributor: UserProfile | None,
-        include_not_enough_voters: bool,
+    def filter_evaluations(  # noqa: PLR0912
+        semesters: Iterable[Semester] | None = None,
+        evaluation_states: Iterable[Evaluation.State] | None = None,
+        program_ids: Iterable[int] | None = None,
+        course_type_ids: Iterable[int] | None = None,
+        contributor: UserProfile | None = None,
+        include_not_enough_voters: bool = False,
     ) -> tuple[list[tuple[Evaluation, OrderedDict[int, list[QuestionResult]]]], list[Questionnaire], bool]:
         # pylint: disable=too-many-locals
         course_results_exist = False
         evaluations_with_results = []
         used_questionnaires: set[Questionnaire] = set()
-        evaluations_filter = Q(
-            course__semester__in=semesters,
-            state__in=evaluation_states,
-            course__programs__in=program_ids,
-            course__type__in=course_type_ids,
-        )
+        evaluations_filter = Q()
+        if semesters:
+            evaluations_filter &= Q(course__semester__in=semesters)
+        if evaluation_states:
+            evaluations_filter &= Q(state__in=evaluation_states)
+        if program_ids:
+            evaluations_filter &= Q(course__programs__in=program_ids)
+        if course_type_ids:
+            evaluations_filter &= Q(course__type__in=course_type_ids)

         if contributor:
             evaluations_filter = evaluations_filter & (
                 Q(course__responsibles__in=[contributor]) | Q(contributions__contributor__in=[contributor])
@@ -198,6 +203,8 @@ def write_headings_and_evaluation_info(
         else:
             self.write_cell(export_name, "headline")

+        self.write_cell("Average for this Question", "evaluation")

Review comment: I think "Average for this Question" would need to be translated here. The styles used here and in the lines changed below confuse me. Why is this using "evaluation"? Why is the empty cell in the course type column using the "program" style below? Please double-check that these all make sense, and if we really need some unintuitive value, add some explanation as to why.
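A possible fix for the translation point, mirroring how the surrounding rows already wrap their labels (a sketch only; it assumes the module imports Django's gettext as _, as the existing _("Programs") and _("Course Type") calls suggest):

```python
# Sketch: wrap the new column header with gettext like the other labels in this file.
from django.utils.translation import gettext as _

self.write_cell(_("Average for this Question"), "evaluation")
```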
         for evaluation, __ in evaluations_with_results:
             title = evaluation.full_name
             if len(semesters) > 1:
@@ -208,17 +215,19 @@ def write_headings_and_evaluation_info(
         self.next_row()
         self.write_cell(_("Programs"), "bold")
+        self.write_cell("", "program")
         for evaluation, __ in evaluations_with_results:
             self.write_cell("\n".join([d.name for d in evaluation.course.programs.all()]), "program")

         self.next_row()
         self.write_cell(_("Course Type"), "bold")
+        self.write_cell("", "program")
         for evaluation, __ in evaluations_with_results:
             self.write_cell(evaluation.course.type.name, "border_left_right")

         self.next_row()
         # One more cell is needed for the question column
-        self.write_empty_row_with_styles(["default"] + ["border_left_right"] * len(evaluations_with_results))
+        self.write_empty_row_with_styles(["default"] + ["border_left_right"] * (len(evaluations_with_results) + 1))

Review comment: The comment does not adequately explain the code anymore. Suggestion:

     def write_overall_results(
         self,
@@ -228,14 +237,17 @@ def write_overall_results(
         annotated_evaluations = [e for e, __ in evaluations_with_results]

         self.write_cell(_("Overall Average Grade"), "bold")
+        self.write_cell("", "border_left_right")
         averages = (distribution_to_grade(calculate_average_distribution(e)) for e in annotated_evaluations)
         self.write_row(averages, lambda avg: self.grade_to_style(avg) if avg else "border_left_right")

         self.write_cell(_("Total voters/Total participants"), "bold")
+        self.write_cell("", "total_voters")
         voter_ratios = (f"{e.num_voters}/{e.num_participants}" for e in annotated_evaluations)
         self.write_row(voter_ratios, style="total_voters")

         self.write_cell(_("Evaluation rate"), "bold")
+        self.write_cell("", "evaluation_rate")
         # round down like in progress bar
         participant_percentages = (
             f"{int((e.num_voters / e.num_participants) * 100) if e.num_participants > 0 else 0}%"
@@ -249,17 +261,19 @@ def write_overall_results(
         # Borders only if there is a course grade below. Offset by one column
         self.write_empty_row_with_styles(
-            ["default"] + ["border_left_right" if gt1 else "default" for gt1 in count_gt_1]
+            ["default", "default"] + ["border_left_right" if gt1 else "default" for gt1 in count_gt_1]

Review comment: Comment doesn't match code anymore.

         )

         self.write_cell(_("Evaluation weight"), "bold")
-        weight_percentages = (
+        self.write_cell("")
+        weight_percentages = tuple(

Review comment: Why?
Reply: We need to write an empty cell for the average col.
Reply: Tuple was something with typing.
Reply: The expected type below is
Reply: Maybe we should introduce a style "missing_average" or so to make these

             f"{e.weight_percentage}%" if gt1 else None
             for e, gt1 in zip(annotated_evaluations, count_gt_1, strict=True)
         )
         self.write_row(weight_percentages, lambda s: "evaluation_weight" if s is not None else "default")

         self.write_cell(_("Course Grade"), "bold")
+        self.write_cell("")
         for evaluation, gt1 in zip(annotated_evaluations, count_gt_1, strict=True):
             if not gt1:
                 self.write_cell()
@@ -271,58 +285,118 @@ def write_overall_results(
         self.next_row()

         # Same reasoning as above.
-        self.write_empty_row_with_styles(["default"] + ["border_top" if gt1 else "default" for gt1 in count_gt_1])
+        self.write_empty_row_with_styles(
+            ["default", "default"] + ["border_top" if gt1 else "default" for gt1 in count_gt_1]
+        )

+    @classmethod
+    def _calculate_display_result(
+        cls, questionnaire_id: int, question: Question, results: OrderedDict[int, list[QuestionResult]]
+    ) -> tuple[float | None, float | None]:
+        values = []
+        count_sum = 0
+        approval_count = 0

+        for grade_result in results[questionnaire_id]:
+            if grade_result.question.id != question.id or not RatingResult.has_answers(grade_result):
+                continue

+            values.append(grade_result.average * grade_result.count_sum)

Review comment: All we ever do with this list is sum up the values in the end, so instead of building the list in memory, we should just keep track of the sum.
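A sketch of the running-sum refactoring that this comment asks for (the variable name weighted_sum is hypothetical; the rest mirrors the loop body from the diff above):

```python
# Sketch: accumulate a weighted sum instead of materializing a list.
# `weighted_sum` replaces the `values` list; everything else stays as in the diff.
weighted_sum = 0.0
count_sum = 0
approval_count = 0

for grade_result in results[questionnaire_id]:
    if grade_result.question.id != question.id or not RatingResult.has_answers(grade_result):
        continue

    weighted_sum += grade_result.average * grade_result.count_sum
    count_sum += grade_result.count_sum
    if grade_result.question.is_yes_no_question:
        approval_count += grade_result.approval_count

# every accumulated result contributes a positive count_sum,
# so "no values collected" is equivalent to count_sum == 0
if count_sum == 0:
    return None, None

avg = weighted_sum / count_sum
```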
+            count_sum += grade_result.count_sum
+            if grade_result.question.is_yes_no_question:
+                approval_count += grade_result.approval_count

+        if not values:
+            return None, None

+        avg = sum(values) / count_sum
+        if question.is_yes_no_question:
+            percent_approval = approval_count / count_sum if count_sum > 0 else 0

Review comment: "percent" implies 0-100, but it is 0-1. Suggestion:

+            return avg, percent_approval
+        return avg, None

+    @classmethod
+    def _calculate_display_result_average(

Review comment: Technically should also be called
Reply: One is using the other function. They look very similar but do very different things.
Reply: The second one is an average of averages (why?). I also don't like the current names, I don't know what a "display result average" is, let's emphasize what the sample space of this average is.
Reply: My thought here was that with addition being commutative and division being distributive, the "average of averages" should just be like computing the 1-level-average correctly. I now see that the average-of-averages approach has different weights (-> it doesn't matter that on evaluation1, 50 people voted, while it was 200 people on evaluation2), which probably makes sense for the number we show in the exporter, so I'm fine with keeping the two methods. Agree with @niklasmohrin on the naming, I could see
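A small worked example of the weighting point, using the reviewer's voter counts (the grade values are made up and only illustrate how the two averages differ):

```python
# Two evaluations answering the same question:
#   evaluation1: 50 voters, average grade 1.0
#   evaluation2: 200 voters, average grade 3.0

# Pooled average (what _calculate_display_result does within one evaluation):
# every single vote has the same weight.
pooled = (50 * 1.0 + 200 * 3.0) / (50 + 200)   # 2.6

# Average of averages (what _calculate_display_result_average does across
# evaluations): every evaluation has the same weight, regardless of voter count.
average_of_averages = (1.0 + 3.0) / 2           # 2.0

print(pooled, average_of_averages)
```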
+        cls,
+        evaluations_with_results: list[tuple[Evaluation, OrderedDict[int, list[QuestionResult]]]],
+        questionnaire_id: int,
+        question: Question,
+    ) -> tuple[float | None, float | None]:
+        avg_values = []
+        count_avg = 0
+        avg_approval = []
+        count_approval = 0

Review comment: Same performance consideration here: Please don't build lists, just keep track of two numbers instead.

+        for __, results in evaluations_with_results:
+            if (
+                results.get(questionnaire_id) is None
+            ):  # ignore all results without the questionnaire for average calculation
+                continue

Review comment: From the code, it is very clear to me that this snippet skips results without a questionnaire. I'd much more appreciate the comment telling me 1. why this can happen in the first place (aren't all results always mapped to some question which needs to be part of some questionnaire?) and 2. why we want to skip them.

+            avg, percent_approval = cls._calculate_display_result(questionnaire_id, question, results)

Review comment: @janno42 how much is performance a concern with this exporter? For production, I can see this making the exporter take 7x longer.
Reply: It's not time critical but shouldn't take longer than ~10 seconds (with ~100 evaluations in the semester).

+            if avg is not None:
+                avg_values.append(avg)
+                count_avg += 1
+            if percent_approval is not None:
+                avg_approval.append(percent_approval)
+                count_approval += 1

+        return sum(avg_values) / count_avg if count_avg else None, (
+            sum(avg_approval) / count_approval if count_approval else None
+        )
     def write_questionnaire(
         self,
         questionnaire: Questionnaire,
         evaluations_with_results: list[tuple[Evaluation, OrderedDict[int, list[QuestionResult]]]],
         contributor: UserProfile | None,
+        all_evaluations_with_results: list[tuple[Evaluation, OrderedDict[int, list[QuestionResult]]]],
     ) -> None:
         if contributor and questionnaire.type == Questionnaire.Type.CONTRIBUTOR:
             self.write_cell(f"{questionnaire.public_name} ({contributor.full_name})", "bold")
         else:
             self.write_cell(questionnaire.public_name, "bold")

+        self.write_cell("", "border_left_right")

         # first cell of row is printed above
         self.write_empty_row_with_styles(["border_left_right"] * len(evaluations_with_results))

         for question in self.filter_text_and_heading_questions(questionnaire.questions.all()):
             self.write_cell(question.text, "italic" if question.is_heading_question else "default")

+            question_average, question_approval_count = self._calculate_display_result_average(
+                all_evaluations_with_results, questionnaire.id, question
+            )

+            if question_average is not None:
+                if question.is_yes_no_question:
+                    self.write_cell(f"{question_approval_count:.0%}", self.grade_to_style(question_average))
+                else:
+                    self.write_cell(question_average, self.grade_to_style(question_average))
+            else:
+                self.write_cell("", "border_left_right")

Review comment: Can be unnested:
In what situation can we have a question result here that doesn't have an average? We're putting the result here, so there must be at least our result, so there should always be an average, or am I missing something?
Reply: Think it's fine.
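The suggested code for "Can be unnested" is not captured above, so the following is only a guess at what was meant: the nested if/else can be flattened by handling the missing-average cell first and letting the remaining branches sit at one level.

```python
# Sketch of one possible unnested form (guessing at the reviewer's suggestion);
# behavior is identical to the nested version in the diff above.
if question_average is None:
    self.write_cell("", "border_left_right")
elif question.is_yes_no_question:
    self.write_cell(f"{question_approval_count:.0%}", self.grade_to_style(question_average))
else:
    self.write_cell(question_average, self.grade_to_style(question_average))
```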
             # evaluations
             for __, results in evaluations_with_results:
                 if questionnaire.id not in results or question.is_heading_question:
                     self.write_cell(style="border_left_right")
                     continue

-                values = []
-                count_sum = 0
-                approval_count = 0

-                for grade_result in results[questionnaire.id]:
-                    if grade_result.question.id != question.id or not RatingResult.has_answers(grade_result):
-                        continue
+                avg, percent_approval = self._calculate_display_result(questionnaire.id, question, results)

-                    values.append(grade_result.average * grade_result.count_sum)
-                    count_sum += grade_result.count_sum
-                    if grade_result.question.is_yes_no_question:
-                        approval_count += grade_result.approval_count

-                if not values:
+                if avg is None:
                     self.write_cell(style="border_left_right")
                     continue

-                avg = sum(values) / count_sum
                 if question.is_yes_no_question:
-                    percent_approval = approval_count / count_sum if count_sum > 0 else 0
                     self.write_cell(f"{percent_approval:.0%}", self.grade_to_style(avg))
                 else:
                     self.write_cell(avg, self.grade_to_style(avg))
             self.next_row()

-        self.write_empty_row_with_styles(["default"] + ["border_left_right"] * len(evaluations_with_results))
+        self.write_empty_row_with_styles(["default"] + ["border_left_right"] * (len(evaluations_with_results) + 1))

-    # pylint: disable=arguments-differ
+    # pylint: disable=arguments-differ,too-many-locals
     def export_impl(
         self,
         semesters: QuerySetOrSequence[Semester],
@@ -335,6 +409,8 @@ def export_impl(
         # We want to throw early here, since workbook.save() will throw an IndexError otherwise.
        assert len(selection_list) > 0

+        all_evaluations_with_results, _, _ = self.filter_evaluations(evaluation_states=[Evaluation.State.PUBLISHED])

        for sheet_counter, (program_ids, course_type_ids) in enumerate(selection_list, 1):
            self.cur_sheet = self.workbook.add_sheet("Sheet " + str(sheet_counter))
            self.cur_row = 0

@@ -358,7 +434,9 @@ def export_impl(
            )

            for questionnaire in used_questionnaires:
-                self.write_questionnaire(questionnaire, evaluations_with_results, contributor)
+                self.write_questionnaire(
+                    questionnaire, evaluations_with_results, contributor, all_evaluations_with_results
+                )

            self.write_overall_results(evaluations_with_results, course_results_exist)
Review comment: The ifs should all use "is not None" checks to distinguish between an empty iterable and None.
Reply: But then empty iterables are not covered, and the code breaks; we want to skip both.
Reply: What do you mean by "not covered"? If I put in evaluation_states=[], I expect that the result is empty. I don't think we are relying on this behavior anywhere, and the "is not None" version seems more intuitive.
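A small standalone sketch of the behavioral difference this thread is about (toy model and function names, not the EvaP code): with the truthiness check currently in the PR, an explicitly passed empty list is treated like "no filter", while an "is not None" check would turn it into a filter that matches nothing.

```python
from dataclasses import dataclass

@dataclass
class Evaluation:
    state: str

EVALUATIONS = [Evaluation("published"), Evaluation("new")]

def filter_truthy(states=None):
    # current PR behavior: an empty list is falsy, so the filter is skipped entirely
    if states:
        return [e for e in EVALUATIONS if e.state in states]
    return list(EVALUATIONS)

def filter_is_not_none(states=None):
    # reviewer's suggestion: only skip the filter when the argument was omitted
    if states is not None:
        return [e for e in EVALUATIONS if e.state in states]
    return list(EVALUATIONS)

print(len(filter_truthy(states=[])))       # 2 -- empty list behaves like "no filter"
print(len(filter_is_not_none(states=[])))  # 0 -- empty list filters everything out
```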