Refactor milestone answers statistics calculation

- refactor statistics calculation
  - add `update_stats` function which updates milestones and milestone group statistics
    - optional `update_existing_statistics` argument
      - if `True`, then statistics are updated using new answers as before
      - if `False`, then recalculates all statistics using all answers (may be needed if e.g. some junk answers are deleted by an admin)
  - reduce duplication
- `AnswerSession`
  - add `expired` flag: initially False
    - set to True by `get_or_create_current_milestone_answer_session` if it was created 7 or more days ago
    - set to True when stats are updated if it was created 9 days or more ago
      - includes a grace period to avoid setting a currently in-use answer session to expired
    - once an answer session is expired, then answers can no longer be modified / submitted by the user
      - this should ensure that answers cannot be modified after they have been included in the statistics
      - resolves #219
  - add `included_in_statistics` flag: initially False
    - set to True once the answers from this session are included in the statistics
- `MilestoneAnswer`
  - remove `included_in_milestone_statistics` and `included_in_milestonegroup_statistics` flags
    - this is now done at the level of an answer session rather than for each individual answer
- milestone feedback functions
  - insert a `TrafficLight.invalid.value` instead of raising an exception if there are no statistics for a milestone id or group
    - no longer recalculate stats when constructing feedback to avoid slowing down a user request in this case
- add `/update-milestone-age-scores` admin endpoint to recalculate the statistics
  - TODO: add tests, add button(s) to admin interface?
  - TODO: add scheduled calling of this function
ssciwr · Feb 28, 2025 · 9973b78 · 9973b78
1 parent 5d8832b
commit 9973b78
Show file tree

Hide file tree

Showing 14 changed files with 531 additions and 690 deletions.
diff --git a/mondey_backend/src/mondey_backend/models/milestones.py b/mondey_backend/src/mondey_backend/models/milestones.py
@@ -158,8 +158,6 @@ class MilestoneAnswer(SQLModel, table=True):
     )
     milestone_group_id: int = Field(default=None, foreign_key="milestonegroup.id")
     answer: int
-    included_in_milestone_statistics: bool = False
-    included_in_milestonegroup_statistics: bool = False
 
 
 class MilestoneAnswerSession(SQLModel, table=True):
@@ -171,6 +169,8 @@ class MilestoneAnswerSession(SQLModel, table=True):
             "server_default": text("CURRENT_TIMESTAMP"),
         }
     )
+    expired: bool = False
+    included_in_statistics: bool = False
     answers: Mapped[dict[int, MilestoneAnswer]] = dict_relationship(key="milestone_id")
 
 

diff --git a/mondey_backend/src/mondey_backend/routers/admin_routers/milestones.py b/mondey_backend/src/mondey_backend/routers/admin_routers/milestones.py
@@ -20,6 +20,7 @@
 from ...models.milestones import SubmittedMilestoneImage
 from ...models.milestones import SubmittedMilestoneImagePublic
 from ...models.utils import ItemOrder
+from ..statistics import update_stats
 from ..utils import add
 from ..utils import get
 from ..utils import milestone_group_image_path
@@ -195,4 +196,11 @@ def get_milestone_age_scores(
 
         return collection
 
+    @router.post(
+        "/update-milestone-age-scores/{incremental}",
+        response_model=str,
+    )
+    def update_milestone_age_scores(session: SessionDep, incremental: bool) -> str:
+        return update_stats(session, incremental)
+
     return router
diff --git a/mondey_backend/src/mondey_backend/routers/scores.py b/mondey_backend/src/mondey_backend/routers/scores.py
@@ -1,8 +1,6 @@
 from __future__ import annotations
 
 import logging
-from datetime import datetime
-from datetime import timedelta
 from enum import Enum
 from typing import cast
 
@@ -15,8 +13,6 @@
 from ..models.milestones import MilestoneAnswerSession
 from ..models.milestones import MilestoneGroupAgeScore
 from ..models.milestones import MilestoneGroupAgeScoreCollection
-from .statistics import calculate_milestone_statistics_by_age
-from .statistics import calculate_milestonegroup_statistics_by_age
 from .utils import get_child_age_in_months
 
 
@@ -88,7 +84,6 @@ def compute_milestonegroup_feedback_summary(
     by first calculating the mean score over all milestones that belong to the milestonegroup that
     are relevant for the child when the given answersession was created. The mean is then
     compared against the mean and standard deviation over the known population of children for the child's age.
-    When the statistics is outdated (older than a week currently) or there is none, it is recomputed and updated in the database.
     See `compute_feedback_simple` for the feedback logic.
 
     Parameters
@@ -123,53 +118,35 @@ def compute_milestonegroup_feedback_summary(
     logger.debug(f"  child age in months: {age}")
     # extract milestonegroups
     groups = set(answer.milestone_group_id for answer in answersession.answers.values())
-    today = datetime.now()
 
     # for each milestonegroup, get the statistics, compute the current mean, and compute the feedback
-    # if the statistics is older than a week, we update it with the current data
     feedback: dict[int, int] = {}
     for group in groups:
         logger.debug(f"  group: {group}")
         stats = session.get(MilestoneGroupAgeScoreCollection, group)
-        logger.debug(f"  old stats: {stats}")
-        if stats is not None:
+        if stats is None:
+            logger.debug("  no stats")
+            feedback[group] = TrafficLight.invalid.value
+        else:
+            logger.debug(f"  stats: {stats}")
             for i, score in enumerate(stats.scores):
                 if score.count > 0:
                     logger.debug(
-                        f"   old score: , {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
+                        f"   score: , {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
                     )
-
-        if stats is None or stats.created_at < today - timedelta(days=7):
-            new_stats = calculate_milestonegroup_statistics_by_age(session, group)
-
-            if new_stats is None:
-                raise ValueError("No statistics for milestone group: ", group)
-
-            # update stuff in database
-            for i, new_score in enumerate(new_stats.scores):
-                if new_score.count > 0:
-                    logger.debug(
-                        f"   new_score: , {i}, {new_score.count}, {new_score.avg_score}, {new_score.stddev_score}"
-                    )
-                session.merge(new_score)
-
-            session.merge(new_stats)
-            session.commit()
-            stats = new_stats
-
-        # extract the answers for the current milestone group
-        group_answers = [
-            answer.answer + 1
-            for answer in answersession.answers.values()
-            if answer.milestone_group_id == group
-        ]
-        logger.debug(
-            f'  group answers: , {group_answers}, "mean: ", {np.mean(group_answers)}'
-        )
-        # use the statistics recorded for a certain age as the basis for the feedback computation
-        feedback[group] = compute_feedback_simple(
-            stats.scores[age], float(np.mean(group_answers))
-        )
+            # extract the answers for the current milestone group
+            group_answers = [
+                answer.answer + 1
+                for answer in answersession.answers.values()
+                if answer.milestone_group_id == group
+            ]
+            logger.debug(
+                f'  group answers: , {group_answers}, "mean: ", {np.mean(group_answers)}'
+            )
+            # use the statistics recorded for a certain age as the basis for the feedback computation
+            feedback[group] = compute_feedback_simple(
+                stats.scores[age], float(np.mean(group_answers))
+            )
     logger.debug(f"summary feedback: {feedback}")
     return feedback
 
@@ -179,8 +156,8 @@ def compute_milestonegroup_feedback_detailed(
 ) -> dict[int, dict[int, int]]:
     """
     Compute the per-milestone (detailed) feedback for all answers in a given answersession.
-    This is done by comparing the given answer per milestone against the mean and standard deviation of the known population of children for the child's age. If this statistics is outdated (older than a week currently) or is
-    missing, it is recomputed and updated in the database. See `compute_feedback_simple` for the feedback logic.
+    This is done by comparing the given answer per milestone against the mean and standard deviation of the known population of children for the child's age.
+    See `compute_feedback_simple` for the feedback logic.
     Return a dictionary mapping milestonegroup -> [milestone -> feedback].
     Parameters
     ----------
@@ -214,49 +191,28 @@ def compute_milestonegroup_feedback_detailed(
 
     age = get_child_age_in_months(child, answersession.created_at)
     logger.debug(f"  child age in months: {age}")
-    today = datetime.today()
 
     # for each milestonegroup, get the statistics, compute the current mean, and compute the feedback
     feedback: dict[int, dict[int, int]] = {}
     for milestone_id, answer in answersession.answers.items():
-        # try to get statistics for the current milestone and update it if it's not there
-        # or is too old
+        logger.debug(f"  milestone id: {milestone_id}, answer: {answer.answer + 1}")
         stats = session.get(MilestoneAgeScoreCollection, milestone_id)
-        logger.debug(f"  old stats: {stats}")
-        if stats is not None:
+        logger.debug(f"  stats: {stats}")
+        if answer.milestone_group_id not in feedback:
+            feedback[answer.milestone_group_id] = {}
+        if stats is None:
+            feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
+                TrafficLight.invalid.value
+            )
+        else:
             for i, score in enumerate(stats.scores):
                 if score.count > 0:
                     logger.debug(
-                        f"   old score: {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
+                        f"   score: {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
                     )
-
-        if stats is None or stats.created_at < today - timedelta(days=7):
-            new_stats = calculate_milestone_statistics_by_age(session, milestone_id)
-
-            if new_stats is None:
-                raise ValueError(
-                    "No new statistics could be calculated for milestone: ",
-                    milestone_id,
-                )
-
-            # update stuff in database
-            for i, new_score in enumerate(new_stats.scores):
-                if new_score.count > 0:
-                    logger.debug(
-                        f"   new_score: , {i}, {new_score.count}, {new_score.avg_score}, {new_score.stddev_score}"
-                    )
-                session.merge(new_score)
-
-            session.merge(new_stats)
-            session.commit()
-            stats = new_stats
-
-        if answer.milestone_group_id not in feedback:
-            feedback[answer.milestone_group_id] = {}
-
-        feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
-            compute_feedback_simple(stats.scores[age], answer.answer + 1)
-        )
+            feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
+                compute_feedback_simple(stats.scores[age], answer.answer + 1)
+            )
 
     logger.debug(f" detailed feedback: {feedback}")