Skip to content

Commit

Permalink
Refactor milestone answers statistics calculation
Browse files Browse the repository at this point in the history
- refactor statistics calculation
  - add `update_stats` function which updates milestones and milestone group statistics
    - optional `update_existing_statistics` argument
      - if `True`, then the existing statistics are updated using the new answers, as before
      - if `False`, then all statistics are recalculated using all answers (may be needed if, e.g., some junk answers are deleted by an admin)
  - reduce duplication
- `AnswerSession`
  - add `expired` flag: initially False
    - set to True by `get_or_create_current_milestone_answer_session` if it was created 7 or more days ago
    - set to True when stats are updated if it was created 9 days or more ago
      - includes a grace period to avoid setting a currently in use answer session to expired
    - once an answer session is expired, then answers can no longer be modified / submitted by the user
      - this should ensure that answers cannot be modified after they have been included in the statistics
    - resolves #219
  - add `included_in_statistics` flag: initially False
    - set to True once the answers from this session are included in the statistics
- `MilestoneAnswer`
  - remove `included_in_milestone_statistics` and `included_in_milestonegroup_statistics` flags
    - this is now done at the level of an answer session rather than for each individual answer
- milestone feedback functions
  - insert a `TrafficLight.invalid.value` instead of raising an exception if there are no statistics for a milestone id or group
  - no longer recalculate stats when constructing feedback to avoid slowing down a user request in this case
- add `/update-milestone-age-scores` admin endpoint to recalculate the statistics
  - TODO: add tests, add button(s) to admin interface?
  - TODO: add scheduled calling of this function
  • Loading branch information
lkeegan committed Feb 28, 2025
1 parent 5d8832b commit 9973b78
Show file tree
Hide file tree
Showing 14 changed files with 531 additions and 690 deletions.
4 changes: 2 additions & 2 deletions mondey_backend/src/mondey_backend/models/milestones.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,6 @@ class MilestoneAnswer(SQLModel, table=True):
)
milestone_group_id: int = Field(default=None, foreign_key="milestonegroup.id")
answer: int
included_in_milestone_statistics: bool = False
included_in_milestonegroup_statistics: bool = False


class MilestoneAnswerSession(SQLModel, table=True):
Expand All @@ -171,6 +169,8 @@ class MilestoneAnswerSession(SQLModel, table=True):
"server_default": text("CURRENT_TIMESTAMP"),
}
)
expired: bool = False
included_in_statistics: bool = False
answers: Mapped[dict[int, MilestoneAnswer]] = dict_relationship(key="milestone_id")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ...models.milestones import SubmittedMilestoneImage
from ...models.milestones import SubmittedMilestoneImagePublic
from ...models.utils import ItemOrder
from ..statistics import update_stats
from ..utils import add
from ..utils import get
from ..utils import milestone_group_image_path
Expand Down Expand Up @@ -195,4 +196,11 @@ def get_milestone_age_scores(

return collection

# Admin endpoint that triggers a milestone statistics update by delegating to
# `update_stats`. `incremental` is read from the URL path and forwarded as the
# second positional argument; the string returned by `update_stats` is returned
# as the response.
# NOTE(review): presumably `incremental=True` updates the existing statistics
# with new answers and `False` recalculates everything from all answers —
# confirm against the signature of `update_stats`.
@router.post(
"/update-milestone-age-scores/{incremental}",
response_model=str,
)
def update_milestone_age_scores(session: SessionDep, incremental: bool) -> str:
return update_stats(session, incremental)

return router
112 changes: 34 additions & 78 deletions mondey_backend/src/mondey_backend/routers/scores.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from __future__ import annotations

import logging
from datetime import datetime
from datetime import timedelta
from enum import Enum
from typing import cast

Expand All @@ -15,8 +13,6 @@
from ..models.milestones import MilestoneAnswerSession
from ..models.milestones import MilestoneGroupAgeScore
from ..models.milestones import MilestoneGroupAgeScoreCollection
from .statistics import calculate_milestone_statistics_by_age
from .statistics import calculate_milestonegroup_statistics_by_age
from .utils import get_child_age_in_months


Expand Down Expand Up @@ -88,7 +84,6 @@ def compute_milestonegroup_feedback_summary(
by first calculating the mean score over all milestones that belong to the milestonegroup that
are relevant for the child when the given answersession was created. The mean is then
compared against the mean and standard deviation over the known population of children for the child's age.
When the statistics is outdated (older than a week currently) or there is none, it is recomputed and updated in the database.
See `compute_feedback_simple` for the feedback logic.
Parameters
Expand Down Expand Up @@ -123,53 +118,35 @@ def compute_milestonegroup_feedback_summary(
logger.debug(f" child age in months: {age}")
# extract milestonegroups
groups = set(answer.milestone_group_id for answer in answersession.answers.values())
today = datetime.now()

# for each milestonegroup, get the statistics, compute the current mean, and compute the feedback
# if the statistics is older than a week, we update it with the current data
feedback: dict[int, int] = {}
for group in groups:
logger.debug(f" group: {group}")
stats = session.get(MilestoneGroupAgeScoreCollection, group)
logger.debug(f" old stats: {stats}")
if stats is not None:
if stats is None:
logger.debug(" no stats")
feedback[group] = TrafficLight.invalid.value
else:
logger.debug(f" stats: {stats}")
for i, score in enumerate(stats.scores):
if score.count > 0:
logger.debug(
f" old score: , {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
f" score: , {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
)

if stats is None or stats.created_at < today - timedelta(days=7):
new_stats = calculate_milestonegroup_statistics_by_age(session, group)

if new_stats is None:
raise ValueError("No statistics for milestone group: ", group)

# update stuff in database
for i, new_score in enumerate(new_stats.scores):
if new_score.count > 0:
logger.debug(
f" new_score: , {i}, {new_score.count}, {new_score.avg_score}, {new_score.stddev_score}"
)
session.merge(new_score)

session.merge(new_stats)
session.commit()
stats = new_stats

# extract the answers for the current milestone group
group_answers = [
answer.answer + 1
for answer in answersession.answers.values()
if answer.milestone_group_id == group
]
logger.debug(
f' group answers: , {group_answers}, "mean: ", {np.mean(group_answers)}'
)
# use the statistics recorded for a certain age as the basis for the feedback computation
feedback[group] = compute_feedback_simple(
stats.scores[age], float(np.mean(group_answers))
)
# extract the answers for the current milestone group
group_answers = [
answer.answer + 1
for answer in answersession.answers.values()
if answer.milestone_group_id == group
]
logger.debug(
f' group answers: , {group_answers}, "mean: ", {np.mean(group_answers)}'
)
# use the statistics recorded for a certain age as the basis for the feedback computation
feedback[group] = compute_feedback_simple(
stats.scores[age], float(np.mean(group_answers))
)
logger.debug(f"summary feedback: {feedback}")
return feedback

Expand All @@ -179,8 +156,8 @@ def compute_milestonegroup_feedback_detailed(
) -> dict[int, dict[int, int]]:
"""
Compute the per-milestone (detailed) feedback for all answers in a given answersession.
This is done by comparing the given answer per milestone against the mean and standard deviation of the known population of children for the child's age. If this statistics is outdated (older than a week currently) or is
missing, it is recomputed and updated in the database. See `compute_feedback_simple` for the feedback logic.
This is done by comparing the given answer per milestone against the mean and standard deviation of the known population of children for the child's age.
See `compute_feedback_simple` for the feedback logic.
Return a dictionary mapping milestonegroup -> [milestone -> feedback].
Parameters
----------
Expand Down Expand Up @@ -214,49 +191,28 @@ def compute_milestonegroup_feedback_detailed(

age = get_child_age_in_months(child, answersession.created_at)
logger.debug(f" child age in months: {age}")
today = datetime.today()

# for each milestonegroup, get the statistics, compute the current mean, and compute the feedback
feedback: dict[int, dict[int, int]] = {}
for milestone_id, answer in answersession.answers.items():
# try to get statistics for the current milestone and update it if it's not there
# or is too old
logger.debug(f" milestone id: {milestone_id}, answer: {answer.answer + 1}")
stats = session.get(MilestoneAgeScoreCollection, milestone_id)
logger.debug(f" old stats: {stats}")
if stats is not None:
logger.debug(f" stats: {stats}")
if answer.milestone_group_id not in feedback:
feedback[answer.milestone_group_id] = {}
if stats is None:
feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
TrafficLight.invalid.value
)
else:
for i, score in enumerate(stats.scores):
if score.count > 0:
logger.debug(
f" old score: {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
f" score: {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
)

if stats is None or stats.created_at < today - timedelta(days=7):
new_stats = calculate_milestone_statistics_by_age(session, milestone_id)

if new_stats is None:
raise ValueError(
"No new statistics could be calculated for milestone: ",
milestone_id,
)

# update stuff in database
for i, new_score in enumerate(new_stats.scores):
if new_score.count > 0:
logger.debug(
f" new_score: , {i}, {new_score.count}, {new_score.avg_score}, {new_score.stddev_score}"
)
session.merge(new_score)

session.merge(new_stats)
session.commit()
stats = new_stats

if answer.milestone_group_id not in feedback:
feedback[answer.milestone_group_id] = {}

feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
compute_feedback_simple(stats.scores[age], answer.answer + 1)
)
feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
compute_feedback_simple(stats.scores[age], answer.answer + 1)
)

logger.debug(f" detailed feedback: {feedback}")

Expand Down
Loading

0 comments on commit 9973b78

Please sign in to comment.