[staged-updates] optimizes the crap out of the tasks progress page to stop it killing servers.

This does remove a feature from the page, but the feature was stupid and nobody uses this page anyway (but now they can).
onnela-lab · Aug 23, 2024 · 1250974 · 1250974
1 parent 530acce
commit 1250974
Show file tree

Hide file tree

Showing 4 changed files with 8 additions and 73 deletions.
diff --git a/database/study_models.py b/database/study_models.py
@@ -1,16 +1,14 @@
 from __future__ import annotations
 
-import operator
-from datetime import datetime, timedelta, tzinfo
-from typing import Any, Dict, Optional
+from datetime import datetime, tzinfo
+from typing import Any, Dict
 
 from dateutil.tz import gettz
 from django.core.exceptions import ObjectDoesNotExist
 from django.core.validators import MaxValueValidator, MinValueValidator
 from django.db import models
 from django.db.models import F, Func, Manager
 from django.db.models.query import QuerySet
-from django.utils import timezone
 from django.utils.timezone import localtime
 
 from constants.data_stream_constants import ALL_DATA_STREAMS
@@ -126,54 +124,6 @@ def get_researchers(self) -> QuerySet[Researcher]:
         from database.user_models_researcher import Researcher
         return Researcher.objects.filter(study_relations__study=self)
 
-    def get_earliest_data_time_bin(
-        self, only_after_epoch: bool = True, only_before_now: bool = True
-    ) -> Optional[datetime]:
-        return self._get_data_time_bin(
-            earliest=True,
-            only_after_epoch=only_after_epoch,
-            only_before_now=only_before_now,
-        )
-
-    def get_latest_data_time_bin(
-            self, only_after_epoch: bool = True, only_before_now: bool = True
-    ) -> Optional[datetime]:
-        return self._get_data_time_bin(
-            earliest=False,
-            only_after_epoch=only_after_epoch,
-            only_before_now=only_before_now,
-        )
-
-    def _get_data_time_bin(
-        self, earliest=True, only_after_epoch: bool = True, only_before_now: bool = True
-    ) -> Optional[datetime]:
-        """ Return the earliest ChunkRegistry time bin datetime for this study.
-        
-        Note: As of 2021-07-01, running the query as a QuerySet filter or sorting the QuerySet can
-              take upwards of 30 seconds. Doing the logic in python speeds this up tremendously.
-        Args:
-            earliest: if True, will return earliest datetime; if False, will return latest datetime
-            only_after_epoch: if True, will filter results only for datetimes after the Unix epoch
-                              (1970-01-01T00:00:00Z)
-            only_before_now: if True, will filter results only for datetimes before now """
-
-        time_bins: QuerySet[datetime] = self.chunk_registries.values_list("time_bin", flat=True)
-        comparator = operator.lt if earliest else operator.gt
-        now = timezone.now()
-        desired_time_bin = None
-        for time_bin in time_bins:
-            if only_after_epoch and time_bin.timestamp() <= 0:
-                continue
-            if only_before_now and time_bin > now:
-                continue
-            if desired_time_bin is None:
-                desired_time_bin = time_bin
-                continue
-            if comparator(desired_time_bin, time_bin):
-                continue
-            desired_time_bin = time_bin
-        return desired_time_bin
-
     def notification_events(self, **archived_event_filter_kwargs):
         from database.schedule_models import ArchivedEvent
         return ArchivedEvent.objects.filter(

diff --git a/endpoints/forest_endpoints.py b/endpoints/forest_endpoints.py
@@ -30,6 +30,7 @@
 from database.user_models_participant import Participant
 from libs.django_forms.forms import CreateTasksForm
 from libs.efficient_paginator import EfficientQueryPaginator
+from libs.endpoint_helpers.dashboard_helpers import get_first_and_last_days_of_data
 from libs.endpoint_helpers.summary_statistic_helpers import SummaryStatisticsPaginator
 from libs.internal_types import ParticipantQuerySet, ResearcherRequest
 from libs.s3 import NoSuchKeyException
@@ -75,11 +76,6 @@ def forest_tasks_progress(request: ResearcherRequest, study_id=None):
     # number of forest tasks shouldn't be the bottleneck here.
     tasks = ForestTask.objects.filter(participant__in=participants).order_by("created_on")
 
-    # these are quite optimized buuuuut it is still slow.
-    start_date = (study.get_earliest_data_time_bin() or study.created_on).date()
-    end_date = (study.get_latest_data_time_bin() or timezone.now()).date()
-
-    params = {}
     results = defaultdict(lambda: "-")
     chart_elements_lookup = {False: "N", None: "?"}
     # this loop builds the chart of whether there are forest results for date ranges
@@ -102,7 +98,10 @@ def forest_tasks_progress(request: ResearcherRequest, study_id=None):
                 # 3. If in_table is a ? we can just skip it because we cannot upgrade from ? to Y here.
                 #  So, we just skip if we are already at ? in the chart element, and otherwise we do the lookup.
                 results[key] = chart_elements_lookup[output_exists]
-            params[key] = task.safe_unpickle_parameters_as_string()
+
+    start_date, end_date = get_first_and_last_days_of_data(study)
+    start_date = start_date or study.created_on.date()
+    end_date = end_date or timezone.now().date()
 
     # generate the date range for the chart, we need it many times.
     dates = list(daterange(start_date, end_date, inclusive=True))
@@ -114,14 +113,6 @@ def forest_tasks_progress(request: ResearcherRequest, study_id=None):
                 [results[(participant.id, tree_name, date)] for date in dates]
             chart.append(row)
 
-    # ensure that within each tree, only a single set of param values are used (only the most recent runs
-    # are considered, and unsuccessful runs are assumed to invalidate old runs, clearing params)
-    params_conflict = False
-    for tree_name in {k[1] for k in params.keys()}:
-        if len({m for k, m in params.items() if m is not None and k[1] == tree_name}) > 1:
-            params_conflict = True
-            break
-
     chart_json = orjson.dumps(chart).decode()  # may be huge, but orjson is very fast.
     return render(
         request,
@@ -130,7 +121,6 @@ def forest_tasks_progress(request: ResearcherRequest, study_id=None):
             study=study,
             chart_columns=["participant", "tree"] + dates,
             status_choices=ForestTaskStatus,
-            params_conflict=params_conflict,
             start_date=start_date,
             end_date=end_date,
             chart=chart_json  # this uses the jinja safe filter and should never involve user input

diff --git a/frontend/templates/forest/forest_tasks_progress.html b/frontend/templates/forest/forest_tasks_progress.html
@@ -44,10 +44,5 @@
 {% block title %}Forest Analysis Progress{% endblock %}
 
 {% block content %}
-  {% if params_conflict %}
-    <div class="alert alert-warning">
-      Warning: There are analyses on this study that were computed using different Forest parameters.
-    </div>
-  {% endif %}
   <table id="analysis_chart" class="display" width="100%"></table>
 {%  endblock %}
diff --git a/frontend/templates/forest/task_log.html b/frontend/templates/forest/task_log.html
@@ -37,7 +37,7 @@
     </div>
 
     <div class="table-responsive col-xs-12 well">
-      <table class="table">
+      <table class="table ng-cloak">
         <thead>
           <tr>
             <th scope="col">Created On</th>