Merge pull request #46 from OceanNetworksCanada/issue-45-fix-time-estimation

kan-fu · web-flow · commit 476841f74463 · 2024-09-20T10:21:19.000-07:00
fix: use total_seconds() instead of seconds on the timedelta
diff --git a/src/onc/modules/_MultiPage.py b/src/onc/modules/_MultiPage.py
@@ -4,6 +4,7 @@
 from time import time
 
 import dateutil.parser
+import humanize
 
 from ._util import _formatDuration
 
@@ -38,11 +39,17 @@ def getAllPages(self, service: str, url: str, filters: dict):
             )
 
             pageCount = 1
-            pageEstimate = self._estimatePages(response, service, responseTime)
+            pageEstimate = self._estimatePages(response, service)
             if pageEstimate > 0:
-                timeEstimate = _formatDuration(pageEstimate * responseTime)
-                print(f"Estimated approx. {pageEstimate} pages")
-                print(f"Estimated approx. {timeEstimate} to complete")
+                # Exclude the first page when calculating the time estimation
+                timeEstimate = _formatDuration((pageEstimate - 1) * responseTime)
+                print(
+                    f"Downloading time for the first page: {humanize.naturaldelta(responseTime)}"  # noqa: E501
+                )
+                print(f"Estimated approx. {pageEstimate} pages in total.")
+                print(
+                    f"Estimated approx. {timeEstimate} to complete for the rest of the pages."  # noqa: E501
+                )
 
             # keep downloading pages until next is None
             print("")
@@ -111,7 +118,7 @@ def _catenateData(self, response: object, nextResponse: object, service: str):
         elif service == "archivefiles":
             response["files"] += nextResponse["files"]
 
-    def _estimatePages(self, response: object, service: str, responseTime: float):
+    def _estimatePages(self, response: object, service: str):
         """
         Estimate the number of pages the request will require.
 
@@ -127,16 +134,17 @@ def _estimatePages(self, response: object, service: str, responseTime: float):
         if pageTimespan == 0:
             return 0
 
-        # total timespan to cover
+        # remaining timespan from the "next" parameters (excludes the first page)
         totalBegin = dateutil.parser.parse(response["next"]["parameters"]["dateFrom"])
         totalEnd = dateutil.parser.parse(response["next"]["parameters"]["dateTo"])
         totalTimespan = totalEnd - totalBegin
 
         # handle cases of very small timeframes
-        pageSeconds = max(pageTimespan.seconds, 1)
-        totalSeconds = totalTimespan.seconds
+        pageSeconds = max(pageTimespan.total_seconds(), 1)
+        totalSeconds = totalTimespan.total_seconds()
 
-        return math.ceil(totalSeconds / pageSeconds)
+        # plus one for the first page
+        return math.ceil(totalSeconds / pageSeconds) + 1
 
     def _rowCount(self, response, service: str):
         """