4
4
from time import time
5
5
6
6
import dateutil .parser
7
+ import humanize
7
8
8
9
from ._util import _formatDuration
9
10
@@ -38,11 +39,17 @@ def getAllPages(self, service: str, url: str, filters: dict):
38
39
)
39
40
40
41
pageCount = 1
41
- pageEstimate = self ._estimatePages (response , service , responseTime )
42
+ pageEstimate = self ._estimatePages (response , service )
42
43
if pageEstimate > 0 :
43
- timeEstimate = _formatDuration (pageEstimate * responseTime )
44
- print (f"Estimated approx. { pageEstimate } pages" )
45
- print (f"Estimated approx. { timeEstimate } to complete" )
44
+ # Exclude the first page from the time estimate; its download time is reported separately below
45
+ timeEstimate = _formatDuration ((pageEstimate - 1 ) * responseTime )
46
+ print (
47
+ f"Downloading time for the first page: { humanize .naturaldelta (responseTime )} " # noqa: E501
48
+ )
49
+ print (f"Estimated approx. { pageEstimate } pages in total." )
50
+ print (
51
+ f"Estimated approx. { timeEstimate } to complete for the rest of the pages." # noqa: E501
52
+ )
46
53
47
54
# keep downloading pages until next is None
48
55
print ("" )
@@ -111,7 +118,7 @@ def _catenateData(self, response: object, nextResponse: object, service: str):
111
118
elif service == "archivefiles" :
112
119
response ["files" ] += nextResponse ["files" ]
113
120
114
- def _estimatePages (self , response : object , service : str , responseTime : float ):
121
+ def _estimatePages (self , response : object , service : str ):
115
122
"""
116
123
Estimate the number of pages the request will require.
117
124
@@ -127,16 +134,17 @@ def _estimatePages(self, response: object, service: str, responseTime: float):
127
134
if pageTimespan == 0 :
128
135
return 0
129
136
130
- # total timespan to cover
137
+ # total timespan still to cover, taken from the "next" parameters (excludes the first page)
131
138
totalBegin = dateutil .parser .parse (response ["next" ]["parameters" ]["dateFrom" ])
132
139
totalEnd = dateutil .parser .parse (response ["next" ]["parameters" ]["dateTo" ])
133
140
totalTimespan = totalEnd - totalBegin
134
141
135
142
# handle cases of very small timeframes
136
- pageSeconds = max (pageTimespan .seconds , 1 )
137
- totalSeconds = totalTimespan .seconds
143
+ pageSeconds = max (pageTimespan .total_seconds () , 1 )
144
+ totalSeconds = totalTimespan .total_seconds ()
138
145
139
- return math .ceil (totalSeconds / pageSeconds )
146
+ # plus one for the first page
147
+ return math .ceil (totalSeconds / pageSeconds ) + 1
140
148
141
149
def _rowCount (self , response , service : str ):
142
150
"""
0 commit comments