Skip to content

Commit 86d647b

Browse files
committed
Remove max_length parameter from fetch
If a file has a bigger size than expected, RequestFetcher.fetch downloads it up to max_length without any errors. Only the subsequent hash check raises an exception. A better behaviour in such a case would be to raise a DownloadLengthMismatchError. For this reason this commit: - removes max_length from the fetch() definition, - modifies download_file to check the downloaded length after each chunk and raise an error accordingly. Signed-off-by: Teodora Sechkova <[email protected]>
1 parent e5c9e8e commit 86d647b

File tree

2 files changed

+22
-41
lines changed

2 files changed

+22
-41
lines changed

tuf/ngclient/_internal/requests_fetcher.py

Lines changed: 7 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,11 @@ def __init__(self):
5353
self.chunk_size: int = 400000 # bytes
5454
self.sleep_before_round: Optional[int] = None
5555

56-
def fetch(self, url: str, max_length: int) -> Iterator[bytes]:
57-
"""Fetches the contents of HTTP/HTTPS url from a remote server.
58-
59-
Ensures the length of the downloaded data is up to 'max_length'.
56+
def fetch(self, url: str) -> Iterator[bytes]:
57+
"""Fetches the contents of HTTP/HTTPS url from a remote server
6058
6159
Arguments:
6260
url: A URL string that represents a file location.
63-
max_length: An integer value representing the maximum
64-
number of bytes to be downloaded.
6561
6662
Raises:
6763
exceptions.SlowRetrievalError: A timeout occurs while receiving
@@ -90,17 +86,14 @@ def fetch(self, url: str, max_length: int) -> Iterator[bytes]:
9086
status = e.response.status_code
9187
raise exceptions.FetcherHTTPError(str(e), status)
9288

93-
return self._chunks(response, max_length)
89+
return self._chunks(response)
9490

95-
def _chunks(
96-
self, response: "requests.Response", max_length: int
97-
) -> Iterator[bytes]:
91+
def _chunks(self, response: "requests.Response") -> Iterator[bytes]:
9892
"""A generator function to be returned by fetch. This way the
9993
caller of fetch can differentiate between connection and actual data
10094
download."""
10195

10296
try:
103-
bytes_received = 0
10497
while True:
10598
# We download a fixed chunk of data in every round. This is
10699
# so that we can defend against slow retrieval attacks.
@@ -111,35 +104,19 @@ def _chunks(
111104
if self.sleep_before_round:
112105
time.sleep(self.sleep_before_round)
113106

114-
read_amount = min(
115-
self.chunk_size,
116-
max_length - bytes_received,
117-
)
118-
119107
# NOTE: This may not handle some servers adding a
120108
# Content-Encoding header, which may cause urllib3 to
121109
# misbehave:
122110
# https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582
123-
data = response.raw.read(read_amount)
124-
bytes_received += len(data)
111+
data = response.raw.read(self.chunk_size)
125112

126-
# We might have no more data to read. Check number of bytes
127-
# downloaded.
113+
# We might have no more data to read, we signal
114+
# that the download is complete.
128115
if not data:
129-
# Finally, we signal that the download is complete.
130116
break
131117

132118
yield data
133119

134-
if bytes_received >= max_length:
135-
break
136-
137-
logger.debug(
138-
"Downloaded %d out of %d bytes",
139-
bytes_received,
140-
max_length,
141-
)
142-
143120
except urllib3.exceptions.ReadTimeoutError as e:
144121
raise exceptions.SlowRetrievalError(str(e))
145122

tuf/ngclient/fetcher.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,11 @@ class FetcherInterface:
2929
__metaclass__ = abc.ABCMeta
3030

3131
@abc.abstractmethod
32-
def fetch(self, url: str, max_length: int) -> Iterator[bytes]:
32+
def fetch(self, url: str) -> Iterator[bytes]:
3333
"""Fetches the contents of HTTP/HTTPS url from a remote server.
3434
35-
Ensures the length of the downloaded data is up to 'max_length'.
36-
3735
Arguments:
3836
url: A URL string that represents a file location.
39-
max_length: An integer value representing the maximum
40-
number of bytes to be downloaded.
4137
4238
Raises:
4339
tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving
@@ -77,14 +73,22 @@ def download_file(self, url: str, max_length: int) -> Iterator[IO]:
7773
number_of_bytes_received = 0
7874

7975
with tempfile.TemporaryFile() as temp_file:
80-
chunks = self.fetch(url, max_length)
76+
chunks = self.fetch(url)
8177
for chunk in chunks:
82-
temp_file.write(chunk)
8378
number_of_bytes_received += len(chunk)
84-
if number_of_bytes_received > max_length:
85-
raise exceptions.DownloadLengthMismatchError(
86-
max_length, number_of_bytes_received
87-
)
79+
if number_of_bytes_received > max_length:
80+
raise exceptions.DownloadLengthMismatchError(
81+
max_length, number_of_bytes_received
82+
)
83+
84+
temp_file.write(chunk)
85+
86+
logger.debug(
87+
"Downloaded %d out of %d bytes",
88+
number_of_bytes_received,
89+
max_length,
90+
)
91+
8892
temp_file.seek(0)
8993
yield temp_file
9094

0 commit comments

Comments (0)