fix #1706 - memory leak issue while downloading large files #2452

Closed
wants to merge 2 commits
113 changes: 113 additions & 0 deletions googleapiclient/http.py
@@ -661,6 +661,119 @@ def __init__(
)


class MediaGenBaseDownload(object):
""" Download media resources using generator.
Example:
request = farms.animals().get_media(id='cow')
downloader = MediaGenBaseDownload(request)
for chunk, status, done in downloader.next_chunk():
with open('cow.png', 'ab') as save_file:
save_file.write(chunk)
print("Download %d%%." % round(status.progress() * 100, 2))
print(done)
"""

@util.positional(2)
def __init__(self, request, chunksize=DEFAULT_CHUNK_SIZE):
"""Constructor.
Args:
request: googleapiclient.http.HttpRequest, the media request to perform in
chunks.
chunksize: int, File will be downloaded in chunks of this many bytes.
"""
self._request = request
self._uri = request.uri
self._chunksize = chunksize
self._progress = 0
self._total_size = None
self._done = False

# Stubs for testing.
self._sleep = time.sleep
self._rand = random.random

self._headers = {}
for k, v in request.headers.items():
# allow users to supply custom headers by setting them on the request
# but strip out the ones that are set by default on requests generated by
# API methods like Drive's files().get(fileId=...)
if k.lower() not in ("accept", "accept-encoding", "user-agent"):
self._headers[k] = v

@util.positional(1)
def next_chunk(self, num_retries=0):
"""Get the next chunk of the download.
Args:
num_retries: Integer, number of times to retry with randomized
exponential backoff. If all retries fail, the raised HttpError
represents the last request. If zero (default), we attempt the
request only once.
Yields:
(content, status, done): (bytes, MediaDownloadProgress, boolean)
'content' is the data for the chunk just downloaded. The value of 'done'
will be True when the media has been fully downloaded or the total size
of the media is unknown.
Raises:
googleapiclient.errors.HttpError if the response was not a 2xx.
httplib2.HttpLib2Error if a transport error has occurred.
"""
while not self._done:
headers = self._headers.copy()
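# Request only the next chunk via an HTTP Range header (the end byte is inclusive).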
headers["range"] = "bytes=%d-%d" % (
self._progress,
self._progress + self._chunksize - 1,
)
http = self._request.http

resp, content = _retry_request(
http,
num_retries,
"media download",
self._sleep,
self._rand,
self._uri,
"GET",
headers=headers,
)

if resp.status in [200, 206]:
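# If the server redirected the download, follow the new URI for subsequent chunk requests.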
if "content-location" in resp and resp["content-location"] != self._uri:
self._uri = resp["content-location"]
self._progress += len(content)

if "content-range" in resp:
content_range = resp["content-range"]
length = content_range.rsplit("/", 1)[1]
self._total_size = int(length)
elif "content-length" in resp:
self._total_size = int(resp["content-length"])

if self._total_size is None or self._progress == self._total_size:
self._done = True

yield (
content,
MediaDownloadProgress(self._progress, self._total_size),
self._done
)
continue
elif resp.status == 416:
# 416 is Range Not Satisfiable
# This typically occurs with a zero byte file
content_range = resp["content-range"]
length = content_range.rsplit("/", 1)[1]
self._total_size = int(length)
if self._total_size == 0:
self._done = True
yield (
content,
MediaDownloadProgress(self._progress, self._total_size),
self._done,
)
continue

raise HttpError(resp, content, uri=self._uri)


class MediaIoBaseDownload(object):
""" "Download media resources.

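As a usage sketch (not part of this change): the generator yields one chunk at a time, so a large file can be streamed straight to disk while only one chunk is held in memory. The `creds` and `file_id` names below are illustrative placeholders, and the import assumes the class lands in googleapiclient/http.py as shown in the diff above.

from googleapiclient.discovery import build
from googleapiclient.http import MediaGenBaseDownload

# `creds` is assumed to hold authorized user credentials and `file_id` an
# existing Drive file ID; both are placeholders for this sketch.
drive = build("drive", "v3", credentials=creds)
request = drive.files().get_media(fileId=file_id)

downloader = MediaGenBaseDownload(request, chunksize=1024 * 1024)
with open("large_file.bin", "wb") as save_file:
    # Each iteration fetches and yields one chunk, so memory use stays
    # bounded by chunksize instead of growing with the file size.
    for chunk, status, done in downloader.next_chunk(num_retries=3):
        save_file.write(chunk)
        print("Download %d%%." % int(status.progress() * 100))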