Skip to content

Commit

Permalink
Merge pull request #939 from sabeechen/upload-thorttling
Browse files Browse the repository at this point in the history
Implement upload throttling
  • Loading branch information
sabeechen authored Nov 13, 2023
2 parents b2c274c + 8293db8 commit eefff52
Show file tree
Hide file tree
Showing 48 changed files with 983 additions and 465 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9-buster
FROM python:3.11-buster

WORKDIR /usr/src/install
RUN apt-get update
Expand Down
19 changes: 19 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
{
"build": { "dockerfile": "Dockerfile" },
"customizations": {
"extensions": [
"ms-python.python",
"wholroyd.jinja",
"ms-python.vscode-pylance",
"cssho.vscode-svgviewer", // SVG viewer
"eamodio.gitlens", // IDE Git information
"ms-azuretools.vscode-docker", // Docker integration and linting
"shardulm94.trailing-spaces", // Show trailing spaces
"davidanson.vscode-markdownlint",
"IBM.output-colorizer", // Colorize your output/test logs
"Gruntfuggly.todo-tree", // Highlights TODO comments
"bierner.emojisense", // Emoji sense for markdown
"stkb.rewrap", // rewrap comments after n characters on one line
"vscode-icons-team.vscode-icons", // Better file extension icons
"github.vscode-pull-request-github", // Github interaction
"GitHub.copilot",
"mikoz.black-py"]
},
"runArgs": ["--init", "--privileged"],
"extensions": ["ms-python.python", "wholroyd.jinja","ms-python.vscode-pylance"],
"forwardPorts": [3000],
Expand Down
3 changes: 2 additions & 1 deletion .devcontainer/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ grpcio
aioping
pytz
tzlocal
pytest-cov
pytest-cov
pytest-xdist
2 changes: 1 addition & 1 deletion .github/workflows/prod_push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.8'
python-version: '3.11'

- name: Check out dev repo
uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.8'
python-version: '3.11'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/staging_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.8'
python-version: '3.11'
# Check out the current branch
- uses: actions/checkout@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_staging_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.8'
python-version: '3.11'

- name: Check out dev repo
uses: actions/checkout@v4
Expand Down
21 changes: 19 additions & 2 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
{
"recommendations": ["wholroyd.jinja"]
}
"recommendations": [
"ms-python.python",
"wholroyd.jinja",
"ms-python.vscode-pylance",
"cssho.vscode-svgviewer", // SVG viewer
"eamodio.gitlens", // IDE Git information
"ms-azuretools.vscode-docker", // Docker integration and linting
"shardulm94.trailing-spaces", // Show trailing spaces
"davidanson.vscode-markdownlint",
"IBM.output-colorizer", // Colorize your output/test logs
"Gruntfuggly.todo-tree", // Highlights TODO comments
"bierner.emojisense", // Emoji sense for markdown
"stkb.rewrap", // rewrap comments after n characters on one line
"vscode-icons-team.vscode-icons", // Better file extension icons
"github.vscode-pull-request-github", // Github interaction
"GitHub.copilot",
"mikoz.black-py"
]
}
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"python.linting.flake8Enabled": true,
"python.linting.pylintEnabled": false,
"python.linting.mypyEnabled": false,
"python.linting.mypyArgs": ["--python-version", "3.8"],
"python.linting.mypyArgs": ["--python-version", "3.11"],
"python.linting.flake8Args": ["--ignore=E501,E731", "--verbose"],
"python.autoComplete.addBrackets": true,
"files.associations": {
Expand Down
10 changes: 5 additions & 5 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
The project is mostly maintained by Stephen Beechen ([email protected]) whom you can reach out to for guidance. Before digging in to this, it might be helpful to familiarize yourself with some of the technologies used in the project.

- [Developing Addons for Home Assistant](https://developers.home-assistant.io/docs/add-ons) - Useful to understand how addons work.
- [Python](https://www.python.org/) - The addon is written in Python 3.8 and makes heavy use of the asyncio framework.
- [Python](https://www.python.org/) - The addon is written in Python 3.11 and makes heavy use of the asyncio framework.
- [AIOHTTP](https://docs.aiohttp.org/en/stable/) - The addon serves its web interface through an AIOHTTP server, and uses the AIOHTTP client library for all web requests.
- [pytest](https://docs.pytest.org/en/latest/) - The addon uses pytest for all of its tests.
- [Visual Studio Code](https://code.visualstudio.com/) - The addon codebase is designed to work with Visual Studio code, but in practice you could use any editor (it would be harder). These instructions assume you're using VSCode, it’s a free cross-platform download.
Expand All @@ -28,17 +28,17 @@ If you open the repository folder in Visual Studio code with docker installe

### Harder but also works: Manual Setup
1. Install [Visual Studio Code](https://code.visualstudio.com/)
2. Install [Python 3.8](https://www.python.org/downloads/) for your platform.
2. Install [Python 3.11](https://www.python.org/downloads/) for your platform.
3. Install a git client. I like [GitHub Desktop](https://desktop.github.com/)
4. Clone the project repository
```
https://github.com/sabeechen/hassio-google-drive-backup.git
```
5. Open Visual Studio Code, go to the extension menu, and install the Python extension from Microsoft. It may prompt you to choose a Python interpreter (you want Python 3.8) and select a test framework (you want pytest).
5. Open Visual Studio Code, go to the extension menu, and install the Python extension from Microsoft. It may prompt you to choose a Python interpreter (you want Python 3.11) and select a test framework (you want pytest).
6. <kbd>File</kbd> > <kbd>Open Folder</kbd> to open the cloned repository folder.
7. Open the terminal (`Ctrl` + `Shift` + <code>`</code>) and install the Python packages required for development:
```
> python3.8 -m pip install -r .devcontainer/requirements-dev.txt
> python3.11 -m pip install -r .devcontainer/requirements-dev.txt
```
That should set you up!

Expand Down Expand Up @@ -98,7 +98,7 @@ I haven't tried using the Supervisor's new devcontainers for development yet (th
You should be able to run tests from within the Visual Studio tests tab. Make sure all the tests pass before you to make a PR. You can also run them from the command line with:

```bash
> python3.8 -m pytest hassio-google-drive-backup
> python3.11 -m pytest hassio-google-drive-backup
```

## Writing Tests
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

MIT License

Copyright (c) 2022 Stephen Beechen
Copyright (c) 2023 Stephen Beechen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ The config option `backup_name` can be changed to give backups a different name
- `{version_ha}`, Home Assistant version string (eg 0.91.3)
- `{version_hassos}`: HassOS version string (eg 0.2.15)
- `{version_super}`: Supervisor version string (eg 1.2.19)
- `{date}`: Locale aware date (eg 2022/01/01).
- `{date}`: Locale aware date (eg 2023/01/01).
- `{time}`: Locale aware time (eg 02:03:04 am)
- `{datetime}`: Locale-aware datetime string
- `{isotime}`: Date and time in ISO format
Expand Down
2 changes: 1 addition & 1 deletion hassio-google-drive-backup/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ COPY . /app
RUN chmod +x addon_deps.sh
RUN ./addon_deps.sh
RUN pip3 install .
COPY config.json /usr/local/lib/python3.8/site-packages/config.json
COPY config.json /usr/local/lib/python3.11/site-packages/config.json

EXPOSE 1627
EXPOSE 8099
Expand Down
4 changes: 2 additions & 2 deletions hassio-google-drive-backup/Dockerfile-server
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Use the official lightweight Python image.
# https://hub.docker.com/_/python
FROM python:3.9-buster
FROM python:3.11-buster

# Copy local code to the container image.
ENV APP_HOME /server
WORKDIR $APP_HOME
COPY . ./
COPY config.json /usr/local/lib/python3.9/site-packages/config.json
COPY config.json /usr/local/lib/python3.11/site-packages/config.json

# Install server python requirements
RUN pip3 install --trusted-host pypi.python.org -r requirements-server.txt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ def validate(self, value):
return value

def formatForUi(self, value):
if value is None or len(str(value)) == 0 or value == 0:
return ""
return ByteFormatter().format(value)
16 changes: 12 additions & 4 deletions hassio-google-drive-backup/backup/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ class Setting(Enum):
DEPRECTAED_ENABLE_BACKUP_STALE_SENSOR = "enable_snapshot_stale_sensor"
DEPRECTAED_ENABLE_BACKUP_STATE_SENSOR = "enable_snapshot_state_sensor"

UPLOAD_LIMIT_BYTES_PER_SECOND = "upload_limit_bytes_per_second"

def default(self):
if "staging" in VERSION and self in _STAGING_DEFAULTS:
return _STAGING_DEFAULTS[self]
Expand Down Expand Up @@ -295,6 +297,8 @@ def key(self):
Setting.CACHE_WARMUP_MAX_SECONDS: 15 * 60, # 30 minutes
Setting.CACHE_WARMUP_ERROR_TIMEOUT_SECONDS: 24 * 60 * 60, # 1 day
Setting.MAX_BACKOFF_SECONDS: 60 * 60 * 2, # 2 hours

Setting.UPLOAD_LIMIT_BYTES_PER_SECOND: 0,
}

_STAGING_DEFAULTS = {
Expand Down Expand Up @@ -435,6 +439,8 @@ def key(self):
Setting.CACHE_WARMUP_MAX_SECONDS: "float(0,)",
Setting.CACHE_WARMUP_ERROR_TIMEOUT_SECONDS: "float(0,)",
Setting.MAX_BACKOFF_SECONDS: "int(3600,)?",

Setting.UPLOAD_LIMIT_BYTES_PER_SECOND: "float(0,)?",
}

PRIVATE = [
Expand Down Expand Up @@ -506,10 +512,12 @@ def getValidator(name, schema):
for key in addon_config["schema"]:
_VALIDATORS[_LOOKUP[key]] = getValidator(key, addon_config["schema"][key])

_VALIDATORS[Setting.MAX_SYNC_INTERVAL_SECONDS] = DurationAsStringValidator("max_sync_interval_seconds", minimum=1, maximum=None)
_VALIDATORS[Setting.HA_REPORTING_INTERVAL_SECONDS] = DurationAsStringValidator("ha_reporting_interval_seconds", minimum=1, maximum=None)
_VALIDATORS[Setting.DELETE_IGNORED_AFTER_DAYS] = DurationAsStringValidator("delete_ignored_after_days", minimum=0, maximum=None, base_seconds=60 * 60 * 24, default_as_empty=0)
_VALIDATORS[Setting.MAXIMUM_UPLOAD_CHUNK_BYTES] = BytesizeAsStringValidator("maximum_upload_chunk_bytes", minimum=256 * 1024)
_VALIDATORS[Setting.MAX_SYNC_INTERVAL_SECONDS] = DurationAsStringValidator(Setting.MAX_SYNC_INTERVAL_SECONDS.value, minimum=1, maximum=None)
_VALIDATORS[Setting.HA_REPORTING_INTERVAL_SECONDS] = DurationAsStringValidator(Setting.HA_REPORTING_INTERVAL_SECONDS.value, minimum=1, maximum=None)
_VALIDATORS[Setting.DELETE_IGNORED_AFTER_DAYS] = DurationAsStringValidator(Setting.DELETE_IGNORED_AFTER_DAYS.value, minimum=0, maximum=None, base_seconds=60 * 60 * 24, default_as_empty=0)
_VALIDATORS[Setting.MAXIMUM_UPLOAD_CHUNK_BYTES] = BytesizeAsStringValidator(Setting.MAXIMUM_UPLOAD_CHUNK_BYTES.value, minimum=256 * 1024)
_VALIDATORS[Setting.PENDING_BACKUP_TIMEOUT_SECONDS] = DurationAsStringValidator(Setting.PENDING_BACKUP_TIMEOUT_SECONDS.value, minimum=1, maximum=None)
_VALIDATORS[Setting.UPLOAD_LIMIT_BYTES_PER_SECOND] = BytesizeAsStringValidator(Setting.UPLOAD_LIMIT_BYTES_PER_SECOND.value, minimum=0)
VERSION = addon_config["version"]


Expand Down
6 changes: 6 additions & 0 deletions hassio-google-drive-backup/backup/creds/driverequester.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ def __init__(self, config: Config, session: ClientSession, resolver: Resolver):
self.session = session
self.resolver = resolver
self.config = config
self.all_resposnes: list[ClientResponse] = []

# This is just kept around for debugging purposes
self.track_response = False

async def request(self, method, url, headers={}, json=None, data=None) -> ClientResponse:
try:
response = await self.session.request(method, url, headers=headers, json=json, timeout=self.buildTimeout(), data=data)
if self.track_response:
self.all_resposnes.append(response)
if response.status < 400:
return response
await self.raiseForKnownErrors(response)
Expand Down
42 changes: 29 additions & 13 deletions hassio-google-drive-backup/backup/drive/driverequests.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import io
import math
import re
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Union
from urllib.parse import urlencode
from datetime import datetime, timedelta

Expand All @@ -14,12 +14,13 @@
from ..exceptions import (GoogleCredentialsExpired,
GoogleSessionError, LogicError,
ProtocolError, ensureKey, KnownTransient, GoogleTimeoutError, GoogleUnexpectedError)
from backup.util import Backoff
from backup.util import Backoff, TokenBucket
from backup.file import JsonFileSaver
from ..time import Time
from ..logger import getLogger
from backup.creds import Creds, Exchanger, DriveRequester
from datetime import timezone
from ..config.byteformatter import ByteFormatter

logger = getLogger(__name__)

Expand Down Expand Up @@ -74,7 +75,7 @@
@singleton
class DriveRequests():
@inject
def __init__(self, config: Config, time: Time, drive: DriveRequester, session: ClientSession, exchanger: Exchanger):
def __init__(self, config: Config, time: Time, drive: DriveRequester, session: ClientSession, exchanger: Exchanger, byte_formatter: ByteFormatter):
self.session = session
self.config = config
self.time = time
Expand All @@ -87,6 +88,7 @@ def __init__(self, config: Config, time: Time, drive: DriveRequester, session: C
self.last_attempt_location = None
self.last_attempt_count = 0
self.last_attempt_start_time = None
self.bytes_formatter = byte_formatter
self.tryLoadCredentials()

async def _getHeaders(self):
Expand Down Expand Up @@ -216,6 +218,13 @@ async def create(self, stream, metadata, mime_type):
# Upload logic is complicated. See https://developers.google.com/drive/api/v3/manage-uploads#resumable
total_size = stream.size()
location = None

limiter: Union[TokenBucket, None] = None
if self.config.get(Setting.UPLOAD_LIMIT_BYTES_PER_SECOND) > 0:
# google requires a minimum 256kb upload chunk, so the limiter bucket capacity must be at least that to function.
speed_as_tokens = self.config.get(Setting.UPLOAD_LIMIT_BYTES_PER_SECOND) / BASE_CHUNK_SIZE
capacity = max(speed_as_tokens, 1)
limiter = TokenBucket(self.time, capacity, speed_as_tokens, 0)
if metadata == self.last_attempt_metadata and self.last_attempt_location is not None and self.last_attempt_count < RETRY_SESSION_ATTEMPTS and self.time.now() < self.last_attempt_start_time + UPLOAD_SESSION_EXPIRATION_DURATION:
logger.debug(
"Attempting to resume a previously failed upload where we left off")
Expand Down Expand Up @@ -282,10 +291,17 @@ async def create(self, stream, metadata, mime_type):

# Always start with the minimum chunk size and work up from there in case the last attempt
# failed due to connectivity errors or ... whatever.
current_chunk_size = BASE_CHUNK_SIZE
current_chunk_size = 1
while True:
start = stream.position()
data = await stream.read(current_chunk_size)

# See if we need to limit the chunk size to reduce bandwidth.
if limiter is not None:
request = int(await limiter.consumeWithWait(1, current_chunk_size))
if request != current_chunk_size:
# This can go over the speed cap slightly, not a big deal though
current_chunk_size = request
data = await stream.read(current_chunk_size * BASE_CHUNK_SIZE)
chunk_size = len(data.getbuffer())
if chunk_size == 0:
raise LogicError(
Expand All @@ -295,7 +311,7 @@ async def create(self, stream, metadata, mime_type):
"Content-Range": "bytes {0}-{1}/{2}".format(start, start + chunk_size - 1, total_size)
}
startTime = self.time.now()
logger.debug("Sending {0} bytes to Google Drive".format(current_chunk_size))
logger.debug("Sending {0} to Google Drive".format(self.bytes_formatter.format(chunk_size)))
try:
async with await self.retryRequest("PUT", location, headers=headers, data=data, patch_url=False) as partial:
# Base the next chunk size on how long it took to send the last chunk.
Expand Down Expand Up @@ -336,17 +352,17 @@ async def create(self, stream, metadata, mime_type):
raise e

def _getNextChunkSize(self, last_chunk_size, last_chunk_seconds):
max = BASE_CHUNK_SIZE * math.floor(self.config.get(Setting.MAXIMUM_UPLOAD_CHUNK_BYTES) / BASE_CHUNK_SIZE)
if max < BASE_CHUNK_SIZE:
max = BASE_CHUNK_SIZE
max = math.floor(self.config.get(Setting.MAXIMUM_UPLOAD_CHUNK_BYTES) / BASE_CHUNK_SIZE)
if max < 1:
max = 1
if last_chunk_seconds <= 0:
return max
next_chunk = CHUNK_UPLOAD_TARGET_SECONDS * last_chunk_size / last_chunk_seconds
next_chunk = math.floor(CHUNK_UPLOAD_TARGET_SECONDS * last_chunk_size / last_chunk_seconds)
if next_chunk >= max:
return max
if next_chunk < BASE_CHUNK_SIZE:
return BASE_CHUNK_SIZE
return math.floor(next_chunk / BASE_CHUNK_SIZE) * BASE_CHUNK_SIZE
if next_chunk < 1:
return 1
return next_chunk

async def createFolder(self, metadata):
async with await self.retryRequest("POST", URL_FILES + "?supportsAllDrives=true", json=metadata) as resp:
Expand Down
Loading

0 comments on commit eefff52

Please sign in to comment.