deepgram · bd-g · Sep 25, 2024 · Sep 25, 2024 · Sep 25, 2024 · Sep 25, 2024
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 
 ## [Unreleased]
 
+### Added
+
+- Automatic model management on AWS now supports model removal. See the `engine.modelManager.models.remove` section in the `values.yaml` file for details.
+
+### Changed
+
+- Automatic model downloads on AWS are moved from `engine.modelManager.models.links` to `engine.modelManager.models.add`. The old `links` field is still supported, but migration is recommended.
+
 ### Fixed
 
 - Update sample files to fix an issue with sample command for Kubernetes Secret creation storing Quay credential

@@ -1,15 +1,15 @@
 dependencies:
 - name: gpu-operator
   repository: https://helm.ngc.nvidia.com/nvidia
-  version: v24.3.0
+  version: v24.6.2
 - name: cluster-autoscaler
   repository: https://kubernetes.github.io/autoscaler
-  version: 9.37.0
+  version: 9.40.0
 - name: kube-prometheus-stack
   repository: https://prometheus-community.github.io/helm-charts
   version: 60.5.0
 - name: prometheus-adapter
   repository: https://prometheus-community.github.io/helm-charts
-  version: 4.10.0
-digest: sha256:38ff59bc00f2cd88c2f06a844f0309d3fea593dd0a8877a7667288fa9f5c53b8
-generated: "2024-07-15T11:42:44.763681334-04:00"
+  version: 4.11.0
+digest: sha256:015d3cb08c7419ea94d643fbb4848a67a2d199b4f8d0fb76baaa6129567f6f09
+generated: "2024-09-25T13:42:10.032220779-04:00"
@@ -236,7 +236,9 @@ If you encounter issues while deploying or using Deepgram, consider the followin
 | engine.metricsServer | object | `` | metricsServer exposes an endpoint on each Engine container for reporting inference-specific system metrics. See https://developers.deepgram.com/docs/metrics-guide#deepgram-engine for more details. |
 | engine.metricsServer.host | string | `"0.0.0.0"` | host is the IP address to listen on for metrics requests. You will want to listen on all interfaces to interact with other pods in the cluster. |
 | engine.metricsServer.port | int | `9991` | port to listen on for metrics requests |
-| engine.modelManager.models.links | list | `[]` | Links to your Deepgram models, if automatically downloading into storage backing a persistent volume. **Automatic downloads are currently supported for AWS EFS volumes only.** Insert each model link provided to you by your Deepgram Account Representative. |
+| engine.modelManager.models.add | list | `[]` | Links to your Deepgram models to automatically download into storage backing a persistent volume. **Automatic model management is currently supported for AWS EFS volumes only.** Insert each model link provided to you by your Deepgram Account Representative. |
+| engine.modelManager.models.links | list | `[]` | Deprecated field to automatically download models. Functionality still supported, but migration to use `engine.modelManager.models.add` is strongly recommended. |
+| engine.modelManager.models.remove | list | `[]` | To remove a model from storage (for example, to reduce the number of models Engine loads on startup), move its link from the `engine.modelManager.models.add` section to this section. You can also use a model name instead of the full link to designate it for removal. **Automatic model management is currently supported for AWS EFS volumes only.** |
 | engine.modelManager.volumes.aws.efs.enabled | bool | `false` | Whether to use an [AWS Elastic File System](https://aws.amazon.com/efs/) to store Deepgram models for use by Engine containers. This option requires your cluster to be running in [AWS EKS](https://aws.amazon.com/eks/). |
 | engine.modelManager.volumes.aws.efs.fileSystemId | string | `nil` | FileSystemId of existing AWS Elastic File System where Deepgram model files will be persisted. You can find it using the AWS CLI: ``` $ aws efs describe-file-systems --query "FileSystems[*].FileSystemId" ``` |
 | engine.modelManager.volumes.aws.efs.forceDownload | bool | `false` | Whether to force a fresh download of all model links provided, even if models are already present in EFS. |

@@ -98,11 +98,16 @@ engine:
           enabled: true
           fileSystemId: fs-xxxxxxxxxxxxxxxx # Replace with your EFS ID
     models:
-      links:
+      add:
         - https://link-to-model-1.dg # Replace these links with those provided to you
         - https://link-to-model-2.dg #   by your Deepgram Account Representative.
         - https://link-to-model-3.dg
         - ...
+      remove:
+        # - https://link-to-old-model-1.dg # Replace these with identifiers for any models already present
+        # - https://link-to-old-model-2.dg #   in the EFS that you'd like removed. For a new installation,
+        # - name-of-old-model-3.dg #   this will likely be empty.
+        # - ...
 
 licenseProxy:
   enabled: true

@@ -1,9 +1,9 @@
 {{- if .Values.engine.modelManager.volumes.aws.efs.enabled }}
-{{- if .Values.engine.modelManager.models.links }}
+{{- if or .Values.engine.modelManager.models.links .Values.engine.modelManager.models.add .Values.engine.modelManager.models.remove }}
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: {{ .Values.engine.modelManager.volumes.aws.efs.namePrefix }}-aws-efs-models-download
+  name: {{ .Values.engine.modelManager.volumes.aws.efs.namePrefix }}-aws-efs-models-management
   labels:
 {{ include "deepgram-self-hosted.labels" . | indent 4}}
   annotations:
@@ -15,35 +15,54 @@ spec:
       affinity:
         {{- toYaml .Values.engine.affinity | nindent 8 }}
       containers:
-      - name: model-download
+      - name: model-management
         image: alpine
         command:
           - /bin/sh
           - -c
           - |
-            LINKS=$(cat <<EOF
+            TIMESTAMP_FORMAT="%Y-%m-%dT%H:%M:%SZ"
+
+            ADD_LINKS=$(cat <<EOF
+            {{- range .Values.engine.modelManager.models.add }}
+            {{ . }}
+            {{- end }}
             {{- range .Values.engine.modelManager.models.links }}
             {{ . }}
             {{- end }}
             EOF
             )
-            TIMESTAMP_FORMAT="%Y-%m-%dT%H:%M:%SZ"
-
-            echo "$LINKS" | while IFS= read -r link; do
-              FILE_NAME=$(basename "$link")
 
-              if [ "{{ .Values.engine.modelManager.volumes.aws.efs.forceDownload }}" == "true" ]; then
-                printf "[%s] [INFO] Force downloading model file: %s\n" "$(date -u +$TIMESTAMP_FORMAT)" "$FILE_NAME"
-                wget -O "/mnt/efs/$FILE_NAME" "$link"
-                printf "[%s] [INFO] Downloaded model file (force download): %s\n" "$(date -u +$TIMESTAMP_FORMAT)" "$FILE_NAME"
-              elif [ ! -f "/mnt/efs/$FILE_NAME" ]; then
+            echo "$ADD_LINKS" | while IFS= read -r link; do
+              [ -z "$link" ] && continue
+              FILE_NAME=$(basename "$link")
+              if [ "{{ .Values.engine.modelManager.volumes.aws.efs.forceDownload }}" == "true" ] || [ ! -f "/mnt/efs/$FILE_NAME" ]; then
                 printf "[%s] [INFO] Downloading model file: %s\n" "$(date -u +$TIMESTAMP_FORMAT)" "$FILE_NAME"
                 wget -O "/mnt/efs/$FILE_NAME" "$link"
                 printf "[%s] [INFO] Downloaded model file: %s\n" "$(date -u +$TIMESTAMP_FORMAT)" "$FILE_NAME"
               else
                 printf "[%s] [INFO] Model file already exists in EFS storage: %s\n" "$(date -u +$TIMESTAMP_FORMAT)" "$FILE_NAME"
               fi
             done
+
+            REMOVE_LINKS=$(cat <<EOF
+            {{- range .Values.engine.modelManager.models.remove }}
+            {{ . }}
+            {{- end }}
+            EOF
+            )
+
+            echo "$REMOVE_LINKS" | while IFS= read -r link; do
+              [ -z "$link" ] && continue
+              FILE_NAME=$(basename "$link")
+              if [ -f "/mnt/efs/$FILE_NAME" ]; then
+                printf "[%s] [INFO] Removing model file: %s\n" "$(date -u +$TIMESTAMP_FORMAT)" "$FILE_NAME"
+                rm "/mnt/efs/$FILE_NAME"
+                printf "[%s] [INFO] Removed model file: %s\n" "$(date -u +$TIMESTAMP_FORMAT)" "$FILE_NAME"
+              else
+                printf "[%s] [INFO] Model file not found in EFS storage: %s\n" "$(date -u +$TIMESTAMP_FORMAT)" "$FILE_NAME"
+              fi
+            done
         volumeMounts:
           - name: aws-efs-volume
             mountPath: /mnt/efs
@@ -54,4 +73,3 @@ spec:
             claimName: {{ .Values.engine.modelManager.volumes.aws.efs.namePrefix }}-aws-efs-pvc
 {{- end }}
 {{- end }}
-
@@ -452,12 +452,21 @@ engine:
           fsType: "ext4"
 
     models:
-      # -- Links to your Deepgram models, if automatically downloading
+      # -- Deprecated field to automatically download models. Functionality still supported,
+      # but migration to use `engine.modelManager.models.add` is strongly recommended.
+      links: []
+      # -- Links to your Deepgram models to automatically download
       # into storage backing a persistent volume.
-      # **Automatic downloads are currently supported for AWS EFS volumes only.**
+      # **Automatic model management is currently supported for AWS EFS volumes only.**
       # Insert each model link provided to you by your Deepgram
       # Account Representative.
-      links: []
+      add: []
+      # -- To remove a model from storage (for example, to reduce the number of models
+      # Engine loads on startup), move its link from the `engine.modelManager.models.add`
+      # section to this section. You can also use a model name instead of the full link
+      # to designate it for removal.
+      # **Automatic model management is currently supported for AWS EFS volumes only.**
+      remove: []
 
   # -- chunking defines the size of audio chunks to process in seconds.
   # Adjusting these values will affect both inference performance and accuracy