From e22dd560be791c15d9e5e6581480281a1b587f07 Mon Sep 17 00:00:00 2001 From: Brent George Date: Thu, 25 Jul 2024 14:55:45 -0400 Subject: [PATCH 1/3] July 2024 Deepgram product release update --- charts/deepgram-self-hosted/CHANGELOG.md | 22 +++++++++++++-- charts/deepgram-self-hosted/Chart.yaml | 4 +-- charts/deepgram-self-hosted/README.md | 15 +++++------ .../templates/api/api.config.yaml | 7 +++-- .../templates/engine/engine.config.yaml | 4 +-- charts/deepgram-self-hosted/values.yaml | 27 ++++++++----------- common/license_proxy_deploy/api.toml | 6 +++++ common/standard_deploy/api.toml | 6 +++++ docker/docker-compose.license-proxy.yml | 6 ++--- docker/docker-compose.standard.yml | 4 +-- podman/podman-compose.license-proxy.yml | 6 ++--- podman/podman-compose.standard.yml | 4 +-- 12 files changed, 68 insertions(+), 43 deletions(-) diff --git a/charts/deepgram-self-hosted/CHANGELOG.md b/charts/deepgram-self-hosted/CHANGELOG.md index ae01100..22d5dba 100644 --- a/charts/deepgram-self-hosted/CHANGELOG.md +++ b/charts/deepgram-self-hosted/CHANGELOG.md @@ -6,6 +6,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] +## [0.4.0] - 2024-07-25 + +### Added +- Introduced entity detection feature flag for API containers (`false` by default). +- Updated default container tags to July 2024 release. Refer to the [main Deepgram changelog](https://deepgram.com/changelog/deepgram-self-hosted-july-2024-release-240725) for additional details. Highlights include: + - Support for Deepgram's new English/Spanish multilingual code-switching model + - Beta support for entity detection + - Beta support for improved redaction for pre-recorded audio + - Beta support for improved entity formatting for streaming audio + +### Removed + +- Removed the `api.features.topicDetection`, `api.features.summarization`, `engine.features.multichannel`, and `engine.features.languageDetection` values; these features are now always enabled (`true`) in the rendered API and Engine configs. 
+ ## [0.3.0] - 2024-07-18 ### Added @@ -16,9 +30,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Added +- Sample `values.yaml` file for on-premises/self-managed Kubernetes clusters. + +### Fixed + - Resolves a mismatch between PVC and SC prefix naming convention. - Resolves error when specifying custom service account names. -- Sample `values.yaml` file for on-premises/self-managed Kubernetes clusters. ### Changed @@ -66,7 +83,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - Initial implementation of the Helm chart. -[unreleased]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.3.0...HEAD +[unreleased]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.4.0...HEAD +[0.4.0]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.3.0...deepgram-self-hosted-0.4.0 [0.3.0]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.2.3...deepgram-self-hosted-0.3.0 [0.2.3]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.2.2-beta...deepgram-self-hosted-0.2.3 [0.2.2-beta]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.2.1-beta...deepgram-self-hosted-0.2.2-beta diff --git a/charts/deepgram-self-hosted/Chart.yaml b/charts/deepgram-self-hosted/Chart.yaml index 29c5ee5..697e516 100644 --- a/charts/deepgram-self-hosted/Chart.yaml +++ b/charts/deepgram-self-hosted/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v2 name: deepgram-self-hosted type: application -version: 0.3.0 -appVersion: "release-240627" +version: 0.4.0 +appVersion: "release-240725" description: A Helm chart for running Deepgram services in a self-hosted environment home: "https://developers.deepgram.com/docs/self-hosted-introduction" sources: ["https://github.com/deepgram/self-hosted-resources"] diff --git a/charts/deepgram-self-hosted/README.md 
b/charts/deepgram-self-hosted/README.md index 41dcf8b..bc1de8b 100644 --- a/charts/deepgram-self-hosted/README.md +++ b/charts/deepgram-self-hosted/README.md @@ -1,6 +1,6 @@ # deepgram-self-hosted -![Version: 0.3.0](https://img.shields.io/badge/Version-0.3.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: release-240627](https://img.shields.io/badge/AppVersion-release--240627-informational?style=flat-square) [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/deepgram-self-hosted)](https://artifacthub.io/packages/search?repo=deepgram-self-hosted) +![Version: 0.4.0](https://img.shields.io/badge/Version-0.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: release-240725](https://img.shields.io/badge/AppVersion-release--240725-informational?style=flat-square) [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/deepgram-self-hosted)](https://artifacthub.io/packages/search?repo=deepgram-self-hosted) A Helm chart for running Deepgram services in a self-hosted environment @@ -188,11 +188,11 @@ If you encounter issues while deploying or using Deepgram, consider the followin | api.driverPool.standard.timeoutBackoff | float | `1.2` | timeoutBackoff is the factor to increase the timeout by for each additional retry (for exponential backoff). | | api.features | object | `` | Enable ancillary features | | api.features.diskBufferPath | string | `nil` | If API is receiving requests faster than Engine can process them, a request queue will form. By default, this queue is stored in memory. Under high load, the queue may grow too large and cause Out-Of-Memory errors. To avoid this, set a diskBufferPath to buffer the overflow on the request queue to disk. 
WARN: This is only to temporarily buffer requests during high load. If there is not enough Engine capacity to process the queued requests over time, the queue (and response time) will grow indefinitely. | -| api.features.summarization | bool | `true` | summarization enable summarization *if* a valid summarization model is available. | -| api.features.topicDetection | bool | `true` | topicDetection enables topic detection *if* a valid topic detection model is available. | +| api.features.entityDetection | bool | `false` | Enables entity detection on pre-recorded audio *if* a valid entity detection model is available. *WARNING*: Beta functionality. | +| api.features.entityRedaction | bool | `false` | Enables entity-based redaction on pre-recorded audio *if* a valid entity detection model is available. *WARNING*: Beta functionality. | | api.image.path | string | `"quay.io/deepgram/onprem-api"` | path configures the image path to use for creating API containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. | | api.image.pullPolicy | string | `"IfNotPresent"` | pullPolicy configures how the Kubelet attempts to pull the Deepgram API image | -| api.image.tag | string | `"release-240627"` | tag defines which Deepgram release to use for API containers | +| api.image.tag | string | `"release-240725"` | tag defines which Deepgram release to use for API containers | | api.livenessProbe | object | `` | Liveness probe customization for API pods. | | api.namePrefix | string | `"deepgram-api"` | namePrefix is the prefix to apply to the name of all K8s objects associated with the Deepgram API containers. | | api.readinessProbe | object | `` | Readiness probe customization for API pods. 
| @@ -228,13 +228,10 @@ If you encounter issues while deploying or using Deepgram, consider the followin | engine.chunking.speechToText.streaming.minDuration | float | `nil` | minDuration is the minimum audio duration for a STT chunk size for a streaming request | | engine.chunking.speechToText.streaming.step | float | `1` | step defines how often to return interim results, in seconds. This value may be lowered to increase the frequency of interim results. However, this also causes a significant decrease in the number of concurrent streams supported by a single GPU. Please contact your Deepgram Account representative for more details. | | engine.concurrencyLimit.activeRequests | int | `nil` | activeRequests limits the number of active requests handled by a single Engine container. If additional requests beyond the limit are sent, the API container forming the request will try a different Engine pod. If no Engine pods are able to accept the request, the API will return a 429 HTTP response to the client. The `nil` default means no limit will be set. | -| engine.features | object | `` | Enable ancillary features | -| engine.features.languageDetection | bool | `true` | languageDetection enables Deepgram language detection *if* a valid language detection model is available | -| engine.features.multichannel | bool | `true` | multichannel allows/disallows multichannel requests | | engine.halfPrecision.state | string | `"auto"` | Engine will automatically enable half precision operations if your GPU supports them. You can explicitly enable or disable this behavior with the state parameter which supports `"enable"`, `"disabled"`, and `"auto"`. | | engine.image.path | string | `"quay.io/deepgram/onprem-engine"` | path configures the image path to use for creating Engine containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. 
| | engine.image.pullPolicy | string | `"IfNotPresent"` | pullPolicy configures how the Kubelet attempts to pull the Deepgram Engine image | -| engine.image.tag | string | `"release-240627"` | tag defines which Deepgram release to use for Engine containers | +| engine.image.tag | string | `"release-240725"` | tag defines which Deepgram release to use for Engine containers | | engine.livenessProbe | object | `` | Liveness probe customization for Engine pods. | | engine.metricsServer | object | `` | metricsServer exposes an endpoint on each Engine container for reporting inference-specific system metrics. See https://developers.deepgram.com/docs/metrics-guide#deepgram-engine for more details. | | engine.metricsServer.host | string | `"0.0.0.0"` | host is the IP address to listen on for metrics requests. You will want to listen on all interfaces to interact with other pods in the cluster. | @@ -290,7 +287,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | licenseProxy.enabled | bool | `false` | The License Proxy is optional, but highly recommended to be deployed in production to enable highly available environments. | | licenseProxy.image.path | string | `"quay.io/deepgram/onprem-license-proxy"` | path configures the image path to use for creating License Proxy containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. 
| | licenseProxy.image.pullPolicy | string | `"IfNotPresent"` | pullPolicy configures how the Kubelet attempts to pull the Deepgram License Proxy image | -| licenseProxy.image.tag | string | `"release-240627"` | tag defines which Deepgram release to use for License Proxy containers | +| licenseProxy.image.tag | string | `"release-240725"` | tag defines which Deepgram release to use for License Proxy containers | | licenseProxy.keepUpstreamServerAsBackup | bool | `true` | Even with a License Proxy deployed, API and Engine pods can be configured to keep the upstream `license.deepgram.com` license server as a fallback licensing option if the License Proxy is unavailable. Disable this option if you are restricting API/Engine Pod network access for security reasons, and only the License Proxy should send egress traffic to the upstream license server. | | licenseProxy.livenessProbe | object | `` | Liveness probe customization for Proxy pods. | | licenseProxy.namePrefix | string | `"deepgram-license-proxy"` | namePrefix is the prefix to apply to the name of all K8s objects associated with the Deepgram License Proxy containers. 
| diff --git a/charts/deepgram-self-hosted/templates/api/api.config.yaml b/charts/deepgram-self-hosted/templates/api/api.config.yaml index db5821f..d08b06a 100644 --- a/charts/deepgram-self-hosted/templates/api/api.config.yaml +++ b/charts/deepgram-self-hosted/templates/api/api.config.yaml @@ -43,8 +43,11 @@ data: {{- end }} [features] - topic_detection = {{ .Values.api.features.topicDetection }} - summarization = {{ .Values.api.features.summarization }} + topic_detection = true + summarization = true + entity_detection = {{ .Values.api.features.entityDetection }} + entity_redaction = {{ .Values.api.features.entityRedaction }} + {{- if .Values.api.features.diskBufferPath }} disk_buffer_path = "{{ .Values.api.features.diskBufferPath }}" {{- end }} diff --git a/charts/deepgram-self-hosted/templates/engine/engine.config.yaml b/charts/deepgram-self-hosted/templates/engine/engine.config.yaml index 947cecd..a499676 100644 --- a/charts/deepgram-self-hosted/templates/engine/engine.config.yaml +++ b/charts/deepgram-self-hosted/templates/engine/engine.config.yaml @@ -43,8 +43,8 @@ data: ] [features] - multichannel = {{ .Values.engine.features.multichannel }} - language_detection = {{ .Values.engine.features.languageDetection }} + multichannel = true + language_detection = true [chunking.batch] {{- if .Values.engine.chunking.speechToText.batch.minDuration }} diff --git a/charts/deepgram-self-hosted/values.yaml b/charts/deepgram-self-hosted/values.yaml index be170d7..0bf5c4a 100644 --- a/charts/deepgram-self-hosted/values.yaml +++ b/charts/deepgram-self-hosted/values.yaml @@ -112,7 +112,7 @@ api: # -- pullPolicy configures how the Kubelet attempts to pull the Deepgram API image pullPolicy: IfNotPresent # -- tag defines which Deepgram release to use for API containers - tag: release-240627 + tag: release-240725 # -- Additional labels to add to API resources additionalLabels: {} @@ -222,11 +222,15 @@ api: # -- Enable ancillary features # @default -- `` features: - # -- 
topicDetection enables topic detection *if* a valid topic detection model is available. - topicDetection: true + # -- Enables entity detection on pre-recorded audio + # *if* a valid entity detection model is available. + # *WARNING*: Beta functionality. + entityDetection: false - # -- summarization enable summarization *if* a valid summarization model is available. - summarization: true + # -- Enables entity-based redaction on pre-recorded audio + # *if* a valid entity detection model is available. + # *WARNING*: Beta functionality. + entityRedaction: false # -- If API is receiving requests faster than Engine can process them, a request # queue will form. By default, this queue is stored in memory. Under high load, @@ -274,7 +278,7 @@ engine: # -- pullPolicy configures how the Kubelet attempts to pull the Deepgram Engine image pullPolicy: IfNotPresent # -- tag defines which Deepgram release to use for Engine containers - tag: release-240627 + tag: release-240725 # -- Additional labels to add to Engine resources additionalLabels: {} @@ -455,15 +459,6 @@ engine: # Account Representative. links: [] - # -- Enable ancillary features - # @default -- `` - features: - # -- multichannel allows/disallows multichannel requests - multichannel: true - # -- languageDetection enables Deepgram language detection *if* - # a valid language detection model is available - languageDetection: true - # -- chunking defines the size of audio chunks to process in seconds. # Adjusting these values will affect both inference performance and accuracy # of results. Please contact your Deepgram Account Representative if you @@ -525,7 +520,7 @@ licenseProxy: # Deepgram images into a private container registry. 
path: quay.io/deepgram/onprem-license-proxy # -- tag defines which Deepgram release to use for License Proxy containers - tag: release-240627 + tag: release-240725 # -- pullPolicy configures how the Kubelet attempts to pull the Deepgram # License Proxy image pullPolicy: IfNotPresent diff --git a/common/license_proxy_deploy/api.toml b/common/license_proxy_deploy/api.toml index 28085d0..e87d6ca 100644 --- a/common/license_proxy_deploy/api.toml +++ b/common/license_proxy_deploy/api.toml @@ -66,6 +66,12 @@ topic_detection = true # or false ### Enables summarization *if* a valid summarization model is available summarization = true # or false +### Enables pre-recorded entity detection *if* a valid entity detection model is available +entity_detection = false # or true + +### Enables pre-recorded entity-based redaction *if* a valid entity detection model is available +entity_redaction = false # or true + ### If API is receiving requests faster than Engine can process them, a request ### queue will form. By default, this queue is stored in memory. Under high load, ### the queue may grow too large and cause Out-Of-Memory errors. To avoid this, diff --git a/common/standard_deploy/api.toml b/common/standard_deploy/api.toml index 692cd02..2874f33 100644 --- a/common/standard_deploy/api.toml +++ b/common/standard_deploy/api.toml @@ -64,6 +64,12 @@ topic_detection = true # or false ### Enables summarization *if* a valid summarization model is available summarization = true # or false +### Enables pre-recorded entity detection *if* a valid entity detection model is available +entity_detection = false # or true + +### Enables pre-recorded entity-based redaction *if* a valid entity detection model is available +entity_redaction = false # or true + ### If API is receiving requests faster than Engine can process them, a request ### queue will form. By default, this queue is stored in memory. Under high load, ### the queue may grow too large and cause Out-Of-Memory errors. 
To avoid this, diff --git a/docker/docker-compose.license-proxy.yml b/docker/docker-compose.license-proxy.yml index 9cc7f66..d31618f 100644 --- a/docker/docker-compose.license-proxy.yml +++ b/docker/docker-compose.license-proxy.yml @@ -5,7 +5,7 @@ version: "3.7" services: # The speech API service. api: - image: quay.io/deepgram/onprem-api:release-240627 + image: quay.io/deepgram/onprem-api:release-240725 # Here we expose the API port to the host machine. The container port # (right-hand side) must match the port that the API service is listening @@ -41,7 +41,7 @@ services: # The speech engine service. engine: - image: quay.io/deepgram/onprem-engine:release-240627 + image: quay.io/deepgram/onprem-engine:release-240725 # Utilize a GPU, if available. runtime: nvidia @@ -83,7 +83,7 @@ services: # The service to validate your Deepgram license license-proxy: - image: quay.io/deepgram/onprem-license-proxy:release-240627 + image: quay.io/deepgram/onprem-license-proxy:release-240725 # Here we expose the License Proxy status port to the host machine. The container port # (right-hand side) must match the port that the License Proxy service is listening diff --git a/docker/docker-compose.standard.yml b/docker/docker-compose.standard.yml index 2696200..3f7ca0b 100644 --- a/docker/docker-compose.standard.yml +++ b/docker/docker-compose.standard.yml @@ -5,7 +5,7 @@ version: "3.7" services: # The speech API service. api: - image: quay.io/deepgram/onprem-api:release-240627 + image: quay.io/deepgram/onprem-api:release-240725 # Here we expose the API port to the host machine. The container port # (right-hand side) must match the port that the API service is listening @@ -37,7 +37,7 @@ services: # The speech engine service. engine: - image: quay.io/deepgram/onprem-engine:release-240627 + image: quay.io/deepgram/onprem-engine:release-240725 # Utilize a GPU, if available. 
runtime: nvidia diff --git a/podman/podman-compose.license-proxy.yml b/podman/podman-compose.license-proxy.yml index 9ea213f..11445b1 100644 --- a/podman/podman-compose.license-proxy.yml +++ b/podman/podman-compose.license-proxy.yml @@ -5,7 +5,7 @@ version: "3.7" services: # The speech API service. api: - image: quay.io/deepgram/onprem-api:release-240627 + image: quay.io/deepgram/onprem-api:release-240725 # Here we expose the API port to the host machine. The container port # (right-hand side) must match the port that the API service is listening @@ -41,7 +41,7 @@ services: # The speech engine service. engine: - image: quay.io/deepgram/onprem-engine:release-240627 + image: quay.io/deepgram/onprem-engine:release-240725 # Utilize a GPU, if available. devices: @@ -84,7 +84,7 @@ services: # The service to validate your Deepgram license license-proxy: - image: quay.io/deepgram/onprem-license-proxy:release-240627 + image: quay.io/deepgram/onprem-license-proxy:release-240725 # Here we expose the License Proxy status port to the host machine. The container port # (right-hand side) must match the port that the License Proxy service is listening diff --git a/podman/podman-compose.standard.yml b/podman/podman-compose.standard.yml index 1437877..8a42937 100644 --- a/podman/podman-compose.standard.yml +++ b/podman/podman-compose.standard.yml @@ -5,7 +5,7 @@ version: "3.7" services: # The speech API service. api: - image: quay.io/deepgram/onprem-api:release-240627 + image: quay.io/deepgram/onprem-api:release-240725 # Here we expose the API port to the host machine. The container port # (right-hand side) must match the port that the API service is listening @@ -37,7 +37,7 @@ services: # The speech engine service. engine: - image: quay.io/deepgram/onprem-engine:release-240627 + image: quay.io/deepgram/onprem-engine:release-240725 # Utilize a GPU, if available. 
devices: From ff67b2ba846b92f66a701240d48bb46e15996869 Mon Sep 17 00:00:00 2001 From: Brent George Date: Thu, 25 Jul 2024 15:07:28 -0400 Subject: [PATCH 2/3] add appropriate warnings to beta features --- common/license_proxy_deploy/api.toml | 2 ++ common/standard_deploy/api.toml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/common/license_proxy_deploy/api.toml b/common/license_proxy_deploy/api.toml index e87d6ca..c1990df 100644 --- a/common/license_proxy_deploy/api.toml +++ b/common/license_proxy_deploy/api.toml @@ -67,9 +67,11 @@ topic_detection = true # or false summarization = true # or false ### Enables pre-recorded entity detection *if* a valid entity detection model is available +### *WARNING*: Beta functionality. entity_detection = false # or true ### Enables pre-recorded entity-based redaction *if* a valid entity detection model is available +### *WARNING*: Beta functionality. entity_redaction = false # or true ### If API is receiving requests faster than Engine can process them, a request diff --git a/common/standard_deploy/api.toml b/common/standard_deploy/api.toml index 2874f33..70e10e8 100644 --- a/common/standard_deploy/api.toml +++ b/common/standard_deploy/api.toml @@ -65,9 +65,11 @@ topic_detection = true # or false summarization = true # or false ### Enables pre-recorded entity detection *if* a valid entity detection model is available +### *WARNING*: Beta functionality. entity_detection = false # or true ### Enables pre-recorded entity-based redaction *if* a valid entity detection model is available +### *WARNING*: Beta functionality. 
entity_redaction = false # or true ### If API is receiving requests faster than Engine can process them, a request From 2873794d5e84193b86678cb6ff6e278ca77000ac Mon Sep 17 00:00:00 2001 From: Brent George Date: Thu, 25 Jul 2024 16:20:49 -0400 Subject: [PATCH 3/3] more specific qualifiers on beta features --- charts/deepgram-self-hosted/CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/deepgram-self-hosted/CHANGELOG.md b/charts/deepgram-self-hosted/CHANGELOG.md index 22d5dba..7149be4 100644 --- a/charts/deepgram-self-hosted/CHANGELOG.md +++ b/charts/deepgram-self-hosted/CHANGELOG.md @@ -12,9 +12,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - Introduced entity detection feature flag for API containers (`false` by default). - Updated default container tags to July 2024 release. Refer to the [main Deepgram changelog](https://deepgram.com/changelog/deepgram-self-hosted-july-2024-release-240725) for additional details. Highlights include: - Support for Deepgram's new English/Spanish multilingual code-switching model - - Beta support for entity detection - - Beta support for improved redaction for pre-recorded audio - - Beta support for improved entity formatting for streaming audio + - Beta support for entity detection for pre-recorded English audio + - Beta support for improved redaction for pre-recorded English audio + - Beta support for improved entity formatting for streaming English audio ### Removed