Add README and config for modelID (#10)
* Add README and config for modelID

Signed-off-by: Sanket Sudake <[email protected]>

* Add ai-stack chart initial

Signed-off-by: Sanket Sudake <[email protected]>

* correct links

Signed-off-by: Sanket Sudake <[email protected]>

---------

Signed-off-by: Sanket Sudake <[email protected]>
sanketsudake authored Jul 16, 2024
1 parent ad49683 commit 70081ba
Showing 15 changed files with 253 additions and 20 deletions.
23 changes: 23 additions & 0 deletions charts/ai-stack/.helmignore
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
9 changes: 9 additions & 0 deletions charts/ai-stack/Chart.lock
@@ -0,0 +1,9 @@
dependencies:
- name: text-generation-inference
  repository: file://../text-generation-inference
  version: 0.1.2
- name: text-embeddings-inference
  repository: file://../text-embeddings-inference
  version: 0.1.2
digest: sha256:77a1b777ff09b33b9940105305ade8896881671c94bee25285c8855b9179d426
generated: "2024-07-16T13:52:43.716392+05:30"
37 changes: 37 additions & 0 deletions charts/ai-stack/Chart.yaml
@@ -0,0 +1,37 @@
apiVersion: v2
name: ai-stack
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: ""

dependencies:
  - name: text-generation-inference
    version: 0.1.2
    repository: "file://../text-generation-inference"
    alias: tgi
    condition: tgi.enabled

  - name: text-embeddings-inference
    version: 0.1.2
    repository: "file://../text-embeddings-inference"
    alias: tei
    condition: tei.enabled
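
The `alias` and `condition` fields make each sub-chart individually addressable and optional. As a sketch (assuming the `infracloud-charts` repository from the README has been added), the TGI sub-chart could be disabled while keeping TEI:

```bash
# Install only the embeddings service; tgi.enabled gates the TGI sub-chart
helm install ai-stack infracloud-charts/ai-stack \
  --set tgi.enabled=false \
  --set tei.enabled=true
```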
35 changes: 35 additions & 0 deletions charts/ai-stack/README.md
@@ -0,0 +1,35 @@
# ai-stack

This chart deploys the AI stack on a Kubernetes cluster using the Helm package manager.

## Components

The AI stack consists of the following components:

- [Text Embeddings Inference (TEI)](../text-embeddings-inference/)
- [Text Generation Inference (TGI)](../text-generation-inference/)

## Setup Helm Repository

```bash
helm repo add infracloud-charts https://infracloudio.github.io/charts
helm repo update
```

See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation.

## Installing the Chart

To install the chart with the release name `ai-stack`:

```bash
helm install ai-stack infracloud-charts/ai-stack
```
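
Default values can be overridden per sub-chart through the `tgi` and `tei` aliases, for example to change the served models (a sketch; any model ID from the Hugging Face Hub that the respective launcher supports should work):

```bash
# Override the model served by each sub-chart at install time
helm install ai-stack infracloud-charts/ai-stack \
  --set tgi.config.modelID="Qwen/Qwen2-7B-Instruct" \
  --set tei.config.modelID="BAAI/bge-large-en-v1.5"
```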

## Uninstalling the Chart

To uninstall the `ai-stack` deployment:

```bash
helm uninstall ai-stack
```
Binary file not shown.
Binary file not shown.
55 changes: 55 additions & 0 deletions charts/ai-stack/values.yaml
@@ -0,0 +1,55 @@
# Default values for ai-stack.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Values for the text-embeddings-inference chart
tei:
  enabled: true

  config:
    modelID: "BAAI/bge-large-en-v1.5"

  env:
    - name: MAX_CLIENT_BATCH_SIZE
      value: "1024"
    - name: RUST_BACKTRACE
      value: "full"

  resources:
    limits:
      nvidia.com/gpu: 1
    requests:
      nvidia.com/gpu: 1

  strategy:
    type: Recreate

  service:
    type: LoadBalancer
    port: 80

# Values for the text-generation-inference chart
tgi:
  enabled: true

  config:
    modelID: "Qwen/Qwen2-7B-Instruct"

  env:
    - name: MAX_INPUT_TOKENS
      value: "6144"
    - name: MAX_TOTAL_TOKENS
      value: "8192"

  resources:
    limits:
      nvidia.com/gpu: 1
    requests:
      nvidia.com/gpu: 1

  strategy:
    type: Recreate

  service:
    type: LoadBalancer
    port: 80
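
To verify how these values flow into the sub-charts before installing, the chart can be rendered locally (a sketch, assuming the repository is cloned so the `file://` dependencies resolve):

```bash
# Build the local file:// dependencies, then render and inspect the MODEL_ID env vars
helm dependency build charts/ai-stack
helm template ai-stack charts/ai-stack | grep -A 1 "name: MODEL_ID"
```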
2 changes: 1 addition & 1 deletion charts/text-embeddings-inference/Chart.yaml
@@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.1
+version: 0.1.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
30 changes: 30 additions & 0 deletions charts/text-embeddings-inference/README.md
@@ -0,0 +1,30 @@
# text-embeddings-inference

A Helm chart to deploy [Text Embeddings Inference (TEI)](https://github.com/huggingface/text-embeddings-inference) from Hugging Face.

## Setup Helm Repository

```bash
helm repo add infracloud-charts https://infracloudio.github.io/charts
helm repo update
```

See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation.

## Installing the Chart

To install the chart with the release name `tei`:

```bash
helm install tei infracloud-charts/text-embeddings-inference
```
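
Once the pod is ready, the service can be smoke-tested locally. The service name below is an assumption based on the release and chart names; adjust it to whatever `kubectl get svc` reports:

```bash
# Forward the TEI service to a local port (service name is an assumption)
kubectl port-forward svc/tei-text-embeddings-inference 8080:80 &

# TEI exposes a POST /embed endpoint that returns embedding vectors
curl 127.0.0.1:8080/embed \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is deep learning?"}'
```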

## Uninstalling the Chart

To uninstall the `tei` deployment:

```bash
helm uninstall tei
```

The command removes all the Kubernetes components associated with the chart and deletes the release.
6 changes: 6 additions & 0 deletions charts/text-embeddings-inference/templates/deployment.yaml
@@ -40,7 +40,13 @@ spec:
 imagePullPolicy: {{ .Values.image.pullPolicy }}
 command: {{ .Values.command }}
 env:
+  {{- if .Values.env }}
+  {{- toYaml .Values.env | nindent 12 }}
+  {{- end }}
+  {{- if .Values.config.modelID }}
+  - name: MODEL_ID
+    value: {{ .Values.config.modelID | quote }}
+  {{- end }}
 ports:
   - name: http
     containerPort: {{ .Values.service.port }}
19 changes: 10 additions & 9 deletions charts/text-embeddings-inference/values.yaml
@@ -20,16 +20,17 @@ image:

command: ["text-embeddings-launcher"]

-env:
-  # - name: MY_ENV_VAR
-  #   value: my-env-var-value
+config:
+  # The name of the model to use. Can be a MODEL_ID as listed on <https://hf.co/models>, like `thenlper/gte-base`.
+  # If MODEL_ID is also set under `env`, that environment variable takes precedence.
+  modelID: BAAI/bge-large-en-v1.5
+
+env: []
+  # Reference: https://huggingface.co/docs/text-embeddings-inference/cli_arguments
-  - name: MODEL_ID
-    value: BAAI/bge-large-en-v1.5
-  - name: MAX_CLIENT_BATCH_SIZE
-    value: "1024"
-  - name: RUST_BACKTRACE
-    value: "full"
+  # - name: MAX_CLIENT_BATCH_SIZE
+  #   value: "32"
+  # - name: RUST_BACKTRACE
+  #   value: "full"

imagePullSecrets: []
nameOverride: ""
2 changes: 1 addition & 1 deletion charts/text-generation-inference/Chart.yaml
@@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.1
+version: 0.1.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
30 changes: 30 additions & 0 deletions charts/text-generation-inference/README.md
@@ -0,0 +1,30 @@
# text-generation-inference

A Helm chart to deploy [Text Generation Inference (TGI)](https://github.com/huggingface/text-generation-inference) from Hugging Face.

## Setup Helm Repository

```bash
helm repo add infracloud-charts https://infracloudio.github.io/charts
helm repo update
```

See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation.

## Installing the Chart

To install the chart with the release name `tgi`:

```bash
helm install tgi infracloud-charts/text-generation-inference
```
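
Once the pod is ready, the service can be smoke-tested locally. The service name below is an assumption based on the release and chart names; adjust it to whatever `kubectl get svc` reports:

```bash
# Forward the TGI service to a local port (service name is an assumption)
kubectl port-forward svc/tgi-text-generation-inference 8080:80 &

# TGI exposes a POST /generate endpoint for text completion
curl 127.0.0.1:8080/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is Kubernetes?", "parameters": {"max_new_tokens": 64}}'
```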

## Uninstalling the Chart

To uninstall the `tgi` deployment:

```bash
helm uninstall tgi
```

The command removes all the Kubernetes components associated with the chart and deletes the release.
6 changes: 6 additions & 0 deletions charts/text-generation-inference/templates/deployment.yaml
@@ -40,7 +40,13 @@ spec:
 imagePullPolicy: {{ .Values.image.pullPolicy }}
 command: {{ .Values.command }}
 env:
+  {{- if .Values.env }}
+  {{- toYaml .Values.env | nindent 12 }}
+  {{- end }}
+  {{- if .Values.config.modelID }}
+  - name: MODEL_ID
+    value: {{ .Values.config.modelID | quote }}
+  {{- end }}
 ports:
   - name: http
     containerPort: {{ .Values.service.port }}
19 changes: 10 additions & 9 deletions charts/text-generation-inference/values.yaml
@@ -20,16 +20,17 @@ image:

command: ["text-generation-launcher"]

-env:
-  # - name: MY_ENV_VAR
-  #   value: my-env-var-value
+config:
+  # The name of the model to use. Can be a MODEL_ID as listed on <https://hf.co/models>, like `gpt2` or `OpenAssistant/oasst-sft-1-pythia-12b`.
+  # If MODEL_ID is also set under `env`, that environment variable takes precedence.
+  modelID: "bigscience/bloom-560m"
+
+env: []
+  # Reference: https://huggingface.co/docs/text-generation-inference/main/en/basic_tutorials/launcher
-  - name: MODEL_ID
-    value: microsoft/phi-1_5
-  - name: MAX_INPUT_TOKENS
-    value: "6144"
-  - name: MAX_OUTPUT_TOKENS
-    value: "8192"
+  # - name: MAX_INPUT_TOKENS
+  #   value: "6144"
+  # - name: MAX_OUTPUT_TOKENS
+  #   value: "8192"

imagePullSecrets: []
nameOverride: ""
