diff --git a/charts/ai-stack/.helmignore b/charts/ai-stack/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/ai-stack/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/ai-stack/Chart.lock b/charts/ai-stack/Chart.lock new file mode 100644 index 0000000..41764f8 --- /dev/null +++ b/charts/ai-stack/Chart.lock @@ -0,0 +1,9 @@ +dependencies: +- name: text-generation-inference + repository: file://../text-generation-inference + version: 0.1.2 +- name: text-embeddings-inference + repository: file://../text-embeddings-inference + version: 0.1.2 +digest: sha256:77a1b777ff09b33b9940105305ade8896881671c94bee25285c8855b9179d426 +generated: "2024-07-16T13:52:43.716392+05:30" diff --git a/charts/ai-stack/Chart.yaml b/charts/ai-stack/Chart.yaml new file mode 100644 index 0000000..4ab29cc --- /dev/null +++ b/charts/ai-stack/Chart.yaml @@ -0,0 +1,37 @@ +apiVersion: v2 +name: ai-stack +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. 
+# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "" + +dependencies: + - name: text-generation-inference + version: 0.1.2 + repository: "file://../text-generation-inference" + alias: tgi + condition: tgi.enabled + + - name: text-embeddings-inference + version: 0.1.2 + repository: "file://../text-embeddings-inference" + alias: tei + condition: tei.enabled \ No newline at end of file diff --git a/charts/ai-stack/README.md b/charts/ai-stack/README.md new file mode 100644 index 0000000..fd6aca2 --- /dev/null +++ b/charts/ai-stack/README.md @@ -0,0 +1,35 @@ +# ai-stack + +This chart deploys the AI stack on a Kubernetes cluster using the Helm package manager. + +## Components + +The AI stack consists of the following components: + +- [Text Embeddings Inference(TEI)](../text-embeddings-inference/) +- [Text Generation Inference(TGI)](../text-generation-inference/) + +## Setup Helm Repository + +```bash +helm repo add infracloud-charts https://infracloudio.github.io/charts +helm repo update +``` + +See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation. 
+ +## Installing the Chart + +To install the chart with the release name `ai-stack`: + +```bash +helm install ai-stack infracloud-charts/ai-stack +``` + +## Uninstalling the Chart + +To uninstall the `ai-stack` deployment: + +```bash +helm uninstall ai-stack +``` diff --git a/charts/ai-stack/charts/text-embeddings-inference-0.1.2.tgz b/charts/ai-stack/charts/text-embeddings-inference-0.1.2.tgz new file mode 100644 index 0000000..b1b3e21 Binary files /dev/null and b/charts/ai-stack/charts/text-embeddings-inference-0.1.2.tgz differ diff --git a/charts/ai-stack/charts/text-generation-inference-0.1.2.tgz b/charts/ai-stack/charts/text-generation-inference-0.1.2.tgz new file mode 100644 index 0000000..db800a5 Binary files /dev/null and b/charts/ai-stack/charts/text-generation-inference-0.1.2.tgz differ diff --git a/charts/ai-stack/values.yaml b/charts/ai-stack/values.yaml new file mode 100644 index 0000000..d55049f --- /dev/null +++ b/charts/ai-stack/values.yaml @@ -0,0 +1,55 @@ +# Default values for ai-stack. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +# Values for the text-embeddings-inference chart +tei: + enabled: true + + config: + modelID: "BAAI/bge-large-en-v1.5" + + env: + - name: MAX_CLIENT_BATCH_SIZE + value: "1024" + - name: RUST_BACKTRACE + value: "full" + + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 + + strategy: + type: Recreate + + service: + type: LoadBalancer + port: 80 + +# Values for the text-generation-inference chart +tgi: + enabled: true + + config: + modelID: "Qwen/Qwen2-7B-Instruct" + + env: + - name: MAX_INPUT_TOKENS + value: "6144" + - name: MAX_TOTAL_TOKENS + value: "8192" + + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 + + strategy: + type: Recreate + + service: + type: LoadBalancer + port: 80 \ No newline at end of file diff --git a/charts/text-embeddings-inference/Chart.yaml b/charts/text-embeddings-inference/Chart.yaml index e6ca6c4..aa3bf49 100644 --- a/charts/text-embeddings-inference/Chart.yaml +++ b/charts/text-embeddings-inference/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.1 +version: 0.1.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/text-embeddings-inference/README.md b/charts/text-embeddings-inference/README.md new file mode 100644 index 0000000..5b3fd2e --- /dev/null +++ b/charts/text-embeddings-inference/README.md @@ -0,0 +1,30 @@ +# text-embeddings-inference + +A helm chart to deploy the [Text Embeddings Inference(TEI)](https://github.com/huggingface/text-embeddings-inference) from Hugging Face. 
+ +## Setup Helm Repository + +```bash +helm repo add infracloud-charts https://infracloudio.github.io/charts +helm repo update +``` + +See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation. + +## Installing the Chart + +To install the chart with the release name `tei`: + +```bash +helm install tei infracloud-charts/text-embeddings-inference +``` + +## Uninstalling the Chart + +To uninstall the `tei` deployment: + +```bash +helm uninstall tei +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. diff --git a/charts/text-embeddings-inference/templates/deployment.yaml b/charts/text-embeddings-inference/templates/deployment.yaml index eb892e0..6f66e1c 100644 --- a/charts/text-embeddings-inference/templates/deployment.yaml +++ b/charts/text-embeddings-inference/templates/deployment.yaml @@ -40,7 +40,13 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} command: {{ .Values.command }} env: + {{- if .Values.env }} {{- toYaml .Values.env | nindent 12 }} + {{- end }} + {{- if .Values.config.modelID }} + - name: MODEL_ID + value: {{ .Values.config.modelID | quote }} + {{- end }} ports: - name: http containerPort: {{ .Values.service.port }} diff --git a/charts/text-embeddings-inference/values.yaml b/charts/text-embeddings-inference/values.yaml index cdf0773..ead348f 100644 --- a/charts/text-embeddings-inference/values.yaml +++ b/charts/text-embeddings-inference/values.yaml @@ -20,16 +20,17 @@ image: command: ["text-embeddings-launcher"] -env: - # - name: MY_ENV_VAR - # value: my-env-var-value +config: + # The name of the model to use. Can be a model ID as listed on <https://hf.co/models>, like `thenlper/gte-base`. + # If you also set MODEL_ID via the `env` list, that environment variable takes precedence. 
+ modelID: BAAI/bge-large-en-v1.5 + +env: [] # Reference: https://huggingface.co/docs/text-embeddings-inference/cli_arguments - - name: MODEL_ID - value: BAAI/bge-large-en-v1.5 - - name: MAX_CLIENT_BATCH_SIZE - value: "1024" - - name: RUST_BACKTRACE - value: "full" + # - name: MAX_CLIENT_BATCH_SIZE + # value: "32" + # - name: RUST_BACKTRACE + # value: "full" imagePullSecrets: [] nameOverride: "" diff --git a/charts/text-generation-inference/Chart.yaml b/charts/text-generation-inference/Chart.yaml index 1ae69ec..b25503a 100644 --- a/charts/text-generation-inference/Chart.yaml +++ b/charts/text-generation-inference/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.1 +version: 0.1.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/text-generation-inference/README.md b/charts/text-generation-inference/README.md new file mode 100644 index 0000000..902b4ec --- /dev/null +++ b/charts/text-generation-inference/README.md @@ -0,0 +1,30 @@ +# text-generation-inference + +A helm chart to deploy the [Text Generation Inference(TGI)](https://github.com/huggingface/text-generation-inference) from Hugging Face. + +## Setup Helm Repository + +```bash +helm repo add infracloud-charts https://infracloudio.github.io/charts +helm repo update +``` + +See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation. 
+ +## Installing the Chart + +To install the chart with the release name `tgi`: + +```bash +helm install tgi infracloud-charts/text-generation-inference +``` + +## Uninstalling the Chart + +To uninstall the `tgi` deployment: + +```bash +helm uninstall tgi +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. diff --git a/charts/text-generation-inference/templates/deployment.yaml b/charts/text-generation-inference/templates/deployment.yaml index 5670ecd..6c5a1e9 100644 --- a/charts/text-generation-inference/templates/deployment.yaml +++ b/charts/text-generation-inference/templates/deployment.yaml @@ -40,7 +40,13 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} command: {{ .Values.command }} env: + {{- if .Values.env }} {{- toYaml .Values.env | nindent 12 }} + {{- end }} + {{- if .Values.config.modelID }} + - name: MODEL_ID + value: {{ .Values.config.modelID | quote }} + {{- end }} ports: - name: http containerPort: {{ .Values.service.port }} diff --git a/charts/text-generation-inference/values.yaml b/charts/text-generation-inference/values.yaml index b0af387..c70d57c 100644 --- a/charts/text-generation-inference/values.yaml +++ b/charts/text-generation-inference/values.yaml @@ -20,16 +20,17 @@ image: command: ["text-generation-launcher"] -env: - # - name: MY_ENV_VAR - # value: my-env-var-value +config: + # The name of the model to use. Can be a model ID as listed on <https://hf.co/models>, like `gpt2` or `OpenAssistant/oasst-sft-1-pythia-12b`. + # If you also set MODEL_ID via the `env` list, that environment variable takes precedence. +config: + modelID: "bigscience/bloom-560m" + +env: [] # Reference: https://huggingface.co/docs/text-generation-inference/main/en/basic_tutorials/launcher - - name: MODEL_ID - value: microsoft/phi-1_5 - - name: MAX_INPUT_TOKENS - value: "6144" - - name: MAX_OUTPUT_TOKENS - value: "8192" + # - name: MAX_INPUT_TOKENS + # value: "6144" + # - name: MAX_TOTAL_TOKENS + # value: "8192" imagePullSecrets: [] nameOverride: ""