From abf683b280310e4795075b1ff8c53f7d8bcb9023 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DETROYES Date: Tue, 11 Feb 2025 18:43:40 +0800 Subject: [PATCH 1/4] feat: work on intel Signed-off-by: Jean-Baptiste DETROYES --- templates/deployment.yaml | 12 +++++------- values.yaml | 6 +++++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/templates/deployment.yaml b/templates/deployment.yaml index 5aaf977..5d2ab81 100644 --- a/templates/deployment.yaml +++ b/templates/deployment.yaml @@ -82,19 +82,13 @@ spec: resources: {{- $limits := default dict .Values.resources.limits }} {{- if .Values.ollama.gpu.enabled }} - # If gpu is enabled, it can either be a NVIDIA card or a AMD card {{- if or (eq .Values.ollama.gpu.type "nvidia") (not .Values.ollama.gpu.type) }} - # NVIDIA is assumed by default if no value is set and GPU is enabled - # NVIDIA cards can have mig enabled (i.e., the card is sliced into parts - # Therefore, the first case is no migs enabled {{- if or (not .Values.ollama.gpu.mig) (not .Values.ollama.gpu.mig.enabled ) }} {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }} {{- $limits = merge $limits $gpuLimit }} - # Second case is mig is enabled {{- else if or (.Values.ollama.gpu.mig.enabled) }} - # Initialize empty dictionary + {{- $migDevices := dict -}} - # Loop over the entries in the mig devices {{- range $key, $value := .Values.ollama.gpu.mig.devices }} {{- $migKey := printf "nvidia.com/mig-%s" $key -}} {{- $migDevices = merge $migDevices (dict $migKey $value) -}} @@ -106,6 +100,10 @@ spec: {{- $gpuLimit := dict "amd.com/gpu" (.Values.ollama.gpu.number | default 1) }} {{- $limits = merge $limits $gpuLimit }} {{- end }} + {{- if eq .Values.ollama.gpu.type "intel" }} + {{- $gpuLimit := dict (.Values.ollama.gpu.intelResource | default "gpu.intel.com/i915") (.Values.ollama.gpu.number | default 1) }} + {{- $limits = merge $limits $gpuLimit }} + {{- end }} {{- end }} {{- 
$ressources := deepCopy (dict "limits" $limits) | mergeOverwrite .Values.resources }} {{- toYaml $ressources | nindent 12 }} diff --git a/values.yaml b/values.yaml index a691224..4b59d77 100644 --- a/values.yaml +++ b/values.yaml @@ -47,7 +47,7 @@ ollama: # -- Enable GPU integration enabled: false - # -- GPU type: 'nvidia' or 'amd' + # -- GPU type: 'nvidia', 'amd' or 'intel' # If 'ollama.gpu.enabled', default value is nvidia # If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not override # This is due cause AMD and CPU/CUDA are different images @@ -57,6 +57,10 @@ ollama: # If you use MIG section below then this parameter is ignored number: 1 + # -- only for intekl cards; change to (example) 'gpu.intel.com/xe' for new KMD devices + # Refer to documentation: https://github.com/intel/intel-device-plugins-for-kubernetes + intelResource: "gpu.intel.com/i915" + # -- only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice nvidiaResource: "nvidia.com/gpu" # nvidiaResource: "nvidia.com/mig-1g.10gb" # example From 7621a8bf7d372ca085de8130dc698bf74748cf91 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DETROYES Date: Thu, 20 Feb 2025 16:30:55 +0800 Subject: [PATCH 2/4] docs: update documentation Signed-off-by: Jean-Baptiste DETROYES --- README.md | 4 ++-- values.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b0da0ff..44700d1 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,9 @@ This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama) - Kubernetes: `>= 1.16.0-0` for **CPU only** -- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD) +- Kubernetes: `>= 1.26.0-0` for **GPU** support (NVIDIA, AMD and Intel) -*Not all GPUs are currently supported with ollama (especially with AMD)* +*Not all GPUs are currently supported with ollama (especially with Intel)* ## Deploying Ollama chart diff --git a/values.yaml b/values.yaml index 
4b59d77..8cc188c 100644 --- a/values.yaml +++ b/values.yaml @@ -57,7 +57,7 @@ ollama: # If you use MIG section below then this parameter is ignored number: 1 - # -- only for intekl cards; change to (example) 'gpu.intel.com/xe' for new KMD devices + # -- only for intel cards; change to 'gpu.intel.com/xe' for new KMD devices # Refer to documentation: https://github.com/intel/intel-device-plugins-for-kubernetes intelResource: "gpu.intel.com/i915" From 454d238b10e17dd9c0e26c18217f26186ccb80a9 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DETROYES Date: Thu, 20 Feb 2025 17:08:14 +0800 Subject: [PATCH 3/4] docs: update documentation Signed-off-by: Jean-Baptiste DETROYES --- README.md | 92 +++++++++++++++++++++++++++++++++++++++ templates/deployment.yaml | 1 - 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 44700d1..d583389 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,98 @@ ollama: | updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate | | volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. | | volumes | list | `[]` | Additional volumes on the output Deployment definition. | +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity for pod assignment | +| autoscaling.enabled | bool | `false` | Enable autoscaling | +| autoscaling.maxReplicas | int | `100` | Number of maximum replicas | +| autoscaling.minReplicas | int | `1` | Number of minimum replicas | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica | +| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. | +| extraEnv | list | `[]` | Additional environments variables on the output Deployment definition. 
For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go | +| extraEnvFrom | list | `[]` | Additional environment variables from external sources (like ConfigMap) | +| extraObjects | list | `[]` | Extra K8s manifests to deploy | +| fullnameOverride | string | `""` | String to fully override template | +| hostIPC | bool | `false` | Use the host’s ipc namespace. | +| hostNetwork | bool | `false` | Use the host's network namespace. | +| hostPID | bool | `false` | Use the host’s pid namespace | +| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy | +| image.repository | string | `"ollama/ollama"` | Docker image registry | +| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. | +| imagePullSecrets | list | `[]` | Docker registry secret names as an array | +| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. | +| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) | +| ingress.enabled | bool | `false` | Enable ingress controller resource | +| ingress.hosts[0].host | string | `"ollama.local"` | | +| ingress.hosts[0].paths[0].path | string | `"/"` | | +| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | | +| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. 
| +| initContainers | list | `[]` | Init containers to add to the pod | +| knative.containerConcurrency | int | `0` | Knative service container concurrency | +| knative.enabled | bool | `false` | Enable Knative integration | +| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds | +| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds | +| knative.timeoutSeconds | int | `300` | Knative service timeout seconds | +| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) | +| livenessProbe.enabled | bool | `true` | Enable livenessProbe | +| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe | +| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe | +| livenessProbe.path | string | `"/"` | Request path for livenessProbe | +| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe | +| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe | +| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe | +| nameOverride | string | `""` | String to partially override template (will maintain the release name) | +| namespaceOverride | string | `""` | String to fully override namespace | +| nodeSelector | object | `{}` | Node labels for pod assignment. 
| +| ollama.gpu.enabled | bool | `false` | Enable GPU integration | +| ollama.gpu.intelResource | string | `"gpu.intel.com/i915"` | only for intel cards; change to 'gpu.intel.com/xe' for new KMD devices Refer to documentation: https://github.com/intel/intel-device-plugins-for-kubernetes | +| ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number | +| ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices If enabled you will have to specify the mig devices If enabled is set to false this section is ignored | +| ollama.gpu.number | int | `1` | Specify the number of GPU If you use MIG section below then this parameter is ignored | +| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice | +| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia', 'amd' or 'intel' If 'ollama.gpu.enabled', default value is nvidia If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not override This is due cause AMD and CPU/CUDA are different images | +| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup | +| ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral | +| ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral | +| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" | +| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes Must match those of existing PV or dynamic provisioner Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ | +| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations | +| persistentVolume.enabled | bool | 
`false` | Enable persistence using PVC | +| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires server.persistentVolume.enabled: true | +| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size | +| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class If defined, storageClassName: If set to "-", storageClassName: "", which disables dynamic provisioning If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. (gp2 on AWS, standard on GKE, AWS & OpenStack) | +| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount Useful if the volume's root directory is not empty | +| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode If defined, volumeMode: If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. 
| +| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward | +| podAnnotations | object | `{}` | Map of annotations to add to the pods | +| podLabels | object | `{}` | Map of labels to add to the pods | +| podSecurityContext | object | `{}` | Pod Security Context | +| readinessProbe.enabled | bool | `true` | Enable readinessProbe | +| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe | +| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe | +| readinessProbe.path | string | `"/"` | Request path for readinessProbe | +| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe | +| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe | +| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe | +| replicaCount | int | `1` | Number of replicas | +| resources.limits | object | `{}` | Pod limit | +| resources.requests | object | `{}` | Pod requests | +| runtimeClassName | string | `""` | Specify runtime class | +| securityContext | object | `{}` | Container Security Context | +| service.annotations | object | `{}` | Annotations to add to the service | +| service.loadBalancerIP | string | `nil` | Load Balancer IP address | +| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' | +| service.port | int | `11434` | Service port | +| service.type | string | `"ClusterIP"` | Service type | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | +| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? | +| serviceAccount.create | bool | `true` | Specifies whether a service account should be created | +| serviceAccount.name | string | `""` | The name of the service account to use. 
If not set and create is true, a name is generated using the fullname template | +| tolerations | list | `[]` | Tolerations for pod assignment | +| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment | +| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate | +| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. | +| volumes | list | `[]` | Additional volumes on the output Deployment definition. | ---------------------------------------------- diff --git a/templates/deployment.yaml b/templates/deployment.yaml index 5d2ab81..2c9bb9a 100644 --- a/templates/deployment.yaml +++ b/templates/deployment.yaml @@ -87,7 +87,6 @@ spec: {{- $gpuLimit := dict (.Values.ollama.gpu.nvidiaResource | default "nvidia.com/gpu") (.Values.ollama.gpu.number | default 1) }} {{- $limits = merge $limits $gpuLimit }} {{- else if or (.Values.ollama.gpu.mig.enabled) }} - {{- $migDevices := dict -}} {{- range $key, $value := .Values.ollama.gpu.mig.devices }} {{- $migKey := printf "nvidia.com/mig-%s" $key -}} From b9b2889a1fae38722d947cb3623afaa35a04e197 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DETROYES Date: Thu, 20 Feb 2025 17:10:11 +0800 Subject: [PATCH 4/4] docs: update documentation Signed-off-by: Jean-Baptiste DETROYES --- README.md | 93 +------------------------------------------------------ 1 file changed, 1 insertion(+), 92 deletions(-) diff --git a/README.md b/README.md index d583389..33dca40 100644 --- a/README.md +++ b/README.md @@ -149,98 +149,6 @@ ollama: - See [values.yaml](values.yaml) to see the Chart's default values. 
-| Key | Type | Default | Description | -|--------------------------------------------|--------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| affinity | object | `{}` | Affinity for pod assignment | -| autoscaling.enabled | bool | `false` | Enable autoscaling | -| autoscaling.maxReplicas | int | `100` | Number of maximum replicas | -| autoscaling.minReplicas | int | `1` | Number of minimum replicas | -| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica | -| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. | -| extraEnv | list | `[]` | Additional environments variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go | -| extraEnvFrom | list | `[]` | Additionl environment variables from external sources (like ConfigMap) | -| extraObjects | list | `[]` | Extra K8s manifests to deploy | -| fullnameOverride | string | `""` | String to fully override template | -| hostIPC | bool | `false` | Use the host’s ipc namespace. | -| hostNetwork | bool | `false` | Use the host's network namespace. | -| hostPID | bool | `false` | Use the host’s pid namespace | -| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy | -| image.repository | string | `"ollama/ollama"` | Docker image registry | -| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. | -| imagePullSecrets | list | `[]` | Docker registry secret names as an array | -| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. 
| -| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) | -| ingress.enabled | bool | `false` | Enable ingress controller resource | -| ingress.hosts[0].host | string | `"ollama.local"` | | -| ingress.hosts[0].paths[0].path | string | `"/"` | | -| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | | -| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. | -| initContainers | list | `[]` | Init containers to add to the pod | -| knative.containerConcurrency | int | `0` | Knative service container concurrency | -| knative.enabled | bool | `false` | Enable Knative integration | -| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds | -| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds | -| knative.timeoutSeconds | int | `300` | Knative service timeout seconds | -| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) | -| livenessProbe.enabled | bool | `true` | Enable livenessProbe | -| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe | -| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe | -| livenessProbe.path | string | `"/"` | Request path for livenessProbe | -| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe | -| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe | -| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe | -| nameOverride | string | `""` | String to partially override template (will maintain the release name) | -| namespaceOverride | string | `""` | String to fully override namespace | -| nodeSelector | object | `{}` | Node labels for pod assignment. 
| -| ollama.gpu.enabled | bool | `false` | Enable GPU integration | -| ollama.gpu.mig.devices | object | `{}` | Specify the mig devices and the corresponding number | -| ollama.gpu.mig.enabled | bool | `false` | Enable multiple mig devices If enabled you will have to specify the mig devices If enabled is set to false this section is ignored | -| ollama.gpu.number | int | `1` | Specify the number of GPU If you use MIG section below then this parameter is ignored | -| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice | -| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd' If 'ollama.gpu.enabled', default value is nvidia If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not override This is due cause AMD and CPU/CUDA are different images | -| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup | -| ollama.models.create | list | `[]` | List of models to create at container startup, there are two options 1. Create a raw model 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. 
create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 | -| ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral | -| ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral | -| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" | -| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes Must match those of existing PV or dynamic provisioner Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ | -| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations | -| persistentVolume.enabled | bool | `false` | Enable persistence using PVC | -| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires server.persistentVolume.enabled: true | -| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size | -| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class If defined, storageClassName: If set to "-", storageClassName: "", which disables dynamic provisioning If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner. 
(gp2 on AWS, standard on GKE, AWS & OpenStack) | -| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount Useful if the volume's root directory is not empty | -| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode If defined, volumeMode: If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. | -| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward | -| podAnnotations | object | `{}` | Map of annotations to add to the pods | -| podLabels | object | `{}` | Map of labels to add to the pods | -| podSecurityContext | object | `{}` | Pod Security Context | -| readinessProbe.enabled | bool | `true` | Enable readinessProbe | -| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe | -| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe | -| readinessProbe.path | string | `"/"` | Request path for readinessProbe | -| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe | -| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe | -| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe | -| replicaCount | int | `1` | Number of replicas | -| resources.limits | object | `{}` | Pod limit | -| resources.requests | object | `{}` | Pod requests | -| runtimeClassName | string | `""` | Specify runtime class | -| securityContext | object | `{}` | Container Security Context | -| service.annotations | object | `{}` | Annotations to add to the service | -| service.loadBalancerIP | string | `nil` | Load Balancer IP address | -| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' | -| service.port | int | `11434` | Service port | -| 
service.type | string | `"ClusterIP"` | Service type | -| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | -| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? | -| serviceAccount.create | bool | `true` | Specifies whether a service account should be created | -| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template | -| tolerations | list | `[]` | Tolerations for pod assignment | -| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment | -| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate | -| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. | -| volumes | list | `[]` | Additional volumes on the output Deployment definition. | | Key | Type | Default | Description | |-----|------|---------|-------------| | affinity | object | `{}` | Affinity for pod assignment | @@ -292,6 +200,7 @@ ollama: | ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | only for nvidia cards; change to (example) 'nvidia.com/mig-1g.10gb' to use MIG slice | | ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia', 'amd' or 'intel' If 'ollama.gpu.enabled', default value is nvidia If set to 'amd', this will add 'rocm' suffix to image tag if 'image.tag' is not override This is due cause AMD and CPU/CUDA are different images | | ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup | +| ollama.models.create | list | `[]` | List of models to create at container startup, there are two options 1. Create a raw model 2. Load a model from configMaps, configMaps must be created before and are loaded as volume in "/models" directory. 
create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 | | ollama.models.pull | list | `[]` | List of models to pull at container startup The more you add, the longer the container will take to start if models are not present pull: - llama2 - mistral | | ollama.models.run | list | `[]` | List of models to load in memory at container startup run: - llama2 - mistral | | ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" |