From 2ca0fbb37da3d416b542bef970b0199c900f41d3 Mon Sep 17 00:00:00 2001
From: Richard Palethorpe
Date: Wed, 8 May 2024 05:32:51 +0100
Subject: [PATCH] Use Elia config file to set model

Elia 1.7.0 reads the model list and default model from config.toml, so
mount one from a ConfigMap instead of overriding OPENAI_API_BASE and
exposing every local model as gpt-4-1106-preview. The phi-2 examples
drop their AIModelMap and reference the model directly with a uri, the
LocalAI model configs get their real model names, the Elia image is
pinned to 1.7.0 and LocalAI is bumped to v2.15.0.
---
 examples/elia-tui-cpu.yaml    | 72 +++++++++++++----------------------
 examples/elia-tui.yaml        | 70 +++++++++++++---------------------
 examples/llama3-70b-gguf.yaml | 40 ++++++++++++++-----
 examples/llama3-8b-gguf.yaml  | 38 +++++++++++++-----
 4 files changed, 112 insertions(+), 108 deletions(-)

diff --git a/examples/elia-tui-cpu.yaml b/examples/elia-tui-cpu.yaml
index 038a168..3dc21ad 100644
--- a/examples/elia-tui-cpu.yaml
+++ b/examples/elia-tui-cpu.yaml
@@ -1,40 +1,4 @@
 apiVersion: premlabs.io/v1alpha1
-kind: AIModelMap
-metadata:
-  name: phi-2-chat
-spec:
-  localai:
-    - variant: base
-      uri: "l3utterfly/phi-2-layla-v1-chatml-gguf"
-      # The LocalAI model description. Note that the model is called gpt-4-turbo because Elia has the models hardcoded at the time of writing
-      engineConfigFile: |
-        name: gpt-4-1106-preview
-        mmap: true
-        parameters:
-          model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
-
-        template:
-          chat_message: |
-            <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-            {{if .Content}}{{.Content}}{{end}}
-            <|im_end|>
-          chat: |
-            {{.Input}}
-            <|im_start|>assistant
-          completion: |
-            {{.Input}}
-        context_size: 4096
-        f16: true
-        stopwords:
-          - <|im_end|>
-
-      usage: |
-        curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4-1106-preview",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-        }'
----
-apiVersion: premlabs.io/v1alpha1
 kind: AIDeployment
 metadata:
   name: phi-2-chat
 spec:
   engine:
     name: "localai"
     options:
-      imageTag: latest-cpu
+      imageTag: v2.15.0-ffmpeg-core
   endpoint:
-    port: 8080
     domain: "phi-2-chat.127.0.0.1.nip.io"
   models:
-    - modelMapRef:
-        name: phi-2-chat
-      variant: base
+    - uri: phi-2-chat
   deployment:
-    resources: 
+    resources:
       requests:
         cpu: 4
         memory: 8Gi
@@ -60,6 +22,18 @@ spec:
       cpu: 32
       memory: "16Gi"
 ---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: model-config
+data:
+  config.toml: |
+    default_model="openai/phi-2-chat"
+
+    [[models]]
+    name="openai/phi-2-chat"
+    api_base="http://phi-2-chat:8080/v1"
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -76,11 +50,19 @@ spec:
     spec:
       containers:
         - name: elia
-          image: premai/elia
-          env:
-            - name: OPENAI_API_BASE
-              value: "http://phi-2-chat:8080"
+          image: premai/elia:1.7.0
           ports:
             - containerPort: 3000
           stdin: true
           tty: true
+          volumeMounts:
+            - name: config-volume
+              mountPath: /root/.config/elia
+              readOnly: true
+      volumes:
+        - name: config-volume
+          configMap:
+            name: model-config
+            items:
+              - key: config.toml
+                path: config.toml
diff --git a/examples/elia-tui.yaml b/examples/elia-tui.yaml
index 18f2bad..ad7b4a6 100644
--- a/examples/elia-tui.yaml
+++ b/examples/elia-tui.yaml
@@ -1,40 +1,4 @@
 apiVersion: premlabs.io/v1alpha1
-kind: AIModelMap
-metadata:
-  name: phi-2-chat
-spec:
-  localai:
-    - variant: base
-      uri: "l3utterfly/phi-2-layla-v1-chatml-gguf"
-      # The LocalAI model description. Note that the model is called gpt-4-turbo because Elia has the models hardcoded at the time of writing
-      engineConfigFile: |
-        name: gpt-4-1106-preview
-        mmap: true
-        parameters:
-          model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
-
-        template:
-          chat_message: |
-            <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-            {{if .Content}}{{.Content}}{{end}}
-            <|im_end|>
-          chat: |
-            {{.Input}}
-            <|im_start|>assistant
-          completion: |
-            {{.Input}}
-        context_size: 4096
-        f16: true
-        stopwords:
-          - <|im_end|>
-
-      usage: |
-        curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4-1106-preview",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-        }'
----
-apiVersion: premlabs.io/v1alpha1
 kind: AIDeployment
 metadata:
   name: phi-2-chat
 spec:
   engine:
     name: "localai"
     options:
-      imageTag: v2.12.4-cublas-cuda12-ffmpeg
+      imageTag: v2.15.0-cublas-cuda12-ffmpeg
   endpoint:
-    port: 8080
     domain: "phi-2-chat.127.0.0.1.nip.io"
   models:
-    - modelMapRef:
-        name: phi-2-chat
-      variant: base
+    - uri: phi-2-chat
   deployment:
     accelerator:
       interface: "CUDA"
       minVersion:
         major: 7
     resources:
       requests:
         cpu: 4
         memory: 8Gi
@@ -64,6 +26,18 @@ spec:
       cpu: 32
       memory: "16Gi"
 ---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: model-config
+data:
+  config.toml: |
+    default_model="openai/phi-2-chat"
+
+    [[models]]
+    name="openai/phi-2-chat"
+    api_base="http://phi-2-chat:8080/v1"
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -80,11 +54,19 @@ spec:
     spec:
      containers:
         - name: elia
-          image: premai/elia
-          env:
-            - name: OPENAI_API_BASE
-              value: "http://phi-2-chat:8080"
+          image: premai/elia:1.7.0
           ports:
             - containerPort: 3000
           stdin: true
           tty: true
+          volumeMounts:
+            - name: config-volume
+              mountPath: /root/.config/elia
+              readOnly: true
+      volumes:
+        - name: config-volume
+          configMap:
+            name: model-config
+            items:
+              - key: config.toml
+                path: config.toml
diff --git a/examples/llama3-70b-gguf.yaml b/examples/llama3-70b-gguf.yaml
index 498d991..35ff5bd 100644
--- a/examples/llama3-70b-gguf.yaml
+++ b/examples/llama3-70b-gguf.yaml
@@ -7,7 +7,7 @@ spec:
     - variant: base
       uri: "huggingface://bartowski/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct-Q5_K_M.gguf"
       engineConfigFile: |
-        name: gpt-4-1106-preview
+        name: llama-3-70b-gguf
         mmap: true
         parameters:
           model: huggingface://bartowski/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct-Q5_K_M.gguf
@@ -50,7 +50,7 @@ spec:
       major: 7
     startupProbe:
       initialDelaySeconds: 300
-      periodSeconds: 1
+      periodSeconds: 5
       timeoutSeconds: 5
       failureThreshold: 300
     resources:
@@ -64,6 +64,18 @@ spec:
       - name: "DEBUG"
         value: "true"
 ---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: model-config
+data:
+  config.toml: |
+    default_model="openai/llama-3-70b-gguf"
+
+    [[models]]
+    name="openai/llama-3-70b-gguf"
+    api_base="http://llama-3-70b-gguf:8080/v1"
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -79,15 +91,23 @@ spec:
       app: llama-3-tui
     spec:
       containers:
-        - name: llama-3-tui
-          image: premai/elia
-          env:
-            - name: OPENAI_API_BASE
-              value: "http://llama-3-70b-gguf:8080"
+        - name: elia
+          image: premai/elia:1.7.0
           ports:
             - containerPort: 3000
           stdin: true
           tty: true
+          volumeMounts:
+            - name: config-volume
+              mountPath: /root/.config/elia
+              readOnly: true
+      volumes:
+        - name: config-volume
+          configMap:
+            name: model-config
+            items:
+              - key: config.toml
+                path: config.toml
 ---
 apiVersion: v1
 kind: ConfigMap
@@ -95,11 +115,11 @@ metadata:
   name: llama-3-cli-conf
 data:
   extra-openai-models.yaml: |
-    - model_id: gpt-4-1106-preview
-      model_name: gpt-4-1106-preview
+    - model_id: llama-3-70b-gguf
+      model_name: llama-3-70b-gguf
       api_base: http://llama-3-70b-gguf:8080
   default_model.txt: |
-    gpt-4-1106-preview
+    llama-3-70b-gguf
 ---
 apiVersion: apps/v1
 kind: Deployment
diff --git a/examples/llama3-8b-gguf.yaml b/examples/llama3-8b-gguf.yaml
index ab4aa01..ad75a0a 100644
--- a/examples/llama3-8b-gguf.yaml
+++ b/examples/llama3-8b-gguf.yaml
@@ -7,7 +7,7 @@ spec:
     - variant: base
       uri: "huggingface://bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q8_0.gguf"
       engineConfigFile: |
-        name: gpt-4-1106-preview
+        name: llama-3-8b-gguf
         mmap: true
         parameters:
           model: huggingface://bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q8_0.gguf
@@ -59,6 +59,18 @@ spec:
       - name: "DEBUG"
         value: "true"
 ---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: model-config
+data:
+  config.toml: |
+    default_model="openai/llama-3-8b-gguf"
+
+    [[models]]
+    name="openai/llama-3-8b-gguf"
+    api_base="http://llama-3-8b-gguf:8080/v1"
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -74,15 +86,23 @@ spec:
       app: llama-3-tui
     spec:
       containers:
-        - name: llama-3-tui
-          image: premai/elia
-          env:
-            - name: OPENAI_API_BASE
-              value: "http://llama-3-8b-gguf:8080"
+        - name: elia
+          image: premai/elia:1.7.0
           ports:
             - containerPort: 3000
           stdin: true
           tty: true
+          volumeMounts:
+            - name: config-volume
+              mountPath: /root/.config/elia
+              readOnly: true
+      volumes:
+        - name: config-volume
+          configMap:
+            name: model-config
+            items:
+              - key: config.toml
+                path: config.toml
 ---
 apiVersion: v1
 kind: ConfigMap
@@ -90,11 +110,11 @@ metadata:
   name: llama-3-cli-conf
 data:
   extra-openai-models.yaml: |
-    - model_id: gpt-4-1106-preview
-      model_name: gpt-4-1106-preview
+    - model_id: llama-3-8b-gguf
+      model_name: llama-3-8b-gguf
       api_base: http://llama-3-8b-gguf:8080
   default_model.txt: |
-    gpt-4-1106-preview
+    llama-3-8b-gguf
 ---
 apiVersion: apps/v1
 kind: Deployment
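
Not part of the patch, but a quick way to exercise the new wiring once
an example is applied (an untested sketch; it assumes kubectl points at
the cluster and that the operator exposes LocalAI through a Service
named after the AIDeployment, as the api_base in config.toml implies):

  kubectl apply -f examples/llama3-8b-gguf.yaml

  # Elia runs as a TUI with stdin/tty enabled, so attach to its pod:
  kubectl get pods -l app=llama-3-tui
  kubectl attach -it <llama-3-tui-pod>

  # Or query LocalAI directly using the model name from config.toml:
  kubectl run curl-test --rm -it --restart=Never --image=curlimages/curl -- \
    curl http://llama-3-8b-gguf:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "llama-3-8b-gguf", "messages": [{"role": "user", "content": "How are you doing?"}]}'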