Skip to content

Commit

Permalink
Use Elia config file to set model
Browse files Browse the repository at this point in the history
  • Loading branch information
richiejp committed May 23, 2024
1 parent 6f80b81 commit 2ca0fbb
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 108 deletions.
72 changes: 27 additions & 45 deletions examples/elia-tui-cpu.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,4 @@
apiVersion: premlabs.io/v1alpha1
kind: AIModelMap
metadata:
name: phi-2-chat
spec:
localai:
- variant: base
uri: "l3utterfly/phi-2-layla-v1-chatml-gguf"
  # The LocalAI model description. Note that the model is named gpt-4-1106-preview because Elia has the model names hardcoded at the time of writing
engineConfigFile: |
name: gpt-4-1106-preview
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-1106-preview",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
---
apiVersion: premlabs.io/v1alpha1
kind: AIDeployment
metadata:
name: phi-2-chat
Expand All @@ -43,23 +7,33 @@ spec:
engine:
name: "localai"
options:
imageTag: latest-cpu
imageTag: v2.15.0-ffmpeg-core
endpoint:
- port: 8080
domain: "phi-2-chat.127.0.0.1.nip.io"
models:
- modelMapRef:
name: phi-2-chat
variant: base
- uri: phi-2-chat
deployment:
resources:
resources:
requests:
cpu: 4
memory: 8Gi
limits:
cpu: 32
memory: "16Gi"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: model-config
data:
config.toml: |
default_model="openai/phi-2-chat"
[[models]]
name="openai/phi-2-chat"
api_base="http://phi-2-chat:8080/v1"
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -76,11 +50,19 @@ spec:
spec:
containers:
- name: elia
image: premai/elia
env:
- name: OPENAI_API_BASE
value: "http://phi-2-chat:8080"
image: premai/elia:1.7.0
ports:
- containerPort: 3000
stdin: true
tty: true
volumeMounts:
- name: config-volume
mountPath: /root/.config/elia
readOnly: true
volumes:
- name: config-volume
configMap:
name: model-config
items:
- key: config.toml
path: config.toml
70 changes: 26 additions & 44 deletions examples/elia-tui.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,4 @@
apiVersion: premlabs.io/v1alpha1
kind: AIModelMap
metadata:
name: phi-2-chat
spec:
localai:
- variant: base
uri: "l3utterfly/phi-2-layla-v1-chatml-gguf"
  # The LocalAI model description. Note that the model is named gpt-4-1106-preview because Elia has the model names hardcoded at the time of writing
engineConfigFile: |
name: gpt-4-1106-preview
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-1106-preview",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
---
apiVersion: premlabs.io/v1alpha1
kind: AIDeployment
metadata:
name: phi-2-chat
Expand All @@ -43,14 +7,12 @@ spec:
engine:
name: "localai"
options:
imageTag: v2.12.4-cublas-cuda12-ffmpeg
imageTag: v2.15.0-cublas-cuda12-ffmpeg
endpoint:
- port: 8080
domain: "phi-2-chat.127.0.0.1.nip.io"
models:
- modelMapRef:
name: phi-2-chat
variant: base
- uri: phi-2-chat
deployment:
accelerator:
interface: "CUDA"
Expand All @@ -64,6 +26,18 @@ spec:
cpu: 32
memory: "16Gi"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: model-config
data:
config.toml: |
default_model="openai/phi-2-chat"
[[models]]
name="openai/phi-2-chat"
api_base="http://phi-2-chat:8080/v1"
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -80,11 +54,19 @@ spec:
spec:
containers:
- name: elia
image: premai/elia
env:
- name: OPENAI_API_BASE
value: "http://phi-2-chat:8080"
image: premai/elia:1.7.0
ports:
- containerPort: 3000
stdin: true
tty: true
volumeMounts:
- name: config-volume
mountPath: /root/.config/elia
readOnly: true
volumes:
- name: config-volume
configMap:
name: model-config
items:
- key: config.toml
path: config.toml
40 changes: 30 additions & 10 deletions examples/llama3-70b-gguf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
- variant: base
uri: "huggingface://bartowski/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct-Q5_K_M.gguf"
engineConfigFile: |
name: gpt-4-1106-preview
name: llama-3-70b-gguf
mmap: true
parameters:
model: huggingface://bartowski/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct-Q5_K_M.gguf
Expand Down Expand Up @@ -50,7 +50,7 @@ spec:
major: 7
startupProbe:
initialDelaySeconds: 300
periodSeconds: 1
periodSeconds: 5
timeoutSeconds: 5
failureThreshold: 300
resources:
Expand All @@ -64,6 +64,18 @@ spec:
- name: "DEBUG"
value: "true"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: model-config
data:
config.toml: |
default_model="openai/llama-3-70b-gguf"
[[models]]
name="openai/llama-3-70b-gguf"
api_base="http://llama-3-70b-gguf:8080/v1"
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -79,27 +91,35 @@ spec:
app: llama-3-tui
spec:
containers:
- name: llama-3-tui
image: premai/elia
env:
- name: OPENAI_API_BASE
value: "http://llama-3-70b-gguf:8080"
- name: elia
image: premai/elia:1.7.0
ports:
- containerPort: 3000
stdin: true
tty: true
volumeMounts:
- name: config-volume
mountPath: /root/.config/elia
readOnly: true
volumes:
- name: config-volume
configMap:
name: model-config
items:
- key: config.toml
path: config.toml
---
apiVersion: v1
kind: ConfigMap
metadata:
name: llama-3-cli-conf
data:
extra-openai-models.yaml: |
- model_id: gpt-4-1106-preview
model_name: gpt-4-1106-preview
- model_id: llama-3-70b-gguf
model_name: llama-3-70b-gguf
api_base: http://llama-3-70b-gguf:8080
default_model.txt: |
gpt-4-1106-preview
llama-3-70b-gguf
---
apiVersion: apps/v1
kind: Deployment
Expand Down
38 changes: 29 additions & 9 deletions examples/llama3-8b-gguf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
- variant: base
uri: "huggingface://bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q8_0.gguf"
engineConfigFile: |
name: gpt-4-1106-preview
name: llama-3-8b-gguf
mmap: true
parameters:
model: huggingface://bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q8_0.gguf
Expand Down Expand Up @@ -59,6 +59,18 @@ spec:
- name: "DEBUG"
value: "true"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: model-config
data:
config.toml: |
default_model="openai/llama-3-8b-gguf"
[[models]]
name="openai/llama-3-8b-gguf"
api_base="http://llama-3-8b-gguf:8080/v1"
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -74,27 +86,35 @@ spec:
app: llama-3-tui
spec:
containers:
- name: llama-3-tui
image: premai/elia
env:
- name: OPENAI_API_BASE
value: "http://llama-3-8b-gguf:8080"
- name: elia
image: premai/elia:1.7.0
ports:
- containerPort: 3000
stdin: true
tty: true
volumeMounts:
- name: config-volume
mountPath: /root/.config/elia
readOnly: true
volumes:
- name: config-volume
configMap:
name: model-config
items:
- key: config.toml
path: config.toml
---
apiVersion: v1
kind: ConfigMap
metadata:
name: llama-3-cli-conf
data:
extra-openai-models.yaml: |
- model_id: gpt-4-1106-preview
model_name: gpt-4-1106-preview
- model_id: llama-3-8b-gguf
model_name: llama-3-8b-gguf
api_base: http://llama-3-8b-gguf:8080
default_model.txt: |
gpt-4-1106-preview
llama-3-8b-gguf
---
apiVersion: apps/v1
kind: Deployment
Expand Down

0 comments on commit 2ca0fbb

Please sign in to comment.