Skip to content

Commit

Permalink
Use Elia config file to set model
Browse files Browse the repository at this point in the history
  • Loading branch information
richiejp committed May 23, 2024
1 parent 6f80b81 commit 2ca0fbb
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 108 deletions.
72 changes: 27 additions & 45 deletions examples/elia-tui-cpu.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,4 @@
apiVersion: premlabs.io/v1alpha1
kind: AIModelMap
metadata:
name: phi-2-chat
spec:
localai:
- variant: base
uri: "l3utterfly/phi-2-layla-v1-chatml-gguf"
  # The LocalAI model description. Note that the model is named gpt-4-1106-preview because Elia has the model names hardcoded at the time of writing
engineConfigFile: |
name: gpt-4-1106-preview
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-1106-preview",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
---
apiVersion: premlabs.io/v1alpha1
kind: AIDeployment
metadata:
name: phi-2-chat
Expand All @@ -43,23 +7,33 @@ spec:
engine:
name: "localai"
options:
imageTag: latest-cpu
imageTag: v2.15.0-ffmpeg-core
endpoint:
- port: 8080
domain: "phi-2-chat.127.0.0.1.nip.io"
models:
- modelMapRef:
name: phi-2-chat
variant: base
- uri: phi-2-chat
deployment:
resources:
resources:
requests:
cpu: 4
memory: 8Gi
limits:
cpu: 32
memory: "16Gi"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: model-config
data:
config.toml: |
default_model="openai/phi-2-chat"
[[models]]
name="openai/phi-2-chat"
api_base="http://phi-2-chat:8080/v1"
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -76,11 +50,19 @@ spec:
spec:
containers:
- name: elia
image: premai/elia
env:
- name: OPENAI_API_BASE
value: "http://phi-2-chat:8080"
image: premai/elia:1.7.0
ports:
- containerPort: 3000
stdin: true
tty: true
volumeMounts:
- name: config-volume
mountPath: /root/.config/elia
readOnly: true
volumes:
- name: config-volume
configMap:
name: model-config
items:
- key: config.toml
path: config.toml
70 changes: 26 additions & 44 deletions examples/elia-tui.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,4 @@
apiVersion: premlabs.io/v1alpha1
kind: AIModelMap
metadata:
name: phi-2-chat
spec:
localai:
- variant: base
uri: "l3utterfly/phi-2-layla-v1-chatml-gguf"
  # The LocalAI model description. Note that the model is named gpt-4-1106-preview because Elia has the model names hardcoded at the time of writing
engineConfigFile: |
name: gpt-4-1106-preview
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-1106-preview",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'
---
apiVersion: premlabs.io/v1alpha1
kind: AIDeployment
metadata:
name: phi-2-chat
Expand All @@ -43,14 +7,12 @@ spec:
engine:
name: "localai"
options:
imageTag: v2.12.4-cublas-cuda12-ffmpeg
imageTag: v2.15.0-cublas-cuda12-ffmpeg
endpoint:
- port: 8080
domain: "phi-2-chat.127.0.0.1.nip.io"
models:
- modelMapRef:
name: phi-2-chat
variant: base
- uri: phi-2-chat
deployment:
accelerator:
interface: "CUDA"
Expand All @@ -64,6 +26,18 @@ spec:
cpu: 32
memory: "16Gi"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: model-config
data:
config.toml: |
default_model="openai/phi-2-chat"
[[models]]
name="openai/phi-2-chat"
api_base="http://phi-2-chat:8080/v1"
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -80,11 +54,19 @@ spec:
spec:
containers:
- name: elia
image: premai/elia
env:
- name: OPENAI_API_BASE
value: "http://phi-2-chat:8080"
image: premai/elia:1.7.0
ports:
- containerPort: 3000
stdin: true
tty: true
volumeMounts:
- name: config-volume
mountPath: /root/.config/elia
readOnly: true
volumes:
- name: config-volume
configMap:
name: model-config
items:
- key: config.toml
path: config.toml
40 changes: 30 additions & 10 deletions examples/llama3-70b-gguf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
- variant: base
uri: "huggingface://bartowski/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct-Q5_K_M.gguf"
engineConfigFile: |
name: gpt-4-1106-preview
name: llama-3-70b-gguf
mmap: true
parameters:
model: huggingface://bartowski/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct-Q5_K_M.gguf
Expand Down Expand Up @@ -50,7 +50,7 @@ spec:
major: 7
startupProbe:
initialDelaySeconds: 300
periodSeconds: 1
periodSeconds: 5
timeoutSeconds: 5
failureThreshold: 300
resources:
Expand All @@ -64,6 +64,18 @@ spec:
- name: "DEBUG"
value: "true"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: model-config
data:
config.toml: |
default_model="openai/llama-3-70b-gguf"
[[models]]
name="openai/llama-3-70b-gguf"
api_base="http://llama-3-70b-gguf:8080/v1"
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -79,27 +91,35 @@ spec:
app: llama-3-tui
spec:
containers:
- name: llama-3-tui
image: premai/elia
env:
- name: OPENAI_API_BASE
value: "http://llama-3-70b-gguf:8080"
- name: elia
image: premai/elia:1.7.0
ports:
- containerPort: 3000
stdin: true
tty: true
volumeMounts:
- name: config-volume
mountPath: /root/.config/elia
readOnly: true
volumes:
- name: config-volume
configMap:
name: model-config
items:
- key: config.toml
path: config.toml
---
apiVersion: v1
kind: ConfigMap
metadata:
name: llama-3-cli-conf
data:
extra-openai-models.yaml: |
- model_id: gpt-4-1106-preview
model_name: gpt-4-1106-preview
- model_id: llama-3-70b-gguf
model_name: llama-3-70b-gguf
api_base: http://llama-3-70b-gguf:8080
default_model.txt: |
gpt-4-1106-preview
llama-3-70b-gguf
---
apiVersion: apps/v1
kind: Deployment
Expand Down
38 changes: 29 additions & 9 deletions examples/llama3-8b-gguf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
- variant: base
uri: "huggingface://bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q8_0.gguf"
engineConfigFile: |
name: gpt-4-1106-preview
name: llama-3-8b-gguf
mmap: true
parameters:
model: huggingface://bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q8_0.gguf
Expand Down Expand Up @@ -59,6 +59,18 @@ spec:
- name: "DEBUG"
value: "true"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: model-config
data:
config.toml: |
default_model="openai/llama-3-8b-gguf"
[[models]]
name="openai/llama-3-8b-gguf"
api_base="http://llama-3-8b-gguf:8080/v1"
---
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -74,27 +86,35 @@ spec:
app: llama-3-tui
spec:
containers:
- name: llama-3-tui
image: premai/elia
env:
- name: OPENAI_API_BASE
value: "http://llama-3-8b-gguf:8080"
- name: elia
image: premai/elia:1.7.0
ports:
- containerPort: 3000
stdin: true
tty: true
volumeMounts:
- name: config-volume
mountPath: /root/.config/elia
readOnly: true
volumes:
- name: config-volume
configMap:
name: model-config
items:
- key: config.toml
path: config.toml
---
apiVersion: v1
kind: ConfigMap
metadata:
name: llama-3-cli-conf
data:
extra-openai-models.yaml: |
- model_id: gpt-4-1106-preview
model_name: gpt-4-1106-preview
- model_id: llama-3-8b-gguf
model_name: llama-3-8b-gguf
api_base: http://llama-3-8b-gguf:8080
default_model.txt: |
gpt-4-1106-preview
llama-3-8b-gguf
---
apiVersion: apps/v1
kind: Deployment
Expand Down

0 comments on commit 2ca0fbb

Please sign in to comment.