Commit

Merge pull request #42 from codificat/model-job
Add a Job to pre-populate a model into the web UI's storage
Shreyanand authored May 5, 2023
2 parents a55d14a + 672de14 commit c89b2a7
Showing 2 changed files with 88 additions and 11 deletions.
12 changes: 8 additions & 4 deletions manifests/README.md
@@ -9,6 +9,7 @@ The manifest includes:
- a `BuildConfig` to build an image from the repo, using s2i Python
- a `Deployment` to deploy the built image
- a `Service` and a `Route` to expose it
- a `Job` to pre-populate storage with one model to begin with

### Requirements

@@ -20,14 +21,17 @@ That PVC must be populated with the models to serve, one per directory.

**NOTE**: the provided `Deployment` is currently hardcoded to start up with `bloom-1b7`, so make sure to at least download that model into the PVC, or adjust the `Deployment` definition according to the models you have.
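For illustration, after this change the container command serves the `bigscience_bloom-1b7` directory; pointing it at another model only requires that the final `--model` argument match a directory that already exists under `models/` on the PVC. A minimal sketch, using the hypothetical directory `bigscience_bloom-560m` (the name `download-model.py` would create for `bigscience/bloom-560m`):

```yaml
# Sketch only: the Deployment's container command for a different model.
# bigscience_bloom-560m is a hypothetical directory that must already
# exist under models/ on the PVC (e.g. created by download-model.py).
command:
  - python
  - server.py
  - --listen
  - --api
  - --model
  - bigscience_bloom-560m
```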

One way to populate the PVC is to use the [download-model.py script](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) from the text-generation-webui repository:
One way to populate the PVC is to use the [download-model.py script](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) from the text-generation-webui repository.

The manifest has a `Job` that will download an initial model (`bloom-1b7`) into the PVC. The Job uses the same image as the application, which is where the `download-model.py` script lives, so it will not run until the image has been built successfully.

Besides the provided `Job`, models can be manually added to the PVC by running the command from the application pod itself:

1. Wait for your application's Pod to start up
2. Access your pod (either from the web console or via e.g. `oc rsh`). *Note*: if the pod is failing/restarting due to the lack of pre-loaded models, you can use `oc debug` to create a temporary clone of the pod and perform the model download there
3. `cd models`
4. `python download-model.py bigscience/bloom-1b7`
3. Run `python download-model.py bigscience/bloom-1b7`

Repeat step 4 to download all the models you want. Here are the contents of a sample PVC after a few model downloads:
Repeat the last step to download all the models you want. Here are the contents of a sample PVC after a few model downloads:

```
(app-root) sh-4.4$ cd models
```
87 changes: 80 additions & 7 deletions manifests/text-generation-webui.yaml
@@ -90,8 +90,9 @@ spec:
- python
- server.py
- --listen
- --api
- --model
- bloom-1b7
- bigscience_bloom-1b7
image: text-generation-webui:latest
imagePullPolicy: Always
name: text-generation-webui
@@ -127,11 +128,15 @@ metadata:
name: text-generation-webui
spec:
ports:
- name: 8080-tcp
- name: rest-api
port: 8080
protocol: TCP
targetPort: 8080
- name: 7860-tcp
targetPort: 5000
- name: stream-api
port: 5005
protocol: TCP
targetPort: 5005
- name: web
port: 7860
protocol: TCP
targetPort: 7860
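Reading the removed and added lines together, the resulting `Service` ports section should come out roughly as follows (service port 8080 now forwards to container port 5000, while a separate port carries the streaming API on 5005):

```yaml
# Approximate result of the Service port changes above.
ports:
- name: rest-api      # REST API, exposed via the /api Route
  port: 8080
  protocol: TCP
  targetPort: 5000
- name: stream-api    # streaming API, exposed via the /api/v1/stream Route
  port: 5005
  protocol: TCP
  targetPort: 5005
- name: web           # web UI, exposed via the main Route
  port: 7860
  protocol: TCP
  targetPort: 7860
```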
@@ -152,16 +157,56 @@ metadata:
app.kubernetes.io/part-of: text-generation-webui-app
name: text-generation-webui
spec:
to:
kind: Service
name: text-generation-webui
port:
targetPort: 7860-tcp
targetPort: web
tls:
insecureEdgeTerminationPolicy: Redirect
termination: edge
---
apiVersion: route.openshift.io/v1
kind: Route
metadata:
labels:
app: text-generation-webui
app.kubernetes.io/component: text-generation-webui
app.kubernetes.io/instance: text-generation-webui
app.kubernetes.io/name: text-generation-webui
app.kubernetes.io/part-of: text-generation-webui-app
name: text-generation-api
spec:
path: "/api"
to:
kind: Service
name: text-generation-webui
port:
targetPort: rest-api
tls:
insecureEdgeTerminationPolicy: Redirect
termination: edge
---
apiVersion: route.openshift.io/v1
kind: Route
metadata:
labels:
app: text-generation-webui
app.kubernetes.io/component: text-generation-webui
app.kubernetes.io/instance: text-generation-webui
app.kubernetes.io/name: text-generation-webui
app.kubernetes.io/part-of: text-generation-webui-app
name: text-generation-stream
spec:
path: "/api/v1/stream"
to:
kind: Service
name: text-generation-webui
weight: 100
wildcardPolicy: None
port:
targetPort: stream-api
tls:
insecureEdgeTerminationPolicy: Redirect
termination: edge
---
apiVersion: image.openshift.io/v1
kind: ImageStream
@@ -181,3 +226,31 @@ metadata:
spec:
lookupPolicy:
local: false
---
apiVersion: batch/v1
kind: Job
metadata:
annotations:
alpha.image.policy.openshift.io/resolve-names: '*'
name: model-download
spec:
template:
metadata:
annotations:
alpha.image.policy.openshift.io/resolve-names: '*'
spec:
containers:
- name: model-download
image: text-generation-webui:latest
command:
- python
- download-model.py
- bigscience/bloom-1b7
volumeMounts:
- mountPath: /opt/app-root/src/models
name: llms
restartPolicy: Never
volumes:
- name: llms
persistentVolumeClaim:
claimName: llms
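
To pre-load more than one model, the same pattern could be copied under a different name with another model argument. A sketch, not part of this commit (the Job name and `bigscience/bloom-560m` are hypothetical; any Hugging Face `org/model` id accepted by `download-model.py` should work):

```yaml
# Hypothetical second download Job; mirrors the model-download Job above.
apiVersion: batch/v1
kind: Job
metadata:
  annotations:
    alpha.image.policy.openshift.io/resolve-names: '*'
  name: model-download-bloom-560m
spec:
  template:
    metadata:
      annotations:
        alpha.image.policy.openshift.io/resolve-names: '*'
    spec:
      containers:
      - name: model-download
        image: text-generation-webui:latest
        command:
        - python
        - download-model.py
        - bigscience/bloom-560m   # hypothetical model id
        volumeMounts:
        - mountPath: /opt/app-root/src/models
          name: llms
      restartPolicy: Never
      volumes:
      - name: llms
        persistentVolumeClaim:
          claimName: llms
```

Because such a Job mounts the same `llms` PVC at the models path, anything it downloads becomes visible to the web UI without further changes.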
