From 8acf80d0830eef88a6ec4646594d9ac9610ea724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pep=20Turr=C3=B3=20Mauri?= Date: Thu, 4 May 2023 17:59:43 +0200 Subject: [PATCH 1/4] Add a Job to pre-populate one model into the web UI's storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pep Turró Mauri --- manifests/README.md | 12 ++++++++---- manifests/text-generation-webui.yaml | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/manifests/README.md b/manifests/README.md index b30a934..c9dcf77 100644 --- a/manifests/README.md +++ b/manifests/README.md @@ -9,6 +9,7 @@ The manifest includes: - a `BuildConfig` to build an image from the repo, using s2i Python - a `Deployment` to deploy the built image - a `Service` and a `Route` to expose it +- a `Job` to pre-populate storage with one model to begin with ### Requirements @@ -20,14 +21,17 @@ That PVC must be populated with the models to serve, one per directory. **NOTE**: the provided `Deployment` is currently hardcoded to start up with `bloom-1b7`, so make sure to at least download that model into the PVC, or adjust the `Deployment` definition according to the models you have. -One way to populate the PVC is to use the [download-model.py script](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) from the text-generation-webui repository: +One way to populate the PVC is to use the [download-model.py script](https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py) from the text-generation-webui repository. + +The manifest has a `Job` that will download an initial model (`bloom-1b7`) into the PVC. The job uses the same image as the application, where the `download-model.py` script is, so it will not run until the image is built successfully. + +Besides the provided `Job`, models can be manually added to the PVC by running the command from the application pod itself: 1. 
Wait for your application's Pod to start up 2. Access your pod (either from the web console or via e.g. `oc rsh`). *Note*: if the pod is failing/restarting due to the lack of pre-loaded models you can use `oc debug` to create a temporary clone of the pod and perform the model download there -3. `cd models` -4. `python download-model.py bigscience/bloom-1b7` +3. Run `python download-model.py bigscience/bloom-1b7` -Repeat step 4 to download all the models you want. Here are the contents of a sample PVC after a few model downloads: +Repeat the last step to download all the models you want. Here are the contents of a sample PVC after a few model downloads: ``` (app-root) sh-4.4$ cd models diff --git a/manifests/text-generation-webui.yaml b/manifests/text-generation-webui.yaml index 71ad673..d519772 100644 --- a/manifests/text-generation-webui.yaml +++ b/manifests/text-generation-webui.yaml @@ -181,3 +181,31 @@ metadata: spec: lookupPolicy: local: false +--- +apiVersion: batch/v1 +kind: Job +metadata: + annotations: + alpha.image.policy.openshift.io/resolve-names: '*' + name: model-download +spec: + template: + metadata: + annotations: + alpha.image.policy.openshift.io/resolve-names: '*' + spec: + containers: + - name: model-download + image: text-generation-webui:latest + command: + - python + - download-model.py + - bigscience/bloom-1b7 + volumeMounts: + - mountPath: /opt/app-root/src/models + name: llms + restartPolicy: Never + volumes: + - name: llms + persistentVolumeClaim: + claimName: llms From 8d43d85e64feecd05d2463f2ffd912f568594476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pep=20Turr=C3=B3=20Mauri?= Date: Thu, 4 May 2023 21:33:24 +0200 Subject: [PATCH 2/4] Enable the API in the text generation app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current version needs an explicit flag to enable the API Signed-off-by: Pep Turró Mauri --- manifests/text-generation-webui.yaml | 1 + 1 file changed, 1 insertion(+) diff 
--git a/manifests/text-generation-webui.yaml b/manifests/text-generation-webui.yaml index d519772..777ddb6 100644 --- a/manifests/text-generation-webui.yaml +++ b/manifests/text-generation-webui.yaml @@ -90,6 +90,7 @@ spec: - python - server.py - --listen + - --api - --model - bloom-1b7 image: text-generation-webui:latest From 22f624c8e640fc94e65458d65fb9f780b25c7841 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pep=20Turr=C3=B3=20Mauri?= Date: Fri, 5 May 2023 14:23:24 +0200 Subject: [PATCH 3/4] Create separate routes for API access to text generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pep Turró Mauri --- manifests/text-generation-webui.yaml | 56 +++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/manifests/text-generation-webui.yaml b/manifests/text-generation-webui.yaml index 777ddb6..5e41971 100644 --- a/manifests/text-generation-webui.yaml +++ b/manifests/text-generation-webui.yaml @@ -128,11 +128,15 @@ metadata: name: text-generation-webui spec: ports: - - name: 8080-tcp + - name: rest-api port: 8080 protocol: TCP - targetPort: 8080 - - name: 7860-tcp + targetPort: 5000 + - name: stream-api + port: 5005 + protocol: TCP + targetPort: 5005 + - name: web port: 7860 protocol: TCP targetPort: 7860 @@ -153,16 +157,56 @@ metadata: app.kubernetes.io/part-of: text-generation-webui-app name: text-generation-webui spec: + to: + kind: Service + name: text-generation-webui port: - targetPort: 7860-tcp + targetPort: web tls: insecureEdgeTerminationPolicy: Redirect termination: edge +--- +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + labels: + app: text-generation-webui + app.kubernetes.io/component: text-generation-webui + app.kubernetes.io/instance: text-generation-webui + app.kubernetes.io/name: text-generation-webui + app.kubernetes.io/part-of: text-generation-webui-app + name: text-generation-api +spec: + path: "/api" to: kind: Service name: 
text-generation-webui - weight: 100 - wildcardPolicy: None + port: + targetPort: rest-api + tls: + insecureEdgeTerminationPolicy: Redirect + termination: edge +--- +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + labels: + app: text-generation-webui + app.kubernetes.io/component: text-generation-webui + app.kubernetes.io/instance: text-generation-webui + app.kubernetes.io/name: text-generation-webui + app.kubernetes.io/part-of: text-generation-webui-app + name: text-generation-stream +spec: + path: "/api/v1/stream" + to: + kind: Service + name: text-generation-webui + port: + targetPort: stream-api + tls: + insecureEdgeTerminationPolicy: Redirect + termination: edge --- apiVersion: image.openshift.io/v1 kind: ImageStream From 672de146826b60cdd2177c83ac623c1d60e0a1fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pep=20Turr=C3=B3=20Mauri?= Date: Fri, 5 May 2023 14:24:03 +0200 Subject: [PATCH 4/4] Fix path to the default model for text generation app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pep Turró Mauri --- manifests/text-generation-webui.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/text-generation-webui.yaml b/manifests/text-generation-webui.yaml index 5e41971..a9ab746 100644 --- a/manifests/text-generation-webui.yaml +++ b/manifests/text-generation-webui.yaml @@ -92,7 +92,7 @@ spec: - --listen - --api - --model - - bloom-1b7 + - bigscience_bloom-1b7 image: text-generation-webui:latest imagePullPolicy: Always name: text-generation-webui