Commit 52ca619

Fix #11 - Revert model comparison to Flan-T5 (#26)
* Revert to Flan-T5:
  - Remove CUDA from the container
  - Update packages and switch to CPU torch
  - Update workbenches
  - Update ImageStream
  - Switch deployment back to TGI 1.3
  - Update workbench image
* Update conclusion
1 parent 67fa1df commit 52ca619

16 files changed (+1,311 −1,354 lines)

bootstrap/ic-rhoai-configuration/images-puller.yaml (+1 −1)

@@ -19,7 +19,7 @@ spec:
       spec:
         containers:
           - name: ic-workbench
-            image: image-registry.openshift-image-registry.svc:5000/redhat-ods-applications/ic-workbench:2.0.1
+            image: image-registry.openshift-image-registry.svc:5000/redhat-ods-applications/ic-workbench:2.1.0
             command: ["tail"]
             args: ["-f", "/dev/null"]
             resources:
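The puller container only runs tail -f /dev/null so that every node pre-pulls the large workbench image before users request it. A quick way to confirm the new tag landed, assuming the puller pods carry the ic-workbench name (pod labels are not shown in this diff):

    # List pods and the image each runs; the grep pattern is an assumption
    oc get pods -n redhat-ods-applications \
      -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].image}{"\n"}{end}' \
      | grep ic-workbench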

bootstrap/ic-rhoai-configuration/workbench-imagestream.yaml (+5 −5)

@@ -9,7 +9,7 @@ metadata:
     internal.config.kubernetes.io/previousKinds: ImageStream
     opendatahub.io/notebook-image-name: CUSTOM - Insurance Claim Processing Lab Workbench
     internal.config.kubernetes.io/previousNames: CUSTOM - Insurance Claim Processing Lab Workbench
-    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
+    opendatahub.io/recommended-accelerators: ''
     opendatahub.io/notebook-image-desc: >-
       Jupyter notebook image with all the libraries needed for the OpenShift AI Insurance Claim Lab.
     argocd.argoproj.io/sync-wave: "1"
@@ -25,17 +25,17 @@ spec:
   lookupPolicy:
     local: true
   tags:
-    - name: '2.0.1'
+    - name: '2.1.0'
       annotations:
         opendatahub.io/notebook-python-dependencies: >-
-          [{"name":"PyTorch","version":"2.1.2"},{"name":"Langchain","version":"0.1.12"},{"name":"Ultralytics","version":"8.1.29"},]
+          [{"name":"PyTorch","version":"2.2.2"},{"name":"Langchain","version":"0.1.12"},{"name":"Ultralytics","version":"8.1.47"},]
         opendatahub.io/notebook-software: >-
-          [{"name":"CUDA","version":"12.1"},{"name":"Python","version":"v3.11"}]
+          [{"name":"Python","version":"v3.11"}]
         openshift.io/imported-from: quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench
       from:
         kind: DockerImage
         name: >-
-          quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench:2.0.1
+          quay.io/rh-aiservices-bu/rhoai-lab-insurance-claim-workbench:2.1.0
       importPolicy:
         importMode: Legacy
       referencePolicy:
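To verify the new tag imported cleanly (the ImageStream name is not visible in this hunk; ic-workbench is assumed from the registry path used elsewhere in this commit):

    # Show imported tags, then force a re-import of the new one if needed
    oc get imagestream ic-workbench -n redhat-ods-applications -o jsonpath='{.status.tags[*].tag}'
    oc import-image ic-workbench:2.1.0 -n redhat-ods-applications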
bootstrap/ic-shared-llm/deployment-hftgi.yaml (new file, +107)

@@ -0,0 +1,107 @@
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: llm-flant5
+  namespace: ic-shared-llm
+  labels:
+    app: llm-flant5
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-flant5
+  template:
+    metadata:
+      creationTimestamp: null
+      labels:
+        app: llm-flant5
+    spec:
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      affinity: {}
+      terminationGracePeriodSeconds: 120
+      securityContext: {}
+      containers:
+        - resources:
+            limits:
+              cpu: '4'
+              memory: 8Gi
+            requests:
+              cpu: '2'
+              memory: 6Gi
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+              scheme: HTTP
+            timeoutSeconds: 5
+            periodSeconds: 30
+            successThreshold: 1
+            failureThreshold: 3
+          terminationMessagePath: /dev/termination-log
+          name: server
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: http
+              scheme: HTTP
+            timeoutSeconds: 8
+            periodSeconds: 100
+            successThreshold: 1
+            failureThreshold: 3
+          env:
+            - name: MODEL_ID
+              value: google/flan-t5-large
+            - name: MAX_INPUT_LENGTH
+              value: '416'
+            - name: MAX_TOTAL_TOKENS
+              value: '512'
+            - name: HUGGINGFACE_HUB_CACHE
+              value: /models-cache
+            - name: PORT
+              value: '3000'
+            - name: HOSTNAME
+              value: '0.0.0.0'
+          securityContext:
+            capabilities:
+              drop:
+                - ALL
+            runAsNonRoot: true
+            allowPrivilegeEscalation: false
+            seccompProfile:
+              type: RuntimeDefault
+          ports:
+            - name: http
+              containerPort: 3000
+              protocol: TCP
+          imagePullPolicy: IfNotPresent
+          startupProbe:
+            httpGet:
+              path: /health
+              port: http
+              scheme: HTTP
+            timeoutSeconds: 1
+            periodSeconds: 30
+            successThreshold: 1
+            failureThreshold: 24
+            initialDelaySeconds: 60
+          volumeMounts:
+            - name: models-cache
+              mountPath: /models-cache
+            - name: shm
+              mountPath: /dev/shm
+          terminationMessagePolicy: File
+          image: 'ghcr.io/huggingface/text-generation-inference:1.3'
+      volumes:
+        - name: models-cache
+          persistentVolumeClaim:
+            claimName: models-cache-hftgi
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+      dnsPolicy: ClusterFirst
+  strategy:
+    type: Recreate
+  revisionHistoryLimit: 10
+  progressDeadlineSeconds: 600
bootstrap/ic-shared-llm/deployment-ollama.yaml (−81)

This file was deleted.

bootstrap/ic-shared-llm/kustomization.yaml (+3 −3)

@@ -11,9 +11,9 @@ resources:
 - fix-odf-config.yaml
 # wave 1
 - pvc.yaml
-- pvc-ollama.yaml
+- pvc-hftgi.yaml
 - deployment.yaml
 - service.yaml
-- deployment-ollama.yaml
-- service-ollama.yaml
+- deployment-hftgi.yaml
+- service-hftgi.yaml
 # wave 2
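Since Argo CD applies these resources by sync wave, a local render is the quickest way to confirm the renamed files resolve before pushing; from a checkout of the repo:

    # Render the kustomization and list what would be applied
    oc kustomize bootstrap/ic-shared-llm | grep -E '^(kind|  name):'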

bootstrap/ic-shared-llm/pvc-ollama.yaml → bootstrap/ic-shared-llm/pvc-hftgi.yaml (+2 −2)

@@ -2,10 +2,10 @@
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: ollama-cache
+  name: models-cache-hftgi
   namespace: ic-shared-llm
   labels:
-    app: ollama
+    app: ic-shared-llm
   annotations:
     argocd.argoproj.io/sync-wave: "0"
 spec:
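The Deployment above mounts this claim as its HUGGINGFACE_HUB_CACHE, so the PVC must be Bound before the model download can start; a quick check:

    # Confirm the renamed claim exists and is bound
    oc get pvc models-cache-hftgi -n ic-shared-llm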
bootstrap/ic-shared-llm/service-ollama.yaml → bootstrap/ic-shared-llm/service-hftgi.yaml

@@ -1,21 +1,21 @@
 kind: Service
 apiVersion: v1
 metadata:
-  name: ollama
+  name: llm-flant5
   namespace: ic-shared-llm
   labels:
-    app: ollama
+    app: llm-flant5
 spec:
   clusterIP: None
   ipFamilies:
     - IPv4
   ports:
     - name: http
       protocol: TCP
-      port: 11434
+      port: 3000
       targetPort: http
   type: ClusterIP
   ipFamilyPolicy: SingleStack
   sessionAffinity: None
   selector:
-    app: ollama
+    app: llm-flant5
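In-cluster clients now reach the model at the Service DNS name on port 3000 instead of Ollama's 11434; from any pod in the cluster:

    # Resolve the headless Service and hit the TGI health endpoint
    curl -s http://llm-flant5.ic-shared-llm.svc.cluster.local:3000/health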

bootstrap/ic-user-projects/create-projects-and-resources-job.yaml (+1 −1)

@@ -33,7 +33,7 @@ spec:

           # Define some variables
           WORKBENCH_NAME="my-workbench"
-          WORKBENCH_IMAGE="ic-workbench:2.0.1"
+          WORKBENCH_IMAGE="ic-workbench:2.1.0"
           PIPELINE_ENGINE="Tekton"

           for i in $(seq 1 $user_count);

bootstrap/ic-user-projects/create-projects-and-resources.bash (+1 −1)

@@ -10,7 +10,7 @@ DASHBOARD_ROUTE=https://$(oc get route rhods-dashboard -n redhat-ods-application

 # Define some variables
 WORKBENCH_NAME="my-workbench"
-WORKBENCH_IMAGE="ic-workbench:2.0.1"
+WORKBENCH_IMAGE="ic-workbench:2.1.0"
 PIPELINE_ENGINE="Tekton"

 for i in $(seq 1 $user_count);

bootstrap/workbench-image/Containerfile (−79)

@@ -25,85 +25,6 @@ RUN echo "tsflags=nodocs" | tee -a /etc/yum.conf && \
 # End of OS Packages #
 #############################################

-###################################################################################################
-# CUDA 12.1 Layer, from https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1 #
-###################################################################################################
-
-USER 0
-
-ENV NVARCH x86_64
-ENV NVIDIA_REQUIRE_CUDA "cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526"
-ENV NV_CUDA_CUDART_VERSION 12.1.105-1
-
-COPY cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo
-
-RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
-    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${NVARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
-    echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -
-
-ENV CUDA_VERSION 12.1.1
-
-# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
-RUN yum upgrade -y && yum install -y \
-    cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
-    cuda-compat-12-1 \
-    && ln -s cuda-12.1 /usr/local/cuda \
-    && yum -y clean all --enablerepo='*' && \
-    rm -rf /var/cache/dnf && \
-    find /var/log -type f -name "*.log" -exec rm -f {} \;
-
-# nvidia-docker 1.0
-RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
-    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
-
-ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
-ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
-
-COPY NGC-DL-CONTAINER-LICENSE /
-
-# nvidia-container-runtime
-ENV NVIDIA_VISIBLE_DEVICES all
-ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
-
-ENV NV_CUDA_LIB_VERSION 12.1.1-1
-
-ENV NV_NVTX_VERSION 12.1.105-1
-ENV NV_LIBNPP_VERSION 12.1.0.40-1
-ENV NV_LIBNPP_PACKAGE libnpp-12-1-${NV_LIBNPP_VERSION}
-ENV NV_LIBCUBLAS_VERSION 12.1.3.1-1
-ENV NV_LIBNCCL_PACKAGE_NAME libnccl
-ENV NV_LIBNCCL_PACKAGE_VERSION 2.17.1-1
-ENV NV_LIBNCCL_VERSION 2.17.1
-ENV NCCL_VERSION 2.17.1
-ENV NV_LIBNCCL_PACKAGE ${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.1
-
-RUN yum install -y \
-    cuda-libraries-12-1-${NV_CUDA_LIB_VERSION} \
-    cuda-nvtx-12-1-${NV_NVTX_VERSION} \
-    ${NV_LIBNPP_PACKAGE} \
-    libcublas-12-1-${NV_LIBCUBLAS_VERSION} \
-    ${NV_LIBNCCL_PACKAGE} \
-    && yum -y clean all --enablerepo='*' && \
-    rm -rf /var/cache/dnf && \
-    find /var/log -type f -name "*.log" -exec rm -f {} \;
-
-# Set this flag so that libraries can find the location of CUDA
-ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda
-
-# CuDNN
-ENV NV_CUDNN_VERSION 8.9.0.131-1
-ENV NV_CUDNN_PACKAGE libcudnn8-${NV_CUDNN_VERSION}.cuda12.1
-
-RUN yum install -y \
-    ${NV_CUDNN_PACKAGE} \
-    && yum -y clean all --enablerepo='*' && \
-    rm -rf /var/cache/dnf && \
-    find /var/log -type f -name "*.log" -exec rm -f {} \;
-
-#############################################
-# End of CUDA 12.1 Layer #
-#############################################
-
 ######################################
 # Deploy Python packages and Jupyter #
 ######################################
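The CUDA layer is dropped because inference now runs on CPU; the matching "switch to CPU torch" change lives in the Python layer that follows, which this hunk does not show. If that layer pins wheels the usual way, the install would look something like this (illustrative only; the actual requirements file is not part of this diff):

    # Install the CPU-only PyTorch 2.2.2 wheel from the official CPU index
    pip install torch==2.2.2 --index-url https://download.pytorch.org/whl/cpu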
