diff --git a/notebooks/ray-experiments/finetuneflan.yaml b/notebooks/ray-experiments/finetuneflan.yaml new file mode 100644 index 0000000..dafc03c --- /dev/null +++ b/notebooks/ray-experiments/finetuneflan.yaml @@ -0,0 +1,193 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + labels: + orderedinstance: m5.xlarge_g4dn.xlarge + name: finetuneflan + namespace: default +spec: + priority: 9 + resources: + GenericItems: + - custompodresources: + - limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + replicas: 1 + requests: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + - limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 1 + replicas: 2 + requests: + cpu: 1 + memory: 2G + nvidia.com/gpu: 1 + generictemplate: + apiVersion: ray.io/v1alpha1 + kind: RayCluster + metadata: + labels: + appwrapper.mcad.ibm.com: finetuneflan + controller-tools.k8s.io: '1.0' + name: finetuneflan + namespace: default + spec: + autoscalerOptions: + idleTimeoutSeconds: 60 + imagePullPolicy: Always + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '0' + serviceType: ClusterIP + template: + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: finetuneflan + operator: In + values: + - finetuneflan + containers: + - env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + requests: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + imagePullSecrets: [] + rayVersion: 2.1.0 + workerGroupSpecs: + - groupName: small-group-finetuneflan + maxReplicas: 2 + minReplicas: 2 + rayStartParams: + block: 'true' + num-gpus: '1' + replicas: 2 + template: + metadata: + annotations: + key: value + labels: + key: value + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: finetuneflan + operator: In + values: + - finetuneflan + containers: + - env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 1 + requests: + cpu: 1 + memory: 2G + nvidia.com/gpu: 1 + imagePullSecrets: [] + initContainers: + - command: + - sh + - -c + - until nslookup $RAY_IP.$(cat 
/var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; + do echo waiting for myservice; sleep 2; done + image: busybox:1.28 + name: init-myservice + replicas: 1 + - generictemplate: + apiVersion: route.openshift.io/v1 + kind: Route + metadata: + labels: + odh-ray-cluster-service: finetuneflan-head-svc + name: ray-dashboard-finetuneflan + namespace: default + spec: + port: + targetPort: dashboard + to: + kind: Service + name: finetuneflan-head-svc + replica: 1 + Items: [] diff --git a/notebooks/ray-experiments/ray-flan-interactive.ipynb b/notebooks/ray-experiments/ray-flan-interactive.ipynb new file mode 100644 index 0000000..a63f5ec --- /dev/null +++ b/notebooks/ray-experiments/ray-flan-interactive.ipynb @@ -0,0 +1,3499 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bbc21043", + "metadata": {}, + "source": [ + "# Fine-tune a Flan-T5 model using the CodeFlare stack and Ray" + ] + }, + { + "cell_type": "markdown", + "id": "439ab88e-e05e-43b5-b506-960aa9a5afaa", + "metadata": {}, + "source": [ + "This notebook fine-tunes a Flan-T5 model on a summarization dataset. It first uses InstaScale to add the required machines to the OpenShift cluster, then uses the CodeFlare stack to spin up a Ray cluster, and finally uses the Ray Train API to distribute the training job across multiple nodes." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", + "metadata": {}, + "outputs": [], + "source": [ + "# Import pieces from codeflare-sdk\n", + "from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration\n", + "from codeflare_sdk.cluster.auth import TokenAuthentication" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a066b71b-4967-4d03-8601-c2afb2d0b507", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2.1.0'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check the Ray version: it should match the Ray version on the workers\n", + "import ray\n", + "ray.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2fae774b-1cbb-4548-88bd-841ca0d3b0c7", + "metadata": {}, + "outputs": [], + "source": [ + "# Get packages for loading the model in this environment\n", + "#!pip install --upgrade ray peft accelerate" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "614daa0c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Logged into \"https://api.et-cluster.6mwp.p1.openshiftapps.com:6443\" as \"shanand@redhat.com\" using the token provided.\\n\\nYou have access to 113 projects, the list has been suppressed. 
You can list all projects with \\'oc projects\\'\\n\\nUsing project \"opendatahub\".\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create authentication object for oc user permissions\n", + "auth = TokenAuthentication(\n", + " token = \"xxx\",\n", + " server = \"https://api.et-cluster.6mwp.p1.openshiftapps.com:6443\",\n", + " skip_tls=False\n", + ")\n", + "auth.login()" + ] + }, + { + "cell_type": "markdown", + "id": "bc27f84c", + "metadata": {}, + "source": [ + "Once again, let's start by running through the same cluster setup as before:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0f4bc870-091f-4e11-9642-cba145710159", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Written to: finetuneflan.yaml\n" + ] + } + ], + "source": [ + "# Create and configure our cluster object (and appwrapper)\n", + "cluster = Cluster(ClusterConfiguration(\n", + " name='finetuneflan',\n", + " namespace='default',\n", + " min_worker=2,\n", + " max_worker=2,\n", + " min_cpus=1,\n", + " max_cpus=2,\n", + " min_memory=8,\n", + " max_memory=24,\n", + " gpu=1,\n", + " instascale=True,\n", + " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n", + "))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for requested resources to be set up...\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn [14], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Bring up the cluster\u001b[39;00m\n\u001b[1;32m 2\u001b[0m cluster\u001b[38;5;241m.\u001b[39mup()\n\u001b[0;32m----> 3\u001b[0m cluster\u001b[38;5;241m.\u001b[39mwait_ready()\n", + "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/codeflare_sdk/cluster/cluster.py:229\u001b[0m, in \u001b[0;36mCluster.wait_ready\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mand\u001b[39;00m time \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m timeout:\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTimeoutError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwait() timed out after waiting \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtimeout\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124ms\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 229\u001b[0m \u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 230\u001b[0m time \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m5\u001b[39m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRequested cluster up and running!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# Bring up the cluster\n", + "cluster.up()\n", + "cluster.wait_ready()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "df71c1ed", + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/html": [ + "
                     🚀 CodeFlare Cluster Details 🚀                     \n",
+       "                                                                         \n",
+       " ╭─────────────────────────────────────────────────────────────────────╮ \n",
+       " │   Name                                                              │ \n",
+       " │   finetuneflan                                        Inactive ❌   │ \n",
+       " │                                                                     │ \n",
+       " │   URI: ray://finetuneflan-head-svc.default.svc:10001                │ \n",
+       " │                                                                     │ \n",
+       " │   Dashboard🔗                                                       │ \n",
+       " │                                                                     │ \n",
+       " │                      Cluster Resources                              │ \n",
+       " │   ╭─ Workers ──╮  ╭───────── Worker specs(each) ─────────╮          │ \n",
+       " │   │  Min  Max  │  │  Memory      CPU         GPU         │          │ \n",
+       " │   │            │  │                                      │          │ \n",
+       " │   │  2    2    │  │  8~24        1           1           │          │ \n",
+       " │   │            │  │                                      │          │ \n",
+       " │   ╰────────────╯  ╰──────────────────────────────────────╯          │ \n",
+       " ╰─────────────────────────────────────────────────────────────────────╯ \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[3m \u001b[0m\u001b[1;3m ๐Ÿš€ CodeFlare Cluster Details ๐Ÿš€\u001b[0m\u001b[3m \u001b[0m\n", + "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", + " โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ \n", + " โ”‚ \u001b[1;37;42mName\u001b[0m โ”‚ \n", + " โ”‚ \u001b[1;4mfinetuneflan\u001b[0m Inactive โŒ โ”‚ \n", + " โ”‚ โ”‚ \n", + " โ”‚ \u001b[1mURI:\u001b[0m ray://finetuneflan-head-svc.default.svc:10001 โ”‚ \n", + " โ”‚ โ”‚ \n", + " โ”‚ \u001b]8;id=384441;http://ray-dashboard-finetuneflan-default.apps.et-cluster.6mwp.p1.openshiftapps.com\u001b\\\u001b[4;34mDashboard๐Ÿ”—\u001b[0m\u001b]8;;\u001b\\ โ”‚ \n", + " โ”‚ โ”‚ \n", + " โ”‚ \u001b[3m Cluster Resources \u001b[0m โ”‚ \n", + " โ”‚ โ•ญโ”€ Workers โ”€โ”€โ•ฎ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Worker specs(each) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ \n", + " โ”‚ โ”‚ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m โ”‚ โ”‚ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m โ”‚ โ”‚ \n", + " โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \n", + " โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m8~24 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \n", + " โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \n", + " โ”‚ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ \n", + " โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "RayCluster(name='finetuneflan', status=, min_workers=2, max_workers=2, worker_mem_min=8, worker_mem_max=24, worker_cpu=1, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-finetuneflan-default.apps.et-cluster.6mwp.p1.openshiftapps.com')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cluster.details()" + ] + }, + { + "cell_type": "markdown", + "id": "33663f47", + "metadata": {}, + "source": [ + "This time we will demonstrate another potential method of use: working with the Ray cluster interactively.\n", + "\n", + "Using the SDK, we can get both the Ray cluster URI and dashboard URI:" + ] + 
}, + { + "cell_type": "code", + "execution_count": 5, + "id": "c1719bca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "http://ray-dashboard-finetuneflan-default.apps.et-cluster.6mwp.p1.openshiftapps.com\n", + "ray://finetuneflan-head-svc.default.svc:10001\n" + ] + } + ], + "source": [ + "ray_dashboard_uri = cluster.cluster_dashboard_uri()\n", + "ray_cluster_uri = cluster.cluster_uri()\n", + "print(ray_dashboard_uri)\n", + "print(ray_cluster_uri)" + ] + }, + { + "cell_type": "markdown", + "id": "2a2aca6a", + "metadata": {}, + "source": [ + "Now we can connect directly to our Ray cluster via the Ray Python client:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "300146dc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ray cluster is up and running: True\n" + ] + } + ], + "source": [ + "# before proceeding, make sure the cluster exists and the URI is not empty\n", + "assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n", + "\n", + "import ray\n", + "from ray.air.config import ScalingConfig\n", + "\n", + "# reset the Ray context in case there's already one\n", + "ray.shutdown()\n", + "# establish a connection to the Ray cluster\n", + "\n", + "# install additional libraries that will be required for model training\n", + "runtime_env = {\"pip\": [\"transformers\",\n", + "                       \"datasets\",\n", + "                       \"evaluate\",\n", + "                       \"pyarrow<7.0.0\",\n", + "                       \"accelerate\",\n", + "                       \"loralib\",\n", + "                       \"py7zr\",\n", + "                       \"tensorboard\",\n", + "                       \"peft\"], \n", + "               \"env_vars\": {\"HF_HOME\":\"huggingface\"}}\n", + "\n", + "ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env, _temp_dir=\"huggingface\")\n", + "\n", + "print(\"Ray cluster is up and running: \", ray.is_initialized())" + ] + }, + { + "cell_type": "markdown", + "id": "9711030b", + "metadata": {}, + "source": [ + "Now that we are connected (and have passed in some package requirements), let's write the training code that fine-tunes the Flan-T5 model via Hugging Face (using the samsum summarization dataset):" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1b36e0d9", + "metadata": {}, + "outputs": [], + "source": [ + "@ray.remote\n", + "def train_fn():\n", + "    from datasets import load_dataset\n", + "    import transformers\n", + "    from transformers import AutoTokenizer, TrainingArguments\n", + "    from transformers import AutoModelForSequenceClassification\n", + "    import numpy as np\n", + "    from datasets import load_metric\n", + "    import ray\n", + "    from ray import tune\n", + "    from ray.train.huggingface import HuggingFaceTrainer\n", + "    from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments\n", + "    from datasets import load_dataset, concatenate_datasets\n", + "    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n", + "    from peft import LoraConfig, get_peft_model, TaskType #, prepare_model_for_int8_training\n", + "\n", + "    model_name = \"google/flan-t5-xl\"\n", + "\n", + "    #model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map=\"auto\")\n", + "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + "    \n", + "    dataset = load_dataset(\"samsum\")\n", + "\n", + "    print(f\"Train dataset size: {len(dataset['train'])}\")\n", + "    print(f\"Test dataset size: {len(dataset['test'])}\")\n", + "    \n", + "    #### COMPUTE MAX SEQ LEN ##########\n",
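+ "    # Rather than padding everything to the model's full context length, the code\n", + "    # below sizes the max lengths from the data itself: shorter padded batches use\n", + "    # less GPU memory, at the cost of truncating a few long outliers.\n",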
+ "    # The maximum total input sequence length after tokenization.\n", + "    # Sequences longer than this will be truncated, sequences shorter will be padded.\n", + "    conc_dataset = concatenate_datasets([dataset[\"train\"], dataset[\"test\"]])\n", + "\n", + "    \n", + "    tokenized_inputs = conc_dataset.map(lambda x: tokenizer(x[\"dialogue\"],\n", + "                                                            truncation=True),\n", + "                                        batched=True,\n", + "                                        remove_columns=[\"dialogue\", \"summary\"])\n", + "    \n", + "    input_lengths = [len(x) for x in tokenized_inputs[\"input_ids\"]]\n", + "    # take the 85th percentile of input length for better utilization\n", + "    max_source_length = int(np.percentile(input_lengths, 85))\n", + "    print(f\"Max source length: {max_source_length}\")\n", + "\n", + "    # The maximum total sequence length for target text after tokenization.\n", + "    # Sequences longer than this will be truncated, sequences shorter will be padded.\n", + "    tokenized_targets = conc_dataset.map(lambda x: tokenizer(x[\"summary\"],\n", + "                                                             truncation=True),\n", + "                                         batched=True,\n", + "                                         remove_columns=[\"dialogue\", \"summary\"]) \n", + "    target_lengths = [len(x) for x in tokenized_targets[\"input_ids\"]]\n", + "    # take the 90th percentile of target length for better utilization\n", + "    max_target_length = int(np.percentile(target_lengths, 90))\n", + "    print(f\"Max target length: {max_target_length}\")\n", + "    \n", + "    #### PREPROCESS DATA ##########\n", + "    \n", + "    def preprocess_function(sample,padding=\"max_length\"):\n", + "        # add prefix to the input for t5\n", + "        inputs = [\"summarize: \" + item for item in sample[\"dialogue\"]]\n", + "\n", + "        # tokenize inputs\n", + "        model_inputs = tokenizer(inputs, max_length=max_source_length, padding=padding, truncation=True)\n", + "\n", + "        # Tokenize targets with the `text_target` keyword argument\n", + "        labels = tokenizer(text_target=sample[\"summary\"], max_length=max_target_length, padding=padding, truncation=True)\n", + "\n", + "        # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore\n", + "        # padding in the loss.\n", + "        if padding == \"max_length\":\n", + "            labels[\"input_ids\"] = [\n", + "                [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels[\"input_ids\"]\n", + "            ]\n", + "\n", + "        model_inputs[\"labels\"] = labels[\"input_ids\"]\n", + "        return model_inputs\n", + "\n", + "    tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=[\"dialogue\", \"summary\", \"id\"])\n", + "    print(f\"Keys of tokenized dataset: {list(tokenized_dataset['train'].features)}\")\n", + "\n", + "    ray_train_ds = ray.data.from_huggingface(tokenized_dataset['train'])\n", + "    ray_evaluation_ds = ray.data.from_huggingface(tokenized_dataset['test'])\n", + "\n", + "    def compute_metrics(eval_pred):\n", + "        metric = load_metric(\"accuracy\")\n", + "        logits, labels = eval_pred\n", + "        predictions = np.argmax(logits, axis=-1)\n", + "        return metric.compute(predictions=predictions, references=labels)\n", + "    \n", + "    # (compute_metrics is defined for reference but is not passed to the trainer below)\n", + "    def trainer_init_per_worker(train_dataset, eval_dataset, **config):\n", + "        model_name = \"google/flan-t5-xl\"\n", + "        model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map=\"auto\")\n", + "        lora_config = LoraConfig(\n", + "            r=16,\n", + "            lora_alpha=32,\n", + "            target_modules=[\"q\", \"v\"],\n", + "            lora_dropout=0.05,\n", + "            bias=\"none\",\n", + "            task_type=TaskType.SEQ_2_SEQ_LM\n", + "        )\n", + "        # prepare int-8 model for training\n", + "        #model = prepare_model_for_int8_training(model)\n", + "\n", + "        # add LoRA adaptor\n", + "        model = get_peft_model(model, lora_config)\n", + "        
model.print_trainable_parameters()\n", + " \n", + " from transformers import DataCollatorForSeq2Seq\n", + "\n", + " # we want to ignore tokenizer pad token in the loss\n", + " label_pad_token_id = -100\n", + " # Data collator\n", + " data_collator = DataCollatorForSeq2Seq(\n", + " tokenizer,\n", + " model=model,\n", + " label_pad_token_id=label_pad_token_id,\n", + " pad_to_multiple_of=8\n", + " )\n", + " \n", + " output_dir=\"/tmp/flan/test\"\n", + "\n", + " # Define training args\n", + " training_args = Seq2SeqTrainingArguments(\n", + " output_dir=output_dir,\n", + " auto_find_batch_size=True,\n", + " learning_rate=1e-3, # higher learning rate\n", + " num_train_epochs=5,\n", + " logging_dir=f\"{output_dir}/logs\",\n", + " logging_strategy=\"steps\",\n", + " logging_steps=500,\n", + " save_strategy=\"no\",\n", + " report_to=\"tensorboard\",\n", + " )\n", + "\n", + " trainer = Seq2SeqTrainer(model=model,\n", + " args=training_args,\n", + " data_collator=data_collator,\n", + " train_dataset=train_dataset,\n", + " eval_dataset=eval_dataset)\n", + " \n", + " return trainer\n", + "\n", + " scaling_config = ScalingConfig(num_workers=2, use_gpu=True) #num workers is the number of gpus\n", + "\n", + " # we are using the ray native HuggingFaceTrainer, but you can swap out to use non ray Huggingface Trainer. Both have the same method signature. \n", + " # the ray native HFTrainer has built in support for scaling to multiple GPUs\n", + " trainer = HuggingFaceTrainer(\n", + " trainer_init_per_worker=trainer_init_per_worker,\n", + " scaling_config=scaling_config,\n", + " datasets={\"train\": ray_train_ds, \"evaluation\": ray_evaluation_ds},\n", + " )\n", + " result = trainer.fit()\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "id": "d4d8fd65", + "metadata": {}, + "source": [ + "Once we want to test our code out, we can run the training function we defined above remotely on our Ray cluster:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5901d958", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading (โ€ฆ)okenizer_config.json: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 2.54k/2.54k [00:00<00:00, 767kB/s]\n", + "Downloading spiece.model: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 792k/792k [00:00<00:00, 99.4MB/s]\n", + "Downloading (โ€ฆ)/main/tokenizer.json: 0%| | 0.00/2.42M [00:00 2\u001b[0m r \u001b[38;5;241m=\u001b[39m ray\u001b[38;5;241m.\u001b[39mget(train_fn\u001b[38;5;241m.\u001b[39mremote())\n", + "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/ray/_private/client_mode_hook.py:104\u001b[0m, in \u001b[0;36mclient_mode_hook..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m client_mode_should_convert(auto_init\u001b[38;5;241m=\u001b[39mauto_init):\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# Legacy code\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;66;03m# we only convert init function if RAY_CLIENT_MODE=1\u001b[39;00m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minit\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m is_client_mode_enabled_by_default:\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__name__\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/ray/util/client/api.py:42\u001b[0m, in \u001b[0;36m_ClientAPI.get\u001b[0;34m(self, vals, timeout)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(\u001b[38;5;28mself\u001b[39m, vals, \u001b[38;5;241m*\u001b[39m, timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124;03m\"\"\"get is the hook stub passed on to replace `ray.get`\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;124;03m vals: [Client]ObjectRef or list of these refs to retrieve.\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;124;03m timeout: Optional timeout in milliseconds\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 42\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvals\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/ray/util/client/worker.py:434\u001b[0m, in \u001b[0;36mWorker.get\u001b[0;34m(self, vals, timeout)\u001b[0m\n\u001b[1;32m 432\u001b[0m op_timeout \u001b[38;5;241m=\u001b[39m max_blocking_operation_time\n\u001b[1;32m 433\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 434\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get\u001b[49m\u001b[43m(\u001b[49m\u001b[43mto_get\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_timeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 435\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 436\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GetTimeoutError:\n", + "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/ray/util/client/worker.py:462\u001b[0m, in \u001b[0;36mWorker._get\u001b[0;34m(self, ref, timeout)\u001b[0m\n\u001b[1;32m 460\u001b[0m logger\u001b[38;5;241m.\u001b[39mexception(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to deserialize \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(chunk\u001b[38;5;241m.\u001b[39merror))\n\u001b[1;32m 461\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m--> 462\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk\u001b[38;5;241m.\u001b[39mtotal_size \u001b[38;5;241m>\u001b[39m OBJECT_TRANSFER_WARNING_SIZE \u001b[38;5;129;01mand\u001b[39;00m log_once(\n\u001b[1;32m 464\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclient_object_transfer_size_warning\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 465\u001b[0m ):\n\u001b[1;32m 466\u001b[0m size_gb \u001b[38;5;241m=\u001b[39m chunk\u001b[38;5;241m.\u001b[39mtotal_size \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m \u001b[38;5;241m30\u001b[39m\n", + "\u001b[0;31mRayTaskError(RuntimeError)\u001b[0m: \u001b[36mray::train_fn()\u001b[39m (pid=4614, ip=10.128.28.7)\n File \"/tmp/ipykernel_14249/2624701892.py\", line 150, in train_fn\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 360, in fit\n raise result.error\nray.exceptions.RayTaskError(RuntimeError): \u001b[36mray::_Inner.train()\u001b[39m (pid=345, ip=10.128.30.22, repr=HuggingFaceTrainer)\nray.exceptions.RayActorError: The actor died unexpectedly before finishing this task.\n\tclass_name: RayTrainWorker\n\tactor_id: 537034a8f0299b4acc1e1f4e05000000\n\tpid: 378\n\tnamespace: 79e19797-9a9d-4359-9e7e-135e143c02c0\n\tip: 10.128.30.22\nThe actor is dead because its worker process has died. Worker exit type: SYSTEM_ERROR Worker exit detail: Worker unexpectedly exits with a connection error code 2. End of file. There are some potential root causes. (1) The process is killed by SIGKILL by OOM killer due to high memory usage. (2) ray stop --force is called. (3) The worker is crashed unexpectedly due to SIGSEGV or other unexpected errors.\n\nThe above exception was the direct cause of the following exception:\n\n\u001b[36mray::_Inner.train()\u001b[39m (pid=345, ip=10.128.30.22, repr=HuggingFaceTrainer)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/trainable.py\", line 355, in train\n raise skipped from exception_cause(skipped)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py\", line 325, in entrypoint\n return self._trainable_func(\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 475, in _trainable_func\n super()._trainable_func(self._merged_config, reporter, checkpoint_dir)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py\", line 651, in _trainable_func\n output = fn()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 390, in train_func\n trainer.training_loop()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/data_parallel_trainer.py\", line 371, in training_loop\n self._report(training_iterator)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/data_parallel_trainer.py\", line 320, in _report\n for results in training_iterator:\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 225, in __next__\n next_results = self._run_with_error_handling(self._fetch_next_result)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 188, in _run_with_error_handling\n return func()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 257, in _fetch_next_result\n results = self._backend_executor.get_next_results()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 390, in get_next_results\n results = self.get_with_failure_handling(futures)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 483, in get_with_failure_handling\n 
self._increment_failures()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 533, in _increment_failures\n raise exc.with_traceback(None) from self._last_failure\nRuntimeError: Training has failed after 1 attempts. You can change the number of max failure attempts by setting the `max_retries` arg in your `Trainer`." + ] + } + ], + "source": [ + "# call the train_fn defined above as a remote Ray task\n", + "r = ray.get(train_fn.remote())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25819219-0317-43e5-bc31-d1fddd1fe897", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from ray.train.huggingface.transformers.transformers_checkpoint import TransformersCheckpoint\n", + "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n", + "from peft import PeftModel, PeftConfig\n", + "\n", + "# the base model must match the one the LoRA adapter was fine-tuned on above (flan-t5-xl)\n", + "model = AutoModelForSeq2SeqLM.from_pretrained('google/flan-t5-xl')\n", + "tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xl')\n", + "\n", + "checkpoint = TransformersCheckpoint.from_checkpoint(r.checkpoint)\n", + "\n", + "# Save model in a directory\n", + "model_output_dir = '../../models/raytune'\n", + "checkpoint.to_directory(model_output_dir)\n", + "\n", + "# Load the LoRA model\n", + "model = PeftModel.from_pretrained(model, model_output_dir)" + ] + },
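+ { + "cell_type": "markdown", + "id": "9a3c1f2e", + "metadata": {}, + "source": [ + "As a quick sanity check of the adapter we just loaded (a minimal, untested sketch: the sample dialogue is made up, and it assumes the `model` and `tokenizer` from the cell above fit in local memory), we could try generating a summary:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7d4e6f8", + "metadata": {}, + "outputs": [], + "source": [ + "# minimal inference sketch (hypothetical sample input; not run here)\n", + "sample_dialogue = \"Anna: Are we still on for lunch today? Ben: Yes, see you at noon!\"\n", + "inputs = tokenizer(\"summarize: \" + sample_dialogue, return_tensors=\"pt\")\n", + "outputs = model.generate(input_ids=inputs[\"input_ids\"], max_new_tokens=64)\n", + "print(tokenizer.decode(outputs[0], skip_special_tokens=True))" + ] + }, + { + "cell_type": "markdown", + "id": "5af8cd32", + "metadata": {}, + "source": [ + "Once complete, we can bring our Ray cluster down and clean up:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f995319e-17a1-4e1c-80bb-5cd1014e719a", + "metadata": {}, + "outputs": [], + "source": [ + "# To do next:\n", + "# - train on ROSA data and add inference code\n", + "# - train a higher param model\n", + "# - Add bitsandbytes" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.down()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d41b90e", + "metadata": {}, + "outputs": [], + "source": [ + "auth.logout()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}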