diff --git a/notebooks/ray-experiments/finetuneflan.yaml b/notebooks/ray-experiments/finetuneflan.yaml
new file mode 100644
index 0000000..dafc03c
--- /dev/null
+++ b/notebooks/ray-experiments/finetuneflan.yaml
@@ -0,0 +1,193 @@
+apiVersion: mcad.ibm.com/v1beta1
+kind: AppWrapper  # wraps the RayCluster and the dashboard Route listed under GenericItems
+metadata:
+  labels:
+    orderedinstance: m5.xlarge_g4dn.xlarge  # presumably the machine types requested for scaling; verify against the SDK
+  name: finetuneflan
+  namespace: default
+spec:
+  priority: 9
+  resources:
+    GenericItems:
+    - custompodresources:
+      - limits:  # mirrors the ray-head container resources below (no GPU)
+          cpu: 2
+          memory: 8G
+          nvidia.com/gpu: 0
+        replicas: 1
+        requests:
+          cpu: 2
+          memory: 8G
+          nvidia.com/gpu: 0
+      - limits:  # mirrors the worker container resources below (1 GPU each)
+          cpu: 2
+          memory: 8G
+          nvidia.com/gpu: 1
+        replicas: 2
+        requests:
+          cpu: 1
+          memory: 2G
+          nvidia.com/gpu: 1
+      generictemplate:
+        apiVersion: ray.io/v1alpha1
+        kind: RayCluster
+        metadata:
+          labels:
+            appwrapper.mcad.ibm.com: finetuneflan
+            controller-tools.k8s.io: '1.0'
+          name: finetuneflan
+          namespace: default
+        spec:
+          autoscalerOptions:
+            idleTimeoutSeconds: 60
+            imagePullPolicy: Always
+            resources:
+              limits:
+                cpu: 500m
+                memory: 512Mi
+              requests:
+                cpu: 500m
+                memory: 512Mi
+            upscalingMode: Default
+          enableInTreeAutoscaling: false
+          headGroupSpec:
+            rayStartParams:
+              block: 'true'
+              dashboard-host: 0.0.0.0
+              num-gpus: '0'
+            serviceType: ClusterIP
+            template:
+              spec:
+                affinity:
+                  nodeAffinity:
+                    requiredDuringSchedulingIgnoredDuringExecution:
+                      nodeSelectorTerms:
+                      - matchExpressions:
+                        - key: finetuneflan  # only nodes labelled finetuneflan=finetuneflan are eligible
+                          operator: In
+                          values:
+                          - finetuneflan
+                containers:
+                - env:
+                  - name: MY_POD_IP
+                    valueFrom:
+                      fieldRef:
+                        fieldPath: status.podIP
+                  - name: RAY_USE_TLS
+                    value: '0'
+                  - name: RAY_TLS_SERVER_CERT
+                    value: /home/ray/workspace/tls/server.crt
+                  - name: RAY_TLS_SERVER_KEY
+                    value: /home/ray/workspace/tls/server.key
+                  - name: RAY_TLS_CA_CERT
+                    value: /home/ray/workspace/tls/ca.crt
+                  image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103
+                  imagePullPolicy: Always
+                  lifecycle:
+                    preStop:
+                      exec:
+                        command:
+                        - /bin/sh
+                        - -c
+                        - ray stop
+                  name: ray-head
+                  ports:
+                  - containerPort: 6379
+                    name: gcs
+                  - containerPort: 8265
+                    name: dashboard  # named port targeted by the Route at the end of this file
+                  - containerPort: 10001
+                    name: client
+                  resources:
+                    limits:
+                      cpu: 2
+                      memory: 8G
+                      nvidia.com/gpu: 0
+                    requests:
+                      cpu: 2
+                      memory: 8G
+                      nvidia.com/gpu: 0
+                imagePullSecrets: []
+          rayVersion: 2.1.0
+          workerGroupSpecs:
+          - groupName: small-group-finetuneflan
+            maxReplicas: 2
+            minReplicas: 2
+            rayStartParams:
+              block: 'true'
+              num-gpus: '1'
+            replicas: 2
+            template:
+              metadata:
+                annotations:
+                  key: value
+                labels:
+                  key: value
+              spec:
+                affinity:
+                  nodeAffinity:
+                    requiredDuringSchedulingIgnoredDuringExecution:
+                      nodeSelectorTerms:
+                      - matchExpressions:
+                        - key: finetuneflan
+                          operator: In
+                          values:
+                          - finetuneflan
+                containers:
+                - env:
+                  - name: MY_POD_IP
+                    valueFrom:
+                      fieldRef:
+                        fieldPath: status.podIP
+                  - name: RAY_USE_TLS
+                    value: '0'
+                  - name: RAY_TLS_SERVER_CERT
+                    value: /home/ray/workspace/tls/server.crt
+                  - name: RAY_TLS_SERVER_KEY
+                    value: /home/ray/workspace/tls/server.key
+                  - name: RAY_TLS_CA_CERT
+                    value: /home/ray/workspace/tls/ca.crt
+                  image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103
+                  lifecycle:
+                    preStop:
+                      exec:
+                        command:
+                        - /bin/sh
+                        - -c
+                        - ray stop
+                  name: machine-learning
+                  resources:
+                    limits:
+                      cpu: 2
+                      memory: 8G
+                      nvidia.com/gpu: 1
+                    requests:
+                      cpu: 1
+                      memory: 2G
+                      nvidia.com/gpu: 1
+                imagePullSecrets: []
+                initContainers:
+                - command:
+                  - sh
+                  - -c
+                  - until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local;
+                    do echo waiting for myservice; sleep 2; done
+                  image: busybox:1.28
+                  name: init-myservice
+      replicas: 1
+    - generictemplate:
+        apiVersion: route.openshift.io/v1
+        kind: Route
+        metadata:
+          labels:
+            odh-ray-cluster-service: finetuneflan-head-svc
+          name: ray-dashboard-finetuneflan
+          namespace: default
+        spec:
+          port:
+            targetPort: dashboard
+          to:
+            kind: Service
+            name: finetuneflan-head-svc
+      replicas: 1  # same count key as the RayCluster item above
+    Items: []
diff --git a/notebooks/ray-experiments/ray-flan-interactive.ipynb b/notebooks/ray-experiments/ray-flan-interactive.ipynb
new file mode 100644
index 0000000..a63f5ec
--- /dev/null
+++ b/notebooks/ray-experiments/ray-flan-interactive.ipynb
@@ -0,0 +1,3499 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "bbc21043",
+ "metadata": {},
+ "source": [
+ "# Fine tune Flan T5 model using the Codeflare stack and Ray distribution"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "439ab88e-e05e-43b5-b506-960aa9a5afaa",
+ "metadata": {},
+ "source": [
+ "This notebook fine-tunes the Flan-T5 model on a summarization dataset. It first uses InstaScale to add the required machines to the OpenShift cluster, then uses the CodeFlare stack to spin up a Ray cluster, and finally uses the Ray Train API to distribute the training job across multiple nodes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import pieces from codeflare-sdk\n",
+ "from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration\n",
+ "from codeflare_sdk.cluster.auth import TokenAuthentication"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "a066b71b-4967-4d03-8601-c2afb2d0b507",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'2.1.0'"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Check ray version: it should match the worker's ray version\n",
+ "import ray\n",
+ "ray.__version__"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "2fae774b-1cbb-4548-88bd-841ca0d3b0c7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Get packages for loading the model in this environment\n",
+ "#!pip install --upgrade ray peft accelerate"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "614daa0c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Logged into \"https://api.et-cluster.6mwp.p1.openshiftapps.com:6443\" as \"shanand@redhat.com\" using the token provided.\\n\\nYou have access to 113 projects, the list has been suppressed. You can list all projects with \\'oc projects\\'\\n\\nUsing project \"opendatahub\".\\n'"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create authentication object for oc user permissions\n",
+ "auth = TokenAuthentication(\n",
+ " token = \"xxx\",\n",
+ " server = \"https://api.et-cluster.6mwp.p1.openshiftapps.com:6443\",\n",
+ " skip_tls=False\n",
+ ")\n",
+ "auth.login()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc27f84c",
+ "metadata": {},
+ "source": [
+ "Let's start by configuring the cluster object (and the AppWrapper that describes it):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "0f4bc870-091f-4e11-9642-cba145710159",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Written to: finetuneflan.yaml\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Create and configure our cluster object (and appwrapper)\n",
+ "cluster = Cluster(ClusterConfiguration(\n",
+ " name='finetuneflan',\n",
+ " namespace='default',\n",
+ " min_worker=2,\n",
+ " max_worker=2,\n",
+ " min_cpus=1,\n",
+ " max_cpus=2,\n",
+ " min_memory=8,\n",
+ " max_memory=24,\n",
+ " gpu=1,\n",
+ " instascale=True,\n",
+ " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200",
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Waiting for requested resources to be set up...\n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn [14], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Bring up the cluster\u001b[39;00m\n\u001b[1;32m 2\u001b[0m cluster\u001b[38;5;241m.\u001b[39mup()\n\u001b[0;32m----> 3\u001b[0m cluster\u001b[38;5;241m.\u001b[39mwait_ready()\n",
+ "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/codeflare_sdk/cluster/cluster.py:229\u001b[0m, in \u001b[0;36mCluster.wait_ready\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mand\u001b[39;00m time \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m timeout:\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTimeoutError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwait() timed out after waiting \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtimeout\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124ms\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 229\u001b[0m \u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 230\u001b[0m time \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m5\u001b[39m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRequested cluster up and running!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "# Bring up the cluster\n",
+ "cluster.up()\n",
+ "cluster.wait_ready()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "df71c1ed",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
๐ CodeFlare Cluster Details ๐ \n",
+ " \n",
+ " โญโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฎ \n",
+ " โ Name โ \n",
+ " โ finetuneflan Inactive โ โ \n",
+ " โ โ \n",
+ " โ URI: ray://finetuneflan-head-svc.default.svc:10001 โ \n",
+ " โ โ \n",
+ " โ Dashboard๐ โ \n",
+ " โ โ \n",
+ " โ Cluster Resources โ \n",
+ " โ โญโ Workers โโโฎ โญโโโโโโโโโ Worker specs(each) โโโโโโโโโโฎ โ \n",
+ " โ โ Min Max โ โ Memory CPU GPU โ โ \n",
+ " โ โ โ โ โ โ \n",
+ " โ โ 2 2 โ โ 8~24 1 1 โ โ \n",
+ " โ โ โ โ โ โ \n",
+ " โ โฐโโโโโโโโโโโโโฏ โฐโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฏ โ \n",
+ " โฐโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฏ \n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[3m \u001b[0m\u001b[1;3m ๐ CodeFlare Cluster Details ๐\u001b[0m\u001b[3m \u001b[0m\n",
+ "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n",
+ " โญโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฎ \n",
+ " โ \u001b[1;37;42mName\u001b[0m โ \n",
+ " โ \u001b[1;4mfinetuneflan\u001b[0m Inactive โ โ \n",
+ " โ โ \n",
+ " โ \u001b[1mURI:\u001b[0m ray://finetuneflan-head-svc.default.svc:10001 โ \n",
+ " โ โ \n",
+ " โ \u001b]8;id=384441;http://ray-dashboard-finetuneflan-default.apps.et-cluster.6mwp.p1.openshiftapps.com\u001b\\\u001b[4;34mDashboard๐\u001b[0m\u001b]8;;\u001b\\ โ \n",
+ " โ โ \n",
+ " โ \u001b[3m Cluster Resources \u001b[0m โ \n",
+ " โ โญโ Workers โโโฎ โญโโโโโโโโโ Worker specs(each) โโโโโโโโโโฎ โ \n",
+ " โ โ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m โ โ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m โ โ \n",
+ " โ โ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ โ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ โ \n",
+ " โ โ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m โ โ \u001b[36m \u001b[0m\u001b[36m8~24 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m โ โ \n",
+ " โ โ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ โ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ โ \n",
+ " โ โฐโโโโโโโโโโโโโฏ โฐโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฏ โ \n",
+ " โฐโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฏ \n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "RayCluster(name='finetuneflan', status=, min_workers=2, max_workers=2, worker_mem_min=8, worker_mem_max=24, worker_cpu=1, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-finetuneflan-default.apps.et-cluster.6mwp.p1.openshiftapps.com')"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cluster.details()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33663f47",
+ "metadata": {},
+ "source": [
+ "This time we will demonstrate another potential method of use: working with the Ray cluster interactively.\n",
+ "\n",
+ "Using the SDK, we can get both the Ray cluster URI and dashboard URI:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "c1719bca",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "http://ray-dashboard-finetuneflan-default.apps.et-cluster.6mwp.p1.openshiftapps.com\n",
+ "ray://finetuneflan-head-svc.default.svc:10001\n"
+ ]
+ }
+ ],
+ "source": [
+ "ray_dashboard_uri = cluster.cluster_dashboard_uri()\n",
+ "ray_cluster_uri = cluster.cluster_uri()\n",
+ "print(ray_dashboard_uri)\n",
+ "print(ray_cluster_uri)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2a2aca6a",
+ "metadata": {},
+ "source": [
+ "Now we can connect directly to our Ray cluster via the Ray python client:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "300146dc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Ray cluster is up and running: True\n"
+ ]
+ }
+ ],
+ "source": [
+ "#before proceeding make sure the cluster exists and the uri is not empty\n",
+ "assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n",
+ "\n",
+ "import ray\n",
+ "from ray.air.config import ScalingConfig\n",
+ "\n",
+ "# reset the ray context in case there's already one. \n",
+ "ray.shutdown()\n",
+ "# establish connection to ray cluster\n",
+ "\n",
+ "#install additional libraries that will be required for model training\n",
+ "runtime_env = {\"pip\": [\"transformers\",\n",
+ " \"datasets\",\n",
+ " \"evaluate\",\n",
+ " \"pyarrow<7.0.0\",\n",
+ " \"accelerate\",\n",
+ " \"loralib\",\n",
+ " \"py7zr\",\n",
+ " \"tensorboard\",\n",
+ " \"peft\"], \n",
+ " \"env_vars\": {\"HF_HOME\":\"huggingface\"}}\n",
+ "\n",
+ "ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env, _temp_dir=\"huggingface\")\n",
+ "\n",
+ "print(\"Ray cluster is up and running: \", ray.is_initialized())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9711030b",
+ "metadata": {},
+ "source": [
+ "Now that we are connected (and have passed in some package requirements), let's write the training code that fine-tunes the Flan-T5 model (`google/flan-t5-xl`) with LoRA adapters via HuggingFace, using the SAMSum dialogue-summarization dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "1b36e0d9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@ray.remote\n",
+ "def train_fn():\n",
+ "    \"\"\"Fine-tune google/flan-t5-xl with LoRA adapters on the SAMSum dataset.\n",
+ "\n",
+ "    Tokenizes the data, trains through Ray's HuggingFaceTrainer on two GPU\n",
+ "    workers, and returns the result of ``trainer.fit()``.\n",
+ "    \"\"\"\n",
+ "    import numpy as np\n",
+ "    from datasets import load_metric\n",
+ "    import ray\n",
+ "    from ray.train.huggingface import HuggingFaceTrainer\n",
+ "    from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments\n",
+ "    from datasets import load_dataset, concatenate_datasets\n",
+ "    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
+ "    from peft import LoraConfig, get_peft_model, TaskType #, prepare_model_for_int8_training\n",
+ "\n",
+ "    model_name = \"google/flan-t5-xl\"\n",
+ "\n",
+ "    #model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map=\"auto\")\n",
+ "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+ "\n",
+ "    dataset = load_dataset(\"samsum\")\n",
+ "\n",
+ "    print(f\"Train dataset size: {len(dataset['train'])}\")\n",
+ "    print(f\"Test dataset size: {len(dataset['test'])}\")\n",
+ "\n",
+ "    #### COMPUTE MAX SEQ LEN ##########\n",
+ "    # The maximum total input sequence length after tokenization.\n",
+ "    # Sequences longer than this will be truncated, sequences shorter will be padded.\n",
+ "    conc_dataset = concatenate_datasets([dataset[\"train\"], dataset[\"test\"]])\n",
+ "\n",
+ "\n",
+ "    tokenized_inputs = conc_dataset.map(lambda x: tokenizer(x[\"dialogue\"],\n",
+ "                                                             truncation=True),\n",
+ "                                         batched=True,\n",
+ "                                         remove_columns=[\"dialogue\", \"summary\"])\n",
+ "\n",
+ "    input_lengths = [len(x) for x in tokenized_inputs[\"input_ids\"]]\n",
+ "    # take 85 percentile of max length for better utilization\n",
+ "    max_source_length = int(np.percentile(input_lengths, 85))\n",
+ "    print(f\"Max source length: {max_source_length}\")\n",
+ "\n",
+ "    # The maximum total sequence length for target text after tokenization, measured on the\n",
+ "    # summaries (the targets). Sequences longer than this will be truncated, shorter ones padded.\n",
+ "    tokenized_targets = conc_dataset.map(lambda x: tokenizer(x[\"summary\"],\n",
+ "                                                              truncation=True),\n",
+ "                                          batched=True,\n",
+ "                                          remove_columns=[\"dialogue\", \"summary\"])\n",
+ "    target_lengths = [len(x) for x in tokenized_targets[\"input_ids\"]]\n",
+ "    # take 90 percentile of max length for better utilization\n",
+ "    max_target_length = int(np.percentile(target_lengths, 90))\n",
+ "    print(f\"Max target length: {max_target_length}\")\n",
+ "\n",
+ "    #### PREPROCESS DATA ##########\n",
+ "\n",
+ "    def preprocess_function(sample,padding=\"max_length\"):\n",
+ "        # add prefix to the input for t5\n",
+ "        inputs = [\"summarize: \" + item for item in sample[\"dialogue\"]]\n",
+ "\n",
+ "        # tokenize inputs\n",
+ "        model_inputs = tokenizer(inputs, max_length=max_source_length, padding=padding, truncation=True)\n",
+ "\n",
+ "        # Tokenize targets with the `text_target` keyword argument\n",
+ "        labels = tokenizer(text_target=sample[\"summary\"], max_length=max_target_length, padding=padding, truncation=True)\n",
+ "\n",
+ "        # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore\n",
+ "        # padding in the loss.\n",
+ "        if padding == \"max_length\":\n",
+ "            labels[\"input_ids\"] = [\n",
+ "                [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels[\"input_ids\"]\n",
+ "            ]\n",
+ "\n",
+ "        model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
+ "        return model_inputs\n",
+ "\n",
+ "    tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=[\"dialogue\", \"summary\", \"id\"])\n",
+ "    print(f\"Keys of tokenized dataset: {list(tokenized_dataset['train'].features)}\")\n",
+ "\n",
+ "    ray_train_ds = ray.data.from_huggingface(tokenized_dataset['train'])\n",
+ "    ray_evaluation_ds = ray.data.from_huggingface(tokenized_dataset['test'])\n",
+ "\n",
+ "    def compute_metrics(eval_pred):\n",
+ "        metric = load_metric(\"accuracy\")\n",
+ "        logits, labels = eval_pred\n",
+ "        predictions = np.argmax(logits, axis=-1)\n",
+ "        return metric.compute(predictions=predictions, references=labels)\n",
+ "\n",
+ "    def trainer_init_per_worker(train_dataset, eval_dataset, **config):\n",
+ "        model_name = \"google/flan-t5-xl\"\n",
+ "        model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map=\"auto\")\n",
+ "        lora_config = LoraConfig(\n",
+ "            r=16,\n",
+ "            lora_alpha=32,\n",
+ "            target_modules=[\"q\", \"v\"],\n",
+ "            lora_dropout=0.05,\n",
+ "            bias=\"none\",\n",
+ "            task_type=TaskType.SEQ_2_SEQ_LM\n",
+ "        )\n",
+ "        # prepare int-8 model for training\n",
+ "        #model = prepare_model_for_int8_training(model)\n",
+ "\n",
+ "        # add LoRA adaptor\n",
+ "        model = get_peft_model(model, lora_config)\n",
+ "        model.print_trainable_parameters()\n",
+ "\n",
+ "        from transformers import DataCollatorForSeq2Seq\n",
+ "\n",
+ "        # we want to ignore tokenizer pad token in the loss\n",
+ "        label_pad_token_id = -100\n",
+ "        # Data collator\n",
+ "        data_collator = DataCollatorForSeq2Seq(\n",
+ "            tokenizer,\n",
+ "            model=model,\n",
+ "            label_pad_token_id=label_pad_token_id,\n",
+ "            pad_to_multiple_of=8\n",
+ "        )\n",
+ "\n",
+ "        output_dir=\"/tmp/flan/test\"\n",
+ "\n",
+ "        # Define training args\n",
+ "        training_args = Seq2SeqTrainingArguments(\n",
+ "            output_dir=output_dir,\n",
+ "            auto_find_batch_size=True,\n",
+ "            learning_rate=1e-3, # higher learning rate\n",
+ "            num_train_epochs=5,\n",
+ "            logging_dir=f\"{output_dir}/logs\",\n",
+ "            logging_strategy=\"steps\",\n",
+ "            logging_steps=500,\n",
+ "            save_strategy=\"no\",\n",
+ "            report_to=\"tensorboard\",\n",
+ "        )\n",
+ "\n",
+ "        trainer = Seq2SeqTrainer(model=model,\n",
+ "                                 args=training_args,\n",
+ "                                 data_collator=data_collator,\n",
+ "                                 train_dataset=train_dataset,\n",
+ "                                 eval_dataset=eval_dataset)\n",
+ "\n",
+ "        return trainer\n",
+ "\n",
+ "    scaling_config = ScalingConfig(num_workers=2, use_gpu=True) #num workers is the number of gpus\n",
+ "\n",
+ "    # we are using the ray native HuggingFaceTrainer, but you can swap out to use non ray Huggingface Trainer. Both have the same method signature. \n",
+ "    # the ray native HFTrainer has built in support for scaling to multiple GPUs\n",
+ "    trainer = HuggingFaceTrainer(\n",
+ "        trainer_init_per_worker=trainer_init_per_worker,\n",
+ "        scaling_config=scaling_config,\n",
+ "        datasets={\"train\": ray_train_ds, \"evaluation\": ray_evaluation_ds},\n",
+ "    )\n",
+ "    result = trainer.fit()\n",
+ "    return result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4d8fd65",
+ "metadata": {},
+ "source": [
+ "Once we want to test our code out, we can run the training function we defined above remotely on our Ray cluster:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "5901d958",
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Downloading (โฆ)okenizer_config.json: 100%|โโโโโโโโโโ| 2.54k/2.54k [00:00<00:00, 767kB/s]\n",
+ "Downloading spiece.model: 100%|โโโโโโโโโโ| 792k/792k [00:00<00:00, 99.4MB/s]\n",
+ "Downloading (โฆ)/main/tokenizer.json: 0%| | 0.00/2.42M [00:00, ?B/s]\n",
+ "Downloading (โฆ)/main/tokenizer.json: 100%|โโโโโโโโโโ| 2.42M/2.42M [00:00<00:00, 48.7MB/s]\n",
+ "Downloading (โฆ)cial_tokens_map.json: 100%|โโโโโโโโโโ| 2.20k/2.20k [00:00<00:00, 1.34MB/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Train dataset size: 14732\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Test dataset size: 819\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Found cached dataset samsum (/home/ray/workspace/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e)\n",
+ "100%|โโโโโโโโโโ| 3/3 [00:00<00:00, 837.63it/s]\n",
+ "Map: 0%| | 0/15551 [00:00, ? examples/s]\n",
+ "Map: 6%|โ | 1000/15551 [00:00<00:03, 4023.66 examples/s]\n",
+ "Map: 13%|โโ | 2000/15551 [00:00<00:03, 4031.63 examples/s]\n",
+ "Map: 19%|โโ | 3000/15551 [00:00<00:02, 4215.37 examples/s]\n",
+ "Map: 26%|โโโ | 4000/15551 [00:00<00:02, 4380.19 examples/s]\n",
+ "Map: 32%|โโโโ | 5000/15551 [00:01<00:02, 4403.46 examples/s]\n",
+ "Map: 39%|โโโโ | 6000/15551 [00:01<00:02, 4544.88 examples/s]\n",
+ "Map: 45%|โโโโโ | 7000/15551 [00:01<00:01, 4502.46 examples/s]\n",
+ "Map: 51%|โโโโโโ | 8000/15551 [00:01<00:01, 4506.27 examples/s]\n",
+ "Map: 58%|โโโโโโ | 9000/15551 [00:02<00:01, 4525.94 examples/s]\n",
+ "Map: 64%|โโโโโโโ | 10000/15551 [00:02<00:01, 4507.28 examples/s]\n",
+ "Map: 71%|โโโโโโโ | 11000/15551 [00:02<00:01, 4394.20 examples/s]\n",
+ "Map: 77%|โโโโโโโโ | 12000/15551 [00:02<00:00, 4482.44 examples/s]\n",
+ "Map: 84%|โโโโโโโโโ | 13000/15551 [00:02<00:00, 4499.59 examples/s]\n",
+ "Map: 90%|โโโโโโโโโ | 14000/15551 [00:03<00:00, 4488.96 examples/s]\n",
+ "Map: 96%|โโโโโโโโโโ| 15000/15551 [00:03<00:00, 4321.68 examples/s]\n",
+ " \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Max source length: 255\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Map: 0%| | 0/15551 [00:00, ? examples/s]\n",
+ "Map: 6%|โ | 1000/15551 [00:00<00:02, 4940.97 examples/s]\n",
+ "Map: 13%|โโ | 2000/15551 [00:00<00:02, 4525.57 examples/s]\n",
+ "Map: 19%|โโ | 3000/15551 [00:00<00:03, 4073.89 examples/s]\n",
+ "Map: 26%|โโโ | 4000/15551 [00:01<00:03, 3686.14 examples/s]\n",
+ "Map: 32%|โโโโ | 5000/15551 [00:01<00:03, 3482.70 examples/s]\n",
+ "Map: 39%|โโโโ | 6000/15551 [00:01<00:02, 3446.74 examples/s]\n",
+ "Map: 45%|โโโโโ | 7000/15551 [00:01<00:02, 3546.28 examples/s]\n",
+ "Map: 51%|โโโโโโ | 8000/15551 [00:02<00:02, 3634.73 examples/s]\n",
+ "Map: 58%|โโโโโโ | 9000/15551 [00:02<00:02, 3271.87 examples/s]\n",
+ "Map: 64%|โโโโโโโ | 10000/15551 [00:02<00:01, 3237.06 examples/s]\n",
+ "Map: 71%|โโโโโโโ | 11000/15551 [00:03<00:01, 3545.31 examples/s]\n",
+ "Map: 77%|โโโโโโโโ | 12000/15551 [00:03<00:00, 3766.11 examples/s]\n",
+ "Map: 84%|โโโโโโโโโ | 13000/15551 [00:03<00:00, 4014.10 examples/s]\n",
+ "Map: 90%|โโโโโโโโโ | 14000/15551 [00:03<00:00, 4194.66 examples/s]\n",
+ "Map: 96%|โโโโโโโโโโ| 15000/15551 [00:03<00:00, 4252.76 examples/s]\n",
+ " \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Max target length: 297\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Map: 0%| | 0/14732 [00:00, ? examples/s]\n",
+ "Map: 7%|โ | 1000/14732 [00:00<00:08, 1593.46 examples/s]\n",
+ "Map: 14%|โโ | 2000/14732 [00:01<00:08, 1508.77 examples/s]\n",
+ "Map: 20%|โโ | 3000/14732 [00:01<00:07, 1528.24 examples/s]\n",
+ "Map: 27%|โโโ | 4000/14732 [00:02<00:06, 1535.12 examples/s]\n",
+ "Map: 34%|โโโโ | 5000/14732 [00:03<00:06, 1522.68 examples/s]\n",
+ "Map: 41%|โโโโ | 6000/14732 [00:03<00:05, 1551.01 examples/s]\n",
+ "Map: 48%|โโโโโ | 7000/14732 [00:04<00:05, 1491.83 examples/s]\n",
+ "Map: 54%|โโโโโโ | 8000/14732 [00:05<00:04, 1419.68 examples/s]\n",
+ "Map: 61%|โโโโโโ | 9000/14732 [00:06<00:03, 1453.36 examples/s]\n",
+ "Map: 68%|โโโโโโโ | 10000/14732 [00:06<00:03, 1433.24 examples/s]\n",
+ "Map: 75%|โโโโโโโโ | 11000/14732 [00:07<00:02, 1401.46 examples/s]\n",
+ "Map: 81%|โโโโโโโโโ | 12000/14732 [00:08<00:01, 1422.42 examples/s]\n",
+ "Map: 88%|โโโโโโโโโ | 13000/14732 [00:08<00:01, 1439.46 examples/s]\n",
+ "Map: 95%|โโโโโโโโโโ| 14000/14732 [00:09<00:00, 1476.15 examples/s]\n",
+ "Map: 0%| | 0/819 [00:00, ? examples/s] \n",
+ "Map: 0%| | 0/818 [00:00, ? examples/s] \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Keys of tokenized dataset: ['input_ids', 'attention_mask', 'labels']\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m To disable this warning, you can either:\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \t- Avoid using `tokenizers` before the fork if possible\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:58:29 (running for 00:00:08.27)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m 2023-07-27 07:58:32,632\tINFO config.py:87 -- Setting up process group for: env:// [rank=0, world_size=2]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:58:34 (running for 00:00:13.27)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Downloading (โฆ)lve/main/config.json: 100%|โโโโโโโโโโ| 1.44k/1.44k [00:00<00:00, 416kB/s]\n",
+ "Downloading (โฆ)lve/main/config.json: 100%|โโโโโโโโโโ| 1.44k/1.44k [00:00<00:00, 414kB/s]\n",
+ "Downloading (โฆ)model.bin.index.json: 0%| | 0.00/50.8k [00:00, ?B/s]\n",
+ "Downloading (โฆ)model.bin.index.json: 100%|โโโโโโโโโโ| 50.8k/50.8k [00:00<00:00, 15.2MB/s]\n",
+ "Downloading shards: 0%| | 0/2 [00:00, ?it/s]\n",
+ "Downloading (โฆ)model.bin.index.json: 100%|โโโโโโโโโโ| 50.8k/50.8k [00:00<00:00, 14.9MB/s]\n",
+ "Downloading shards: 0%| | 0/2 [00:00, ?it/s]\n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 0%| | 0.00/9.45G [00:00, ?B/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 0%| | 0.00/9.45G [00:00, ?B/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 0%| | 31.5M/9.45G [00:00<00:37, 249MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 0%| | 41.9M/9.45G [00:00<00:23, 404MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 1%| | 62.9M/9.45G [00:00<00:36, 257MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 1%| | 94.4M/9.45G [00:00<00:20, 465MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 1%| | 94.4M/9.45G [00:00<00:34, 268MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 2%|โ | 147M/9.45G [00:00<00:20, 452MB/s] \u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 2%|โ | 199M/9.45G [00:00<00:20, 457MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 1%|โ | 136M/9.45G [00:00<00:29, 316MB/s] \u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 3%|โ | 252M/9.45G [00:00<00:19, 473MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 2%|โ | 178M/9.45G [00:00<00:27, 334MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 3%|โ | 304M/9.45G [00:00<00:20, 456MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 2%|โ | 220M/9.45G [00:00<00:28, 325MB/s]\u001b[A\n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 3%|โ | 262M/9.45G [00:00<00:27, 334MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:58:39 (running for 00:00:18.27)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 4.0/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 4%|โ | 357M/9.45G [00:00<00:20, 444MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 3%|โ | 304M/9.45G [00:00<00:26, 344MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 4%|โ | 409M/9.45G [00:00<00:20, 437MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 5%|โ | 461M/9.45G [00:01<00:20, 445MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 4%|โ | 346M/9.45G [00:01<00:28, 323MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 5%|โ | 514M/9.45G [00:01<00:19, 459MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 4%|โ | 388M/9.45G [00:01<00:27, 325MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 6%|โ | 566M/9.45G [00:01<00:19, 450MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 5%|โ | 430M/9.45G [00:01<00:26, 336MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 7%|โ | 619M/9.45G [00:01<00:20, 436MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 5%|โ | 472M/9.45G [00:01<00:25, 352MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 7%|โ | 671M/9.45G [00:01<00:19, 446MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 5%|โ | 514M/9.45G [00:01<00:25, 350MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 8%|โ | 724M/9.45G [00:01<00:18, 460MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 8%|โ | 776M/9.45G [00:01<00:18, 473MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 6%|โ | 556M/9.45G [00:01<00:26, 335MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 6%|โ | 598M/9.45G [00:01<00:27, 323MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 9%|โ | 828M/9.45G [00:01<00:18, 475MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 7%|โ | 640M/9.45G [00:01<00:26, 337MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 9%|โ | 881M/9.45G [00:01<00:17, 482MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 7%|โ | 682M/9.45G [00:02<00:25, 348MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 10%|โ | 933M/9.45G [00:02<00:17, 491MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 8%|โ | 724M/9.45G [00:02<00:24, 355MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 11%|โ | 996M/9.45G [00:02<00:16, 504MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 8%|โ | 765M/9.45G [00:02<00:24, 358MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 11%|โ | 1.05G/9.45G [00:02<00:16, 506MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 9%|โ | 807M/9.45G [00:02<00:24, 359MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 12%|โโ | 1.10G/9.45G [00:02<00:16, 502MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 12%|โโ | 1.15G/9.45G [00:02<00:16, 490MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 9%|โ | 849M/9.45G [00:02<00:25, 342MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 13%|โโ | 1.21G/9.45G [00:02<00:16, 490MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 9%|โ | 891M/9.45G [00:02<00:24, 352MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 13%|โโ | 1.26G/9.45G [00:02<00:16, 488MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 10%|โ | 933M/9.45G [00:02<00:24, 347MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 14%|โโ | 1.31G/9.45G [00:02<00:16, 486MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 14%|โโ | 1.36G/9.45G [00:02<00:16, 486MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 10%|โ | 975M/9.45G [00:02<00:25, 328MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 15%|โโ | 1.42G/9.45G [00:03<00:16, 485MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 11%|โ | 1.02G/9.45G [00:03<00:25, 330MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 16%|โโ | 1.47G/9.45G [00:03<00:16, 484MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 11%|โ | 1.06G/9.45G [00:03<00:25, 334MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 16%|โโ | 1.52G/9.45G [00:03<00:16, 488MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 12%|โโ | 1.10G/9.45G [00:03<00:25, 324MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 17%|โโ | 1.57G/9.45G [00:03<00:16, 489MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 17%|โโ | 1.63G/9.45G [00:03<00:15, 497MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 12%|โโ | 1.14G/9.45G [00:03<00:25, 329MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 18%|โโ | 1.68G/9.45G [00:03<00:15, 497MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 13%|โโ | 1.18G/9.45G [00:03<00:25, 327MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 18%|โโ | 1.73G/9.45G [00:03<00:15, 494MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 13%|โโ | 1.23G/9.45G [00:03<00:25, 328MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 19%|โโ | 1.78G/9.45G [00:03<00:15, 482MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 13%|โโ | 1.27G/9.45G [00:03<00:25, 321MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 19%|โโ | 1.84G/9.45G [00:03<00:19, 393MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 14%|โโ | 1.31G/9.45G [00:03<00:25, 322MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 14%|โโ | 1.35G/9.45G [00:04<00:25, 319MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 20%|โโ | 1.89G/9.45G [00:04<00:23, 321MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 15%|โโ | 1.39G/9.45G [00:04<00:24, 323MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 15%|โโ | 1.44G/9.45G [00:04<00:24, 325MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 20%|โโ | 1.93G/9.45G [00:04<00:25, 297MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 21%|โโ | 1.97G/9.45G [00:04<00:25, 299MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 16%|โโ | 1.48G/9.45G [00:04<00:30, 264MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 21%|โโโ | 2.01G/9.45G [00:04<00:25, 296MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 16%|โโ | 1.51G/9.45G [00:04<00:28, 274MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 22%|โโโ | 2.04G/9.45G [00:04<00:25, 291MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 16%|โโ | 1.55G/9.45G [00:04<00:27, 286MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 22%|โโโ | 2.08G/9.45G [00:04<00:25, 288MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 17%|โโ | 1.58G/9.45G [00:04<00:27, 282MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 22%|โโโ | 2.11G/9.45G [00:04<00:25, 291MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 17%|โโ | 1.63G/9.45G [00:05<00:25, 304MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 23%|โโโ | 2.14G/9.45G [00:05<00:26, 281MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 18%|โโ | 1.67G/9.45G [00:05<00:25, 310MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 23%|โโโ | 2.17G/9.45G [00:05<00:26, 275MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 18%|โโ | 1.71G/9.45G [00:05<00:24, 314MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 23%|โโโ | 2.20G/9.45G [00:05<00:26, 277MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 19%|โโ | 1.75G/9.45G [00:05<00:24, 310MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 24%|โโโ | 2.23G/9.45G [00:05<00:25, 281MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 19%|โโ | 1.79G/9.45G [00:05<00:24, 316MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 24%|โโโ | 2.26G/9.45G [00:05<00:25, 283MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 24%|โโโ | 2.30G/9.45G [00:05<00:25, 278MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 19%|โโ | 1.84G/9.45G [00:05<00:24, 305MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 25%|โโโ | 2.33G/9.45G [00:05<00:25, 278MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 20%|โโ | 1.88G/9.45G [00:05<00:23, 316MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 25%|โโโ | 2.36G/9.45G [00:05<00:24, 284MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:58:44 (running for 00:00:23.28)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 20%|โโ | 1.92G/9.45G [00:05<00:23, 326MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 25%|โโโ | 2.40G/9.45G [00:06<00:23, 297MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 21%|โโ | 1.96G/9.45G [00:06<00:22, 327MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 26%|โโโ | 2.43G/9.45G [00:06<00:24, 285MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 21%|โโ | 2.00G/9.45G [00:06<00:22, 326MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 26%|โโโ | 2.46G/9.45G [00:06<00:24, 280MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 22%|โโโ | 2.04G/9.45G [00:06<00:22, 335MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 26%|โโโ | 2.50G/9.45G [00:06<00:24, 284MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 22%|โโโ | 2.09G/9.45G [00:06<00:22, 323MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 27%|โโโ | 2.53G/9.45G [00:06<00:23, 288MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 23%|โโโ | 2.13G/9.45G [00:06<00:23, 318MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 27%|โโโ | 2.56G/9.45G [00:06<00:24, 285MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 27%|โโโ | 2.59G/9.45G [00:06<00:23, 291MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 23%|โโโ | 2.17G/9.45G [00:06<00:23, 313MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 28%|โโโ | 2.63G/9.45G [00:06<00:22, 303MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 28%|โโโ | 2.66G/9.45G [00:06<00:22, 304MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 23%|โโโ | 2.21G/9.45G [00:06<00:23, 305MB/s]\u001b[A\n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 24%|โโโ | 2.25G/9.45G [00:07<00:22, 317MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 29%|โโโ | 2.69G/9.45G [00:07<00:22, 299MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 29%|โโโ | 2.73G/9.45G [00:07<00:23, 285MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 24%|โโโ | 2.30G/9.45G [00:07<00:22, 324MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 29%|โโโ | 2.76G/9.45G [00:07<00:23, 284MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 25%|โโโ | 2.34G/9.45G [00:07<00:21, 335MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 30%|โโโ | 2.79G/9.45G [00:07<00:23, 288MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 25%|โโโ | 2.38G/9.45G [00:07<00:20, 340MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 30%|โโโ | 2.82G/9.45G [00:07<00:23, 285MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 26%|โโโ | 2.42G/9.45G [00:07<00:21, 327MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 26%|โโโ | 2.46G/9.45G [00:07<00:20, 337MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 30%|โโโ | 2.85G/9.45G [00:07<00:23, 284MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 27%|โโโ | 2.51G/9.45G [00:07<00:20, 343MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 31%|โโโ | 2.88G/9.45G [00:07<00:23, 284MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 31%|โโโ | 2.92G/9.45G [00:07<00:23, 284MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 27%|โโโ | 2.55G/9.45G [00:07<00:21, 328MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 31%|โโโ | 2.95G/9.45G [00:07<00:22, 292MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 27%|โโโ | 2.59G/9.45G [00:08<00:22, 304MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 32%|โโโโ | 2.98G/9.45G [00:08<00:22, 282MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 32%|โโโโ | 3.01G/9.45G [00:08<00:22, 290MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 28%|โโโ | 2.62G/9.45G [00:08<00:23, 294MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 32%|โโโโ | 3.04G/9.45G [00:08<00:23, 273MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 28%|โโโ | 2.65G/9.45G [00:08<00:23, 284MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 28%|โโโ | 2.68G/9.45G [00:08<00:23, 287MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 33%|โโโโ | 3.07G/9.45G [00:08<00:25, 255MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 29%|โโโ | 2.72G/9.45G [00:08<00:24, 281MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 33%|โโโโ | 3.10G/9.45G [00:08<00:25, 253MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 29%|โโโ | 2.75G/9.45G [00:08<00:24, 276MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 33%|โโโโ | 3.15G/9.45G [00:08<00:23, 274MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 30%|โโโ | 2.79G/9.45G [00:08<00:22, 294MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 34%|โโโโ | 3.19G/9.45G [00:08<00:22, 279MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 30%|โโโ | 2.83G/9.45G [00:08<00:21, 304MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 34%|โโโโ | 3.23G/9.45G [00:08<00:21, 288MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 30%|โโโ | 2.86G/9.45G [00:08<00:21, 306MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 35%|โโโโ | 3.26G/9.45G [00:09<00:21, 292MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 31%|โโโ | 2.90G/9.45G [00:09<00:20, 324MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 35%|โโโโ | 3.29G/9.45G [00:09<00:20, 294MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 31%|โโโ | 2.95G/9.45G [00:09<00:19, 342MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 35%|โโโโ | 3.32G/9.45G [00:09<00:21, 288MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 32%|โโโโ | 2.99G/9.45G [00:09<00:18, 348MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 36%|โโโโ | 3.37G/9.45G [00:09<00:20, 300MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 32%|โโโโ | 3.03G/9.45G [00:09<00:19, 337MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 36%|โโโโ | 3.41G/9.45G [00:09<00:19, 313MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 33%|โโโโ | 3.07G/9.45G [00:09<00:18, 341MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 37%|โโโโ | 3.45G/9.45G [00:09<00:19, 316MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 33%|โโโโ | 3.11G/9.45G [00:09<00:18, 342MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 37%|โโโโ | 3.49G/9.45G [00:09<00:18, 322MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 33%|โโโโ | 3.16G/9.45G [00:09<00:17, 352MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 38%|โโโโ | 3.54G/9.45G [00:09<00:16, 360MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 34%|โโโโ | 3.20G/9.45G [00:09<00:18, 336MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 38%|โโโโ | 3.59G/9.45G [00:10<00:17, 344MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 34%|โโโโ | 3.24G/9.45G [00:10<00:18, 338MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 35%|โโโโ | 3.28G/9.45G [00:10<00:17, 348MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 38%|โโโโ | 3.63G/9.45G [00:10<00:16, 343MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 35%|โโโโ | 3.32G/9.45G [00:10<00:17, 348MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 39%|โโโโ | 3.67G/9.45G [00:10<00:16, 350MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 39%|โโโโ | 3.71G/9.45G [00:10<00:16, 340MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 36%|โโโโ | 3.37G/9.45G [00:10<00:17, 354MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 36%|โโโโ | 3.41G/9.45G [00:10<00:17, 351MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 37%|โโโโ | 3.45G/9.45G [00:10<00:17, 345MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 37%|โโโโ | 3.49G/9.45G [00:10<00:17, 349MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 40%|โโโโ | 3.75G/9.45G [00:10<00:26, 217MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:58:49 (running for 00:00:28.28)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 37%|โโโโ | 3.53G/9.45G [00:10<00:16, 350MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 40%|โโโโ | 3.79G/9.45G [00:10<00:25, 221MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 38%|โโโโ | 3.58G/9.45G [00:11<00:16, 357MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 40%|โโโโ | 3.82G/9.45G [00:11<00:25, 221MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 38%|โโโโ | 3.62G/9.45G [00:11<00:16, 355MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 41%|โโโโ | 3.85G/9.45G [00:11<00:25, 224MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 39%|โโโโ | 3.66G/9.45G [00:11<00:16, 349MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 41%|โโโโ | 3.88G/9.45G [00:11<00:22, 243MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 39%|โโโโ | 3.70G/9.45G [00:11<00:15, 360MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 41%|โโโโโ | 3.91G/9.45G [00:11<00:22, 249MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 40%|โโโโ | 3.74G/9.45G [00:11<00:15, 365MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 42%|โโโโโ | 3.95G/9.45G [00:11<00:20, 273MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 40%|โโโโ | 3.79G/9.45G [00:11<00:16, 352MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 42%|โโโโโ | 4.00G/9.45G [00:11<00:18, 292MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 43%|โโโโโ | 4.03G/9.45G [00:11<00:19, 285MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 41%|โโโโ | 3.83G/9.45G [00:11<00:16, 339MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 41%|โโโโ | 3.87G/9.45G [00:11<00:17, 321MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 43%|โโโโโ | 4.07G/9.45G [00:11<00:17, 301MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 43%|โโโโโ | 4.10G/9.45G [00:11<00:18, 291MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 41%|โโโโโ | 3.91G/9.45G [00:12<00:19, 286MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 44%|โโโโโ | 4.13G/9.45G [00:12<00:21, 250MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 42%|โโโโโ | 3.94G/9.45G [00:12<00:20, 275MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 44%|โโโโโ | 4.16G/9.45G [00:12<00:24, 220MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 42%|โโโโโ | 3.97G/9.45G [00:12<00:21, 260MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 42%|โโโโโ | 4.01G/9.45G [00:12<00:22, 242MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 44%|โโโโโ | 4.19G/9.45G [00:12<00:25, 208MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 43%|โโโโโ | 4.04G/9.45G [00:12<00:22, 240MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.23G/9.45G [00:12<00:27, 193MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 43%|โโโโโ | 4.07G/9.45G [00:12<00:24, 222MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.25G/9.45G [00:12<00:27, 189MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.27G/9.45G [00:12<00:27, 185MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 43%|โโโโโ | 4.10G/9.45G [00:12<00:25, 210MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.29G/9.45G [00:13<00:29, 178MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 44%|โโโโโ | 4.13G/9.45G [00:13<00:26, 204MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.31G/9.45G [00:13<00:29, 174MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 44%|โโโโโ | 4.15G/9.45G [00:13<00:26, 201MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.33G/9.45G [00:13<00:29, 174MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 44%|โโโโโ | 4.17G/9.45G [00:13<00:26, 198MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.35G/9.45G [00:13<00:31, 163MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 44%|โโโโโ | 4.19G/9.45G [00:13<00:27, 194MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.22G/9.45G [00:13<00:27, 193MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.37G/9.45G [00:13<00:31, 160MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.24G/9.45G [00:13<00:29, 177MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.39G/9.45G [00:13<00:31, 158MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.26G/9.45G [00:13<00:28, 184MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 47%|โโโโโ | 4.41G/9.45G [00:13<00:32, 153MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.28G/9.45G [00:13<00:28, 180MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 45%|โโโโโ | 4.30G/9.45G [00:14<00:29, 174MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 47%|โโโโโ | 4.44G/9.45G [00:14<00:42, 117MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.32G/9.45G [00:14<00:29, 172MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 47%|โโโโโ | 4.47G/9.45G [00:14<00:32, 153MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.34G/9.45G [00:14<00:30, 166MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.50G/9.45G [00:14<00:29, 167MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.52G/9.45G [00:14<00:30, 162MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.54G/9.45G [00:14<00:31, 158MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.36G/9.45G [00:14<00:45, 112MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.56G/9.45G [00:14<00:31, 154MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 46%|โโโโโ | 4.39G/9.45G [00:14<00:39, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.58G/9.45G [00:15<00:31, 153MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 47%|โโโโโ | 4.44G/9.45G [00:14<00:29, 172MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 47%|โโโโโ | 4.47G/9.45G [00:15<00:26, 186MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.60G/9.45G [00:15<00:32, 150MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 47%|โโโโโ | 4.49G/9.45G [00:15<00:27, 178MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.62G/9.45G [00:15<00:32, 150MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.65G/9.45G [00:15<00:32, 149MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.51G/9.45G [00:15<00:29, 167MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.53G/9.45G [00:15<00:29, 165MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.67G/9.45G [00:15<00:32, 145MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.69G/9.45G [00:15<00:33, 144MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.55G/9.45G [00:15<00:30, 162MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 48%|โโโโโ | 4.57G/9.45G [00:15<00:31, 156MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.71G/9.45G [00:15<00:32, 146MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:58:54 (running for 00:00:33.28)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.59G/9.45G [00:15<00:31, 156MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.73G/9.45G [00:16<00:32, 145MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.61G/9.45G [00:16<00:31, 153MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.75G/9.45G [00:16<00:33, 142MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.63G/9.45G [00:16<00:31, 155MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.77G/9.45G [00:16<00:33, 141MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.66G/9.45G [00:16<00:30, 155MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโ | 4.79G/9.45G [00:16<00:32, 143MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 49%|โโโโโ | 4.68G/9.45G [00:16<00:31, 150MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโ | 4.81G/9.45G [00:16<00:33, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.70G/9.45G [00:16<00:32, 145MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโ | 4.83G/9.45G [00:16<00:33, 138MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.72G/9.45G [00:16<00:32, 145MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโโ | 4.85G/9.45G [00:16<00:33, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.74G/9.45G [00:16<00:33, 142MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.88G/9.45G [00:17<00:34, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 50%|โโโโโ | 4.76G/9.45G [00:17<00:35, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.90G/9.45G [00:17<00:33, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโ | 4.78G/9.45G [00:17<00:33, 141MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.92G/9.45G [00:17<00:33, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโ | 4.80G/9.45G [00:17<00:33, 138MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโ | 4.82G/9.45G [00:17<00:33, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.94G/9.45G [00:17<00:34, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.96G/9.45G [00:17<00:34, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโโ | 4.84G/9.45G [00:17<00:33, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 51%|โโโโโโ | 4.87G/9.45G [00:17<00:33, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 4.98G/9.45G [00:17<00:34, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.89G/9.45G [00:18<00:33, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 5.00G/9.45G [00:18<00:34, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 5.02G/9.45G [00:18<00:34, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.91G/9.45G [00:18<00:33, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.93G/9.45G [00:18<00:33, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 5.04G/9.45G [00:18<00:33, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.06G/9.45G [00:18<00:33, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 52%|โโโโโโ | 4.95G/9.45G [00:18<00:33, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 4.97G/9.45G [00:18<00:33, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.09G/9.45G [00:18<00:33, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.11G/9.45G [00:18<00:33, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 4.99G/9.45G [00:18<00:32, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 5.01G/9.45G [00:18<00:32, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.13G/9.45G [00:19<00:33, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 5.03G/9.45G [00:19<00:32, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.15G/9.45G [00:19<00:33, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 53%|โโโโโโ | 5.05G/9.45G [00:19<00:32, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.17G/9.45G [00:19<00:33, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.08G/9.45G [00:19<00:32, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.19G/9.45G [00:19<00:32, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.10G/9.45G [00:19<00:32, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.21G/9.45G [00:19<00:33, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.12G/9.45G [00:19<00:32, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.23G/9.45G [00:19<00:32, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 54%|โโโโโโ | 5.14G/9.45G [00:19<00:32, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.25G/9.45G [00:20<00:32, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.16G/9.45G [00:20<00:31, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.27G/9.45G [00:20<00:31, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.18G/9.45G [00:20<00:31, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.30G/9.45G [00:20<00:32, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.20G/9.45G [00:20<00:32, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.32G/9.45G [00:20<00:31, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.22G/9.45G [00:20<00:31, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.34G/9.45G [00:20<00:31, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 55%|โโโโโโ | 5.24G/9.45G [00:20<00:31, 136MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:58:59 (running for 00:00:38.28)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.36G/9.45G [00:20<00:31, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.26G/9.45G [00:20<00:30, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.38G/9.45G [00:20<00:31, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.28G/9.45G [00:21<00:31, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.40G/9.45G [00:21<00:31, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.31G/9.45G [00:21<00:31, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.42G/9.45G [00:21<00:31, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 56%|โโโโโโ | 5.33G/9.45G [00:21<00:31, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.44G/9.45G [00:21<00:30, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.35G/9.45G [00:21<00:30, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.46G/9.45G [00:21<00:30, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.37G/9.45G [00:21<00:30, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.48G/9.45G [00:21<00:30, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.39G/9.45G [00:21<00:30, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.51G/9.45G [00:21<00:30, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.41G/9.45G [00:21<00:31, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.53G/9.45G [00:22<00:30, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 57%|โโโโโโ | 5.43G/9.45G [00:22<00:29, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.55G/9.45G [00:22<00:30, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.45G/9.45G [00:22<00:29, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.57G/9.45G [00:22<00:29, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.47G/9.45G [00:22<00:30, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.59G/9.45G [00:22<00:29, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.49G/9.45G [00:22<00:29, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 58%|โโโโโโ | 5.52G/9.45G [00:22<00:30, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.61G/9.45G [00:22<00:29, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.63G/9.45G [00:22<00:29, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.54G/9.45G [00:22<00:30, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.56G/9.45G [00:23<00:29, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.65G/9.45G [00:23<00:29, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.67G/9.45G [00:23<00:29, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.58G/9.45G [00:23<00:29, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.69G/9.45G [00:23<00:29, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.60G/9.45G [00:23<00:29, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.71G/9.45G [00:23<00:28, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 59%|โโโโโโ | 5.62G/9.45G [00:23<00:28, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโ | 5.74G/9.45G [00:23<00:29, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.64G/9.45G [00:23<00:28, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.66G/9.45G [00:23<00:27, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโ | 5.76G/9.45G [00:23<00:28, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.68G/9.45G [00:24<00:27, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโ | 5.78G/9.45G [00:24<00:28, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 60%|โโโโโโ | 5.70G/9.45G [00:24<00:28, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโ | 5.73G/9.45G [00:24<00:28, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโโ | 5.80G/9.45G [00:24<00:35, 102MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโ | 5.75G/9.45G [00:24<00:28, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 62%|โโโโโโโ | 5.84G/9.45G [00:24<00:26, 139MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 62%|โโโโโโโ | 5.86G/9.45G [00:24<00:26, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโ | 5.77G/9.45G [00:24<00:38, 96.5MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 62%|โโโโโโโ | 5.88G/9.45G [00:24<00:26, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโโ | 5.79G/9.45G [00:25<00:33, 109MB/s] \u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 61%|โโโโโโโ | 5.81G/9.45G [00:25<00:28, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 62%|โโโโโโโ | 5.90G/9.45G [00:25<00:26, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 62%|โโโโโโโ | 5.84G/9.45G [00:25<00:23, 152MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 63%|โโโโโโโ | 5.92G/9.45G [00:25<00:26, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 62%|โโโโโโโ | 5.86G/9.45G [00:25<00:24, 148MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 63%|โโโโโโโ | 5.95G/9.45G [00:25<00:26, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 62%|โโโโโโโ | 5.88G/9.45G [00:25<00:25, 142MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 63%|โโโโโโโ | 5.97G/9.45G [00:25<00:26, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 63%|โโโโโโโ | 5.99G/9.45G [00:25<00:27, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 62%|โโโโโโโ | 5.90G/9.45G [00:25<00:26, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 63%|โโโโโโโ | 5.92G/9.45G [00:25<00:26, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.01G/9.45G [00:25<00:26, 129MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:04 (running for 00:00:43.29)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 63%|โโโโโโโ | 5.95G/9.45G [00:26<00:25, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.03G/9.45G [00:26<00:26, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 63%|โโโโโโโ | 5.97G/9.45G [00:26<00:26, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.05G/9.45G [00:26<00:26, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 63%|โโโโโโโ | 5.99G/9.45G [00:26<00:27, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.07G/9.45G [00:26<00:26, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.01G/9.45G [00:26<00:25, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.09G/9.45G [00:26<00:26, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.03G/9.45G [00:26<00:25, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 65%|โโโโโโโ | 6.11G/9.45G [00:26<00:26, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.05G/9.45G [00:26<00:26, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 65%|โโโโโโโ | 6.13G/9.45G [00:26<00:26, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.07G/9.45G [00:27<00:25, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 65%|โโโโโโโ | 6.16G/9.45G [00:27<00:25, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 64%|โโโโโโโ | 6.09G/9.45G [00:27<00:26, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 65%|โโโโโโโ | 6.18G/9.45G [00:27<00:25, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 65%|โโโโโโโ | 6.11G/9.45G [00:27<00:25, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.20G/9.45G [00:27<00:25, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 65%|โโโโโโโ | 6.13G/9.45G [00:27<00:25, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.22G/9.45G [00:27<00:25, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 65%|โโโโโโโ | 6.16G/9.45G [00:27<00:24, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.24G/9.45G [00:27<00:25, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 65%|โโโโโโโ | 6.18G/9.45G [00:27<00:25, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.26G/9.45G [00:27<00:25, 125MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.20G/9.45G [00:27<00:24, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.28G/9.45G [00:28<00:25, 125MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.22G/9.45G [00:28<00:24, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 67%|โโโโโโโ | 6.30G/9.45G [00:28<00:24, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.24G/9.45G [00:28<00:25, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 67%|โโโโโโโ | 6.32G/9.45G [00:28<00:24, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.26G/9.45G [00:28<00:24, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 67%|โโโโโโโ | 6.34G/9.45G [00:28<00:24, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 66%|โโโโโโโ | 6.28G/9.45G [00:28<00:23, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 67%|โโโโโโโ | 6.30G/9.45G [00:28<00:23, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 67%|โโโโโโโ | 6.36G/9.45G [00:28<00:24, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.39G/9.45G [00:28<00:24, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 67%|โโโโโโโ | 6.32G/9.45G [00:28<00:24, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.41G/9.45G [00:28<00:23, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 67%|โโโโโโโ | 6.34G/9.45G [00:29<00:23, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 67%|โโโโโโโ | 6.36G/9.45G [00:29<00:23, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.43G/9.45G [00:29<00:24, 124MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.45G/9.45G [00:29<00:23, 125MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.39G/9.45G [00:29<00:24, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.41G/9.45G [00:29<00:22, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.47G/9.45G [00:29<00:23, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 69%|โโโโโโโ | 6.49G/9.45G [00:29<00:22, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.43G/9.45G [00:29<00:22, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 69%|โโโโโโโ | 6.51G/9.45G [00:29<00:23, 125MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.45G/9.45G [00:29<00:22, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 69%|โโโโโโโ | 6.53G/9.45G [00:29<00:22, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 68%|โโโโโโโ | 6.47G/9.45G [00:30<00:22, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 69%|โโโโโโโ | 6.55G/9.45G [00:30<00:22, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 69%|โโโโโโโ | 6.49G/9.45G [00:30<00:22, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.57G/9.45G [00:30<00:22, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 69%|โโโโโโโ | 6.51G/9.45G [00:30<00:22, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.60G/9.45G [00:30<00:21, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 69%|โโโโโโโ | 6.53G/9.45G [00:30<00:21, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.62G/9.45G [00:30<00:22, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 69%|โโโโโโโ | 6.55G/9.45G [00:30<00:22, 129MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:09 (running for 00:00:48.29)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.64G/9.45G [00:30<00:21, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.57G/9.45G [00:30<00:22, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.66G/9.45G [00:30<00:22, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.60G/9.45G [00:31<00:21, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 71%|โโโโโโโ | 6.68G/9.45G [00:31<00:21, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.62G/9.45G [00:31<00:21, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 71%|โโโโโโโ | 6.70G/9.45G [00:31<00:21, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.64G/9.45G [00:31<00:21, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 70%|โโโโโโโ | 6.66G/9.45G [00:31<00:21, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 71%|โโโโโโโ | 6.72G/9.45G [00:31<00:21, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 71%|โโโโโโโโ | 6.74G/9.45G [00:31<00:21, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 71%|โโโโโโโ | 6.68G/9.45G [00:31<00:20, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 71%|โโโโโโโ | 6.70G/9.45G [00:31<00:20, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.76G/9.45G [00:31<00:20, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.78G/9.45G [00:31<00:21, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 71%|โโโโโโโ | 6.72G/9.45G [00:31<00:20, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 71%|โโโโโโโโ | 6.74G/9.45G [00:32<00:20, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.81G/9.45G [00:32<00:20, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.83G/9.45G [00:32<00:20, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.76G/9.45G [00:32<00:20, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.85G/9.45G [00:32<00:20, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.78G/9.45G [00:32<00:20, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.81G/9.45G [00:32<00:20, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 73%|โโโโโโโโ | 6.87G/9.45G [00:32<00:20, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 73%|โโโโโโโโ | 6.89G/9.45G [00:32<00:19, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.83G/9.45G [00:32<00:20, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 73%|โโโโโโโโ | 6.91G/9.45G [00:32<00:20, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 72%|โโโโโโโโ | 6.85G/9.45G [00:32<00:20, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 73%|โโโโโโโโ | 6.87G/9.45G [00:33<00:19, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 73%|โโโโโโโโ | 6.93G/9.45G [00:33<00:19, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 6.95G/9.45G [00:33<00:19, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 73%|โโโโโโโโ | 6.89G/9.45G [00:33<00:19, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 73%|โโโโโโโโ | 6.91G/9.45G [00:33<00:19, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 6.97G/9.45G [00:33<00:19, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 6.99G/9.45G [00:33<00:18, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 73%|โโโโโโโโ | 6.93G/9.45G [00:33<00:19, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 6.95G/9.45G [00:33<00:18, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 7.01G/9.45G [00:33<00:18, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 6.97G/9.45G [00:33<00:18, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 7.04G/9.45G [00:33<00:19, 124MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 6.99G/9.45G [00:34<00:18, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 75%|โโโโโโโโ | 7.06G/9.45G [00:34<00:18, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 7.01G/9.45G [00:34<00:18, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 75%|โโโโโโโโ | 7.08G/9.45G [00:34<00:18, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 75%|โโโโโโโโ | 7.10G/9.45G [00:34<00:18, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 74%|โโโโโโโโ | 7.04G/9.45G [00:34<00:18, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 75%|โโโโโโโโ | 7.06G/9.45G [00:34<00:18, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 75%|โโโโโโโโ | 7.12G/9.45G [00:34<00:18, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.14G/9.45G [00:34<00:17, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 75%|โโโโโโโโ | 7.08G/9.45G [00:34<00:17, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.16G/9.45G [00:34<00:17, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 75%|โโโโโโโโ | 7.10G/9.45G [00:34<00:21, 110MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.18G/9.45G [00:35<00:17, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 75%|โโโโโโโโ | 7.12G/9.45G [00:35<00:20, 114MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.20G/9.45G [00:35<00:17, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.14G/9.45G [00:35<00:19, 121MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.22G/9.45G [00:35<00:17, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.17G/9.45G [00:35<00:15, 151MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.19G/9.45G [00:35<00:15, 148MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.25G/9.45G [00:35<00:17, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.27G/9.45G [00:35<00:16, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 76%|โโโโโโโโ | 7.21G/9.45G [00:35<00:15, 144MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.24G/9.45G [00:35<00:15, 141MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.29G/9.45G [00:35<00:16, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.31G/9.45G [00:36<00:16, 128MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:14 (running for 00:00:53.29)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.26G/9.45G [00:36<00:16, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.28G/9.45G [00:36<00:15, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.33G/9.45G [00:36<00:16, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.35G/9.45G [00:36<00:16, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.30G/9.45G [00:36<00:16, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.37G/9.45G [00:36<00:16, 126MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 77%|โโโโโโโโ | 7.32G/9.45G [00:36<00:15, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.34G/9.45G [00:36<00:15, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.39G/9.45G [00:36<00:15, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.41G/9.45G [00:36<00:15, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.36G/9.45G [00:36<00:15, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.38G/9.45G [00:36<00:15, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.43G/9.45G [00:37<00:15, 127MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.46G/9.45G [00:37<00:15, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 78%|โโโโโโโโ | 7.40G/9.45G [00:37<00:15, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.42G/9.45G [00:37<00:15, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.48G/9.45G [00:37<00:15, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.44G/9.45G [00:37<00:15, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.50G/9.45G [00:37<00:15, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.47G/9.45G [00:37<00:14, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 80%|โโโโโโโโ | 7.52G/9.45G [00:37<00:17, 107MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.49G/9.45G [00:37<00:14, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 80%|โโโโโโโโ | 7.55G/9.45G [00:37<00:13, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 79%|โโโโโโโโ | 7.51G/9.45G [00:37<00:14, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 80%|โโโโโโโโ | 7.57G/9.45G [00:38<00:13, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 80%|โโโโโโโโ | 7.53G/9.45G [00:38<00:14, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 80%|โโโโโโโโ | 7.59G/9.45G [00:38<00:14, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 80%|โโโโโโโโ | 7.55G/9.45G [00:38<00:14, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโ | 7.61G/9.45G [00:38<00:13, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 80%|โโโโโโโโ | 7.57G/9.45G [00:38<00:14, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโ | 7.63G/9.45G [00:38<00:13, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 80%|โโโโโโโโ | 7.59G/9.45G [00:38<00:14, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโ | 7.65G/9.45G [00:38<00:13, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโ | 7.61G/9.45G [00:38<00:13, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโ | 7.68G/9.45G [00:38<00:13, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโ | 7.63G/9.45G [00:38<00:13, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโโ | 7.70G/9.45G [00:39<00:13, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโ | 7.65G/9.45G [00:39<00:13, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 82%|โโโโโโโโโ | 7.72G/9.45G [00:39<00:13, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโ | 7.68G/9.45G [00:39<00:13, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 82%|โโโโโโโโโ | 7.74G/9.45G [00:39<00:13, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 81%|โโโโโโโโโ | 7.70G/9.45G [00:39<00:13, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 82%|โโโโโโโโโ | 7.72G/9.45G [00:39<00:13, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 82%|โโโโโโโโโ | 7.76G/9.45G [00:39<00:13, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 82%|โโโโโโโโโ | 7.78G/9.45G [00:39<00:12, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 82%|โโโโโโโโโ | 7.74G/9.45G [00:39<00:13, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.80G/9.45G [00:39<00:12, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 82%|โโโโโโโโโ | 7.76G/9.45G [00:39<00:12, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.82G/9.45G [00:40<00:12, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 82%|โโโโโโโโโ | 7.78G/9.45G [00:40<00:12, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.84G/9.45G [00:40<00:12, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.80G/9.45G [00:40<00:12, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.86G/9.45G [00:40<00:12, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.82G/9.45G [00:40<00:12, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.89G/9.45G [00:40<00:11, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.84G/9.45G [00:40<00:12, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.86G/9.45G [00:40<00:12, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 84%|โโโโโโโโโ | 7.91G/9.45G [00:40<00:11, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 84%|โโโโโโโโโ | 7.93G/9.45G [00:40<00:11, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 83%|โโโโโโโโโ | 7.89G/9.45G [00:40<00:11, 135MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:19 (running for 00:00:58.29)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 84%|โโโโโโโโโ | 7.91G/9.45G [00:40<00:11, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 84%|โโโโโโโโโ | 7.95G/9.45G [00:40<00:11, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 84%|โโโโโโโโโ | 7.97G/9.45G [00:41<00:11, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 84%|โโโโโโโโโ | 7.93G/9.45G [00:41<00:11, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 7.99G/9.45G [00:41<00:11, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 84%|โโโโโโโโโ | 7.95G/9.45G [00:41<00:11, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 84%|โโโโโโโโโ | 7.97G/9.45G [00:41<00:11, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 8.01G/9.45G [00:41<00:11, 128MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 8.03G/9.45G [00:41<00:10, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 7.99G/9.45G [00:41<00:11, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 8.01G/9.45G [00:41<00:10, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 8.05G/9.45G [00:41<00:10, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 8.07G/9.45G [00:41<00:10, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 8.03G/9.45G [00:41<00:10, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 8.05G/9.45G [00:42<00:10, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 86%|โโโโโโโโโ | 8.10G/9.45G [00:42<00:10, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 86%|โโโโโโโโโ | 8.12G/9.45G [00:42<00:10, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 85%|โโโโโโโโโ | 8.07G/9.45G [00:42<00:10, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 86%|โโโโโโโโโ | 8.14G/9.45G [00:42<00:09, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 86%|โโโโโโโโโ | 8.10G/9.45G [00:42<00:10, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 86%|โโโโโโโโโ | 8.12G/9.45G [00:42<00:10, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 86%|โโโโโโโโโ | 8.16G/9.45G [00:42<00:09, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.18G/9.45G [00:42<00:09, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 86%|โโโโโโโโโ | 8.14G/9.45G [00:42<00:09, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 86%|โโโโโโโโโ | 8.16G/9.45G [00:42<00:09, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.20G/9.45G [00:42<00:09, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.18G/9.45G [00:43<00:09, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.22G/9.45G [00:43<00:09, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.20G/9.45G [00:43<00:09, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.24G/9.45G [00:43<00:09, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.22G/9.45G [00:43<00:09, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.26G/9.45G [00:43<00:09, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.24G/9.45G [00:43<00:09, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 88%|โโโโโโโโโ | 8.28G/9.45G [00:43<00:08, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 87%|โโโโโโโโโ | 8.26G/9.45G [00:43<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 88%|โโโโโโโโโ | 8.30G/9.45G [00:43<00:08, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 88%|โโโโโโโโโ | 8.28G/9.45G [00:43<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 88%|โโโโโโโโโ | 8.33G/9.45G [00:43<00:08, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 88%|โโโโโโโโโ | 8.30G/9.45G [00:43<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 88%|โโโโโโโโโ | 8.35G/9.45G [00:44<00:08, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 88%|โโโโโโโโโ | 8.33G/9.45G [00:44<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.37G/9.45G [00:44<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 88%|โโโโโโโโโ | 8.35G/9.45G [00:44<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.39G/9.45G [00:44<00:08, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.37G/9.45G [00:44<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.41G/9.45G [00:44<00:07, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.39G/9.45G [00:44<00:07, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.43G/9.45G [00:44<00:07, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.41G/9.45G [00:44<00:07, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.45G/9.45G [00:44<00:07, 125MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.43G/9.45G [00:44<00:07, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 90%|โโโโโโโโโ | 8.47G/9.45G [00:44<00:07, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 90%|โโโโโโโโโ | 8.49G/9.45G [00:45<00:07, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 89%|โโโโโโโโโ | 8.45G/9.45G [00:45<00:10, 96.7MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 90%|โโโโโโโโโ | 8.51G/9.45G [00:45<00:07, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 90%|โโโโโโโโโ | 8.48G/9.45G [00:45<00:07, 124MB/s] \u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 90%|โโโโโโโโโ | 8.54G/9.45G [00:45<00:06, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 90%|โโโโโโโโโ | 8.51G/9.45G [00:45<00:06, 152MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโ | 8.56G/9.45G [00:45<00:06, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 90%|โโโโโโโโโ | 8.54G/9.45G [00:45<00:06, 149MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโ | 8.58G/9.45G [00:45<00:06, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโ | 8.56G/9.45G [00:45<00:06, 141MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:24 (running for 00:01:03.30)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโ | 8.60G/9.45G [00:45<00:06, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโ | 8.58G/9.45G [00:46<00:06, 141MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโ | 8.62G/9.45G [00:46<00:06, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโ | 8.60G/9.45G [00:46<00:06, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโโ| 8.64G/9.45G [00:46<00:06, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโ | 8.62G/9.45G [00:46<00:06, 138MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 92%|โโโโโโโโโโ| 8.66G/9.45G [00:46<00:06, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 91%|โโโโโโโโโโ| 8.64G/9.45G [00:46<00:05, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 92%|โโโโโโโโโโ| 8.68G/9.45G [00:46<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 92%|โโโโโโโโโโ| 8.66G/9.45G [00:46<00:05, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 92%|โโโโโโโโโโ| 8.70G/9.45G [00:46<00:05, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 92%|โโโโโโโโโโ| 8.68G/9.45G [00:46<00:05, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 92%|โโโโโโโโโโ| 8.72G/9.45G [00:46<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 92%|โโโโโโโโโโ| 8.70G/9.45G [00:46<00:05, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.75G/9.45G [00:47<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 92%|โโโโโโโโโโ| 8.72G/9.45G [00:47<00:05, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.77G/9.45G [00:47<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.75G/9.45G [00:47<00:05, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.79G/9.45G [00:47<00:05, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.77G/9.45G [00:47<00:05, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.81G/9.45G [00:47<00:04, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.79G/9.45G [00:47<00:04, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.81G/9.45G [00:47<00:04, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.83G/9.45G [00:47<00:05, 117MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 94%|โโโโโโโโโโ| 8.86G/9.45G [00:47<00:04, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 93%|โโโโโโโโโโ| 8.83G/9.45G [00:47<00:04, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 94%|โโโโโโโโโโ| 8.85G/9.45G [00:48<00:04, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 94%|โโโโโโโโโโ| 8.88G/9.45G [00:48<00:04, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 94%|โโโโโโโโโโ| 8.90G/9.45G [00:48<00:04, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 94%|โโโโโโโโโโ| 8.87G/9.45G [00:48<00:04, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 94%|โโโโโโโโโโ| 8.89G/9.45G [00:48<00:04, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 94%|โโโโโโโโโโ| 8.92G/9.45G [00:48<00:03, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 94%|โโโโโโโโโโ| 8.91G/9.45G [00:48<00:04, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 8.94G/9.45G [00:48<00:03, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 8.93G/9.45G [00:48<00:03, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 8.97G/9.45G [00:48<00:03, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 8.95G/9.45G [00:48<00:03, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 8.99G/9.45G [00:48<00:03, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 8.98G/9.45G [00:49<00:03, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 9.01G/9.45G [00:49<00:03, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 9.00G/9.45G [00:49<00:03, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.03G/9.45G [00:49<00:03, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 95%|โโโโโโโโโโ| 9.02G/9.45G [00:49<00:03, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.05G/9.45G [00:49<00:03, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.04G/9.45G [00:49<00:03, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.07G/9.45G [00:49<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.06G/9.45G [00:49<00:02, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.09G/9.45G [00:49<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.08G/9.45G [00:49<00:02, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.11G/9.45G [00:49<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 96%|โโโโโโโโโโ| 9.10G/9.45G [00:49<00:02, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.13G/9.45G [00:50<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.12G/9.45G [00:50<00:02, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.15G/9.45G [00:50<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.14G/9.45G [00:50<00:02, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.18G/9.45G [00:50<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.16G/9.45G [00:50<00:02, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.20G/9.45G [00:50<00:01, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.19G/9.45G [00:50<00:01, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.22G/9.45G [00:50<00:01, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 97%|โโโโโโโโโโ| 9.21G/9.45G [00:50<00:01, 135MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:29 (running for 00:01:08.30)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.24G/9.45G [00:50<00:01, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.23G/9.45G [00:50<00:01, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.26G/9.45G [00:51<00:01, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.25G/9.45G [00:51<00:01, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.28G/9.45G [00:51<00:01, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.27G/9.45G [00:51<00:01, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.30G/9.45G [00:51<00:01, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 98%|โโโโโโโโโโ| 9.29G/9.45G [00:51<00:01, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.32G/9.45G [00:51<00:00, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.31G/9.45G [00:51<00:01, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.34G/9.45G [00:51<00:00, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.33G/9.45G [00:51<00:00, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.36G/9.45G [00:51<00:00, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.35G/9.45G [00:51<00:00, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.38G/9.45G [00:51<00:00, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.37G/9.45G [00:52<00:00, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 100%|โโโโโโโโโโ| 9.41G/9.45G [00:52<00:00, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 99%|โโโโโโโโโโ| 9.40G/9.45G [00:52<00:00, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 100%|โโโโโโโโโโ| 9.43G/9.45G [00:52<00:00, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 100%|โโโโโโโโโโ| 9.42G/9.45G [00:52<00:00, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 100%|โโโโโโโโโโ| 9.45G/9.45G [00:52<00:00, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 100%|โโโโโโโโโโ| 9.44G/9.45G [00:52<00:00, 134MB/s]\u001b[A\n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 100%|โโโโโโโโโโ| 9.45G/9.45G [00:52<00:00, 180MB/s]\n",
+ "Downloading shards: 50%|โโโโโ | 1/2 [00:52<00:52, 52.57s/it]\n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 0%| | 0.00/1.95G [00:00, ?B/s]\u001b[A\n",
+ "Downloading (โฆ)l-00001-of-00002.bin: 100%|โโโโโโโโโโ| 9.45G/9.45G [00:52<00:00, 180MB/s]\n",
+ "Downloading shards: 50%|โโโโโ | 1/2 [00:52<00:52, 52.68s/it]\n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 0%| | 0.00/1.95G [00:00, ?B/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 2%|โ | 31.5M/1.95G [00:00<00:11, 172MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 3%|โ | 52.4M/1.95G [00:00<00:11, 163MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 2%|โ | 31.5M/1.95G [00:00<00:09, 212MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 4%|โ | 73.4M/1.95G [00:00<00:12, 149MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 3%|โ | 62.9M/1.95G [00:00<00:11, 160MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 4%|โ | 83.9M/1.95G [00:00<00:12, 149MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 5%|โ | 94.4M/1.95G [00:00<00:13, 143MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 6%|โ | 115M/1.95G [00:00<00:13, 140MB/s] \u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 5%|โ | 105M/1.95G [00:00<00:12, 144MB/s] \u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 6%|โ | 126M/1.95G [00:00<00:13, 140MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 7%|โ | 136M/1.95G [00:00<00:13, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 8%|โ | 157M/1.95G [00:01<00:13, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 8%|โ | 147M/1.95G [00:01<00:12, 139MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 9%|โ | 168M/1.95G [00:01<00:12, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 9%|โ | 178M/1.95G [00:01<00:13, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 10%|โ | 189M/1.95G [00:01<00:12, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 10%|โ | 199M/1.95G [00:01<00:13, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 11%|โ | 210M/1.95G [00:01<00:12, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 11%|โโ | 220M/1.95G [00:01<00:13, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 12%|โโ | 231M/1.95G [00:01<00:12, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 12%|โโ | 241M/1.95G [00:01<00:12, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 13%|โโ | 252M/1.95G [00:01<00:12, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 13%|โโ | 262M/1.95G [00:01<00:12, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 14%|โโ | 273M/1.95G [00:01<00:12, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 15%|โโ | 283M/1.95G [00:02<00:12, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 15%|โโ | 294M/1.95G [00:02<00:12, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 16%|โโ | 304M/1.95G [00:02<00:12, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 16%|โโ | 315M/1.95G [00:02<00:12, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 17%|โโ | 325M/1.95G [00:02<00:12, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 17%|โโ | 336M/1.95G [00:02<00:11, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 18%|โโ | 346M/1.95G [00:02<00:15, 105MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 19%|โโ | 377M/1.95G [00:02<00:11, 139MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 20%|โโ | 398M/1.95G [00:02<00:11, 139MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 18%|โโ | 357M/1.95G [00:02<00:18, 88.2MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 20%|โโ | 398M/1.95G [00:02<00:11, 135MB/s] \u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 22%|โโโ | 419M/1.95G [00:03<00:11, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 23%|โโโ | 440M/1.95G [00:03<00:11, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 22%|โโโ | 430M/1.95G [00:03<00:09, 154MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:34 (running for 00:01:13.30)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 23%|โโโ | 451M/1.95G [00:03<00:10, 148MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 24%|โโโ | 461M/1.95G [00:03<00:11, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 24%|โโโ | 472M/1.95G [00:03<00:10, 145MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 25%|โโโ | 482M/1.95G [00:03<00:11, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 25%|โโโ | 493M/1.95G [00:03<00:10, 141MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 26%|โโโ | 503M/1.95G [00:03<00:10, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 26%|โโโ | 514M/1.95G [00:03<00:10, 140MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 27%|โโโ | 524M/1.95G [00:03<00:10, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 27%|โโโ | 535M/1.95G [00:03<00:10, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 28%|โโโ | 545M/1.95G [00:04<00:10, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 29%|โโโ | 556M/1.95G [00:04<00:10, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 30%|โโโ | 577M/1.95G [00:04<00:10, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 29%|โโโ | 566M/1.95G [00:04<00:11, 125MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 30%|โโโ | 587M/1.95G [00:04<00:10, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 31%|โโโ | 598M/1.95G [00:04<00:09, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 32%|โโโโ | 619M/1.95G [00:04<00:09, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 31%|โโโ | 608M/1.95G [00:04<00:10, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 32%|โโโโ | 629M/1.95G [00:04<00:10, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 33%|โโโโ | 640M/1.95G [00:04<00:09, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 33%|โโโโ | 650M/1.95G [00:04<00:10, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 34%|โโโโ | 661M/1.95G [00:04<00:09, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 35%|โโโโ | 682M/1.95G [00:04<00:09, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 34%|โโโโ | 671M/1.95G [00:05<00:09, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 35%|โโโโ | 692M/1.95G [00:05<00:09, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 36%|โโโโ | 703M/1.95G [00:05<00:09, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 37%|โโโโ | 724M/1.95G [00:05<00:09, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 37%|โโโโ | 713M/1.95G [00:05<00:09, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 38%|โโโโ | 734M/1.95G [00:05<00:09, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 38%|โโโโ | 744M/1.95G [00:05<00:08, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 39%|โโโโ | 765M/1.95G [00:05<00:08, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 39%|โโโโ | 755M/1.95G [00:05<00:09, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 40%|โโโโ | 776M/1.95G [00:05<00:08, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 40%|โโโโ | 786M/1.95G [00:05<00:08, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 41%|โโโโโ | 807M/1.95G [00:05<00:08, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 41%|โโโโ | 797M/1.95G [00:05<00:08, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 42%|โโโโโ | 828M/1.95G [00:06<00:08, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 42%|โโโโโ | 818M/1.95G [00:06<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 44%|โโโโโ | 849M/1.95G [00:06<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 43%|โโโโโ | 839M/1.95G [00:06<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 45%|โโโโโ | 870M/1.95G [00:06<00:08, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 44%|โโโโโ | 860M/1.95G [00:06<00:08, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 45%|โโโโโ | 881M/1.95G [00:06<00:08, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 46%|โโโโโ | 891M/1.95G [00:06<00:08, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 47%|โโโโโ | 912M/1.95G [00:06<00:07, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 46%|โโโโโ | 902M/1.95G [00:06<00:08, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 47%|โโโโโ | 923M/1.95G [00:06<00:07, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 48%|โโโโโ | 933M/1.95G [00:06<00:07, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 49%|โโโโโ | 954M/1.95G [00:07<00:07, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 48%|โโโโโ | 944M/1.95G [00:07<00:07, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 50%|โโโโโ | 975M/1.95G [00:07<00:07, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 49%|โโโโโ | 965M/1.95G [00:07<00:07, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 51%|โโโโโ | 996M/1.95G [00:07<00:07, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 51%|โโโโโ | 986M/1.95G [00:07<00:07, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 52%|โโโโโโ | 1.02G/1.95G [00:07<00:06, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 52%|โโโโโโ | 1.01G/1.95G [00:07<00:07, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 53%|โโโโโโ | 1.04G/1.95G [00:07<00:06, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 53%|โโโโโโ | 1.03G/1.95G [00:07<00:07, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 54%|โโโโโโ | 1.06G/1.95G [00:07<00:06, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 54%|โโโโโโ | 1.05G/1.95G [00:07<00:06, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 55%|โโโโโโ | 1.07G/1.95G [00:08<00:06, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 55%|โโโโโโ | 1.08G/1.95G [00:07<00:06, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 56%|โโโโโโ | 1.10G/1.95G [00:08<00:06, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 56%|โโโโโโ | 1.09G/1.95G [00:08<00:06, 132MB/s]\u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:39 (running for 00:01:18.30)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 57%|โโโโโโ | 1.11G/1.95G [00:08<00:06, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 58%|โโโโโโ | 1.12G/1.95G [00:08<00:06, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 59%|โโโโโโ | 1.14G/1.95G [00:08<00:06, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 58%|โโโโโโ | 1.13G/1.95G [00:08<00:06, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 60%|โโโโโโ | 1.16G/1.95G [00:08<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 59%|โโโโโโ | 1.15G/1.95G [00:08<00:06, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 61%|โโโโโโ | 1.18G/1.95G [00:08<00:05, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 60%|โโโโโโ | 1.17G/1.95G [00:08<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 62%|โโโโโโโ | 1.21G/1.95G [00:08<00:05, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 61%|โโโโโโโ | 1.20G/1.95G [00:09<00:05, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 63%|โโโโโโโ | 1.23G/1.95G [00:09<00:05, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 62%|โโโโโโโ | 1.22G/1.95G [00:09<00:05, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 64%|โโโโโโโ | 1.25G/1.95G [00:09<00:05, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 63%|โโโโโโโ | 1.24G/1.95G [00:09<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 65%|โโโโโโโ | 1.27G/1.95G [00:09<00:05, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 65%|โโโโโโโ | 1.26G/1.95G [00:09<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 66%|โโโโโโโ | 1.29G/1.95G [00:09<00:05, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 66%|โโโโโโโ | 1.28G/1.95G [00:09<00:05, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 67%|โโโโโโโ | 1.31G/1.95G [00:09<00:04, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 67%|โโโโโโโ | 1.30G/1.95G [00:09<00:04, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 68%|โโโโโโโ | 1.33G/1.95G [00:09<00:04, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 68%|โโโโโโโ | 1.32G/1.95G [00:09<00:04, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 69%|โโโโโโโ | 1.35G/1.95G [00:10<00:04, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 69%|โโโโโโโ | 1.34G/1.95G [00:10<00:04, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 70%|โโโโโโโ | 1.37G/1.95G [00:10<00:04, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 70%|โโโโโโโ | 1.36G/1.95G [00:10<00:04, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 72%|โโโโโโโโ | 1.39G/1.95G [00:10<00:04, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 71%|โโโโโโโ | 1.38G/1.95G [00:10<00:04, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 73%|โโโโโโโโ | 1.42G/1.95G [00:10<00:03, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 72%|โโโโโโโโ | 1.41G/1.95G [00:10<00:04, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 74%|โโโโโโโโ | 1.44G/1.95G [00:10<00:03, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 73%|โโโโโโโโ | 1.43G/1.95G [00:10<00:04, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 75%|โโโโโโโโ | 1.46G/1.95G [00:10<00:03, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 74%|โโโโโโโโ | 1.45G/1.95G [00:10<00:03, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 76%|โโโโโโโโ | 1.48G/1.95G [00:11<00:03, 129MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 75%|โโโโโโโโ | 1.47G/1.95G [00:11<00:03, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 77%|โโโโโโโโ | 1.50G/1.95G [00:11<00:03, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 76%|โโโโโโโโ | 1.49G/1.95G [00:11<00:03, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 78%|โโโโโโโโ | 1.52G/1.95G [00:11<00:03, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 77%|โโโโโโโโ | 1.51G/1.95G [00:11<00:03, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 79%|โโโโโโโโ | 1.54G/1.95G [00:11<00:03, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 79%|โโโโโโโโ | 1.53G/1.95G [00:11<00:03, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 80%|โโโโโโโโ | 1.56G/1.95G [00:11<00:02, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 80%|โโโโโโโโ | 1.55G/1.95G [00:11<00:03, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 81%|โโโโโโโโ | 1.58G/1.95G [00:11<00:02, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 81%|โโโโโโโโ | 1.57G/1.95G [00:11<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 82%|โโโโโโโโโ | 1.60G/1.95G [00:11<00:02, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 82%|โโโโโโโโโ | 1.59G/1.95G [00:12<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 83%|โโโโโโโโโ | 1.63G/1.95G [00:12<00:02, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 83%|โโโโโโโโโ | 1.61G/1.95G [00:12<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 84%|โโโโโโโโโ | 1.65G/1.95G [00:12<00:02, 130MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 84%|โโโโโโโโโ | 1.64G/1.95G [00:12<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 86%|โโโโโโโโโ | 1.67G/1.95G [00:12<00:02, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 85%|โโโโโโโโโ | 1.66G/1.95G [00:12<00:02, 131MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 87%|โโโโโโโโโ | 1.69G/1.95G [00:12<00:01, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 88%|โโโโโโโโโ | 1.71G/1.95G [00:12<00:01, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 86%|โโโโโโโโโ | 1.68G/1.95G [00:12<00:02, 98.3MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 88%|โโโโโโโโโ | 1.72G/1.95G [00:13<00:01, 142MB/s] \u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 89%|โโโโโโโโโ | 1.74G/1.95G [00:13<00:01, 140MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 89%|โโโโโโโโโ | 1.73G/1.95G [00:13<00:02, 97.3MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 90%|โโโโโโโโโ | 1.76G/1.95G [00:13<00:01, 132MB/s] \u001b[A\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:44 (running for 00:01:23.31)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 90%|โโโโโโโโโ | 1.76G/1.95G [00:13<00:01, 138MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 91%|โโโโโโโโโโ| 1.78G/1.95G [00:13<00:01, 137MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 92%|โโโโโโโโโโ| 1.79G/1.95G [00:13<00:01, 146MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 93%|โโโโโโโโโโ| 1.81G/1.95G [00:13<00:00, 144MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 93%|โโโโโโโโโโ| 1.80G/1.95G [00:13<00:01, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 94%|โโโโโโโโโโ| 1.84G/1.95G [00:13<00:00, 139MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 94%|โโโโโโโโโโ| 1.82G/1.95G [00:13<00:00, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 95%|โโโโโโโโโโ| 1.86G/1.95G [00:13<00:00, 136MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 95%|โโโโโโโโโโ| 1.85G/1.95G [00:13<00:00, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 96%|โโโโโโโโโโ| 1.88G/1.95G [00:14<00:00, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 96%|โโโโโโโโโโ| 1.87G/1.95G [00:14<00:00, 133MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 97%|โโโโโโโโโโ| 1.90G/1.95G [00:14<00:00, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 97%|โโโโโโโโโโ| 1.89G/1.95G [00:14<00:00, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 98%|โโโโโโโโโโ| 1.92G/1.95G [00:14<00:00, 134MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 98%|โโโโโโโโโโ| 1.91G/1.95G [00:14<00:00, 132MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 100%|โโโโโโโโโโ| 1.94G/1.95G [00:14<00:00, 135MB/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 99%|โโโโโโโโโโ| 1.93G/1.95G [00:14<00:00, 132MB/s]\u001b[A\n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 100%|โโโโโโโโโโ| 1.95G/1.95G [00:14<00:00, 133MB/s]\n",
+ "Downloading shards: 100%|โโโโโโโโโโ| 2/2 [01:07<00:00, 33.67s/it]\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=378, ip=10.128.30.22)\u001b[0m \n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 100%|โโโโโโโโโโ| 1.95G/1.95G [00:14<00:00, 130MB/s]\u001b[A\n",
+ "Downloading (โฆ)l-00002-of-00002.bin: 100%|โโโโโโโโโโ| 1.95G/1.95G [00:14<00:00, 132MB/s]\n",
+ "Downloading shards: 100%|โโโโโโโโโโ| 2/2 [01:07<00:00, 33.71s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:49 (running for 00:01:28.31)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]\n",
+ "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:54 (running for 00:01:33.31)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 07:59:59 (running for 00:01:38.31)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:04 (running for 00:01:43.31)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:09 (running for 00:01:48.32)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:14 (running for 00:01:53.32)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:19 (running for 00:01:58.32)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:24 (running for 00:02:03.33)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:29 (running for 00:02:08.33)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:34 (running for 00:02:13.33)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:39 (running for 00:02:18.33)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:44 (running for 00:02:23.33)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:49 (running for 00:02:28.34)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:54 (running for 00:02:33.34)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:00:59 (running for 00:02:38.34)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:04 (running for 00:02:43.34)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:09 (running for 00:02:48.35)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:14 (running for 00:02:53.35)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading checkpoint shards: 50%|โโโโโ | 1/2 [01:25<01:25, 85.26s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:19 (running for 00:02:58.35)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:24 (running for 00:03:03.35)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:29 (running for 00:03:08.36)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading checkpoint shards: 100%|โโโโโโโโโโ| 2/2 [01:39<00:00, 49.95s/it]\n",
+ "Downloading (โฆ)neration_config.json: 100%|โโโโโโโโโโ| 147/147 [00:00<00:00, 44.3kB/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:34 (running for 00:03:13.36)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:39 (running for 00:03:18.36)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m trainable params: 9,437,184 || all params: 2,859,194,368 || trainable%: 0.33006444422319176\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m /tmp/ray/session_2023-07-27_07-21-47_353834_9/runtime_resources/pip/04a15979ef108d1f8e906345b347c268df9d6aa1/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ "\u001b[2m\u001b[36m(RayTrainWorker pid=277, ip=10.128.32.21)\u001b[0m warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:44 (running for 00:03:23.36)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:49 (running for 00:03:28.37)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:54 (running for 00:03:33.37)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:01:59 (running for 00:03:38.37)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:02:04 (running for 00:03:43.37)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:02:09 (running for 00:03:48.38)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 1.0/6 CPUs, 2.0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | RUNNING | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffff537034a8f0299b4acc1e1f4e05000000 Worker ID: 20fd7164328a7217f296071d2bde261324cae0cd44aa1a5887306bbd Node ID: 37f6f748268c35754f1bf5790acbd3c6a9f245776da67ebc9830fc1d Worker IP address: 10.128.30.22 Worker port: 10007 Worker PID: 378 Worker exit type: SYSTEM_ERROR Worker exit detail: Worker unexpectedly exits with a connection error code 2. End of file. There are some potential root causes. (1) The process is killed by SIGKILL by OOM killer due to high memory usage. (2) ray stop --force is called. (3) The worker is crashed unexpectedly due to SIGSEGV or other unexpected errors.\n",
+ "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=345, ip=10.128.30.22)\u001b[0m 2023-07-27 08:02:12,849\tINFO utils.py:57 -- Worker 1 has failed.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result for HuggingFaceTrainer_06f2f_00000:\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m date: 2023-07-27_07-58-29\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m experiment_id: 182233fc8cb24d72bfa113cb6a3f25bd\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m hostname: finetuneflan-worker-small-group-finetuneflan-6f6ft\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m node_ip: 10.128.30.22\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m pid: 345\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m timestamp: 1690469909\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m trial_id: 06f2f_00000\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Current time: 2023-07-27 08:02:13 (running for 00:03:52.79)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Memory usage on this node: 3.9/15.4 GiB \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Resources requested: 0/6 CPUs, 0/2 GPUs, 0.0/52.15 GiB heap, 0.0/11.29 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of trials: 1/1 (1 ERROR)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | ERROR | 10.128.30.22:345 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m Number of errored trials: 1\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+--------------+-----------------------------------------------------------------------------------------------------------------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | Trial name | # failures | error file |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m |--------------------------------+--------------+-----------------------------------------------------------------------------------------------------------------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m | HuggingFaceTrainer_06f2f_00000 | 1 | /home/ray/ray_results/HuggingFaceTrainer_2023-07-27_07-58-20/HuggingFaceTrainer_06f2f_00000_0_2023-07-27_07-58-22/error.txt |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m +--------------------------------+--------------+-----------------------------------------------------------------------------------------------------------------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m 2023-07-27 08:02:13,669\tERROR trial_runner.py:993 -- Trial HuggingFaceTrainer_06f2f_00000: Error processing event.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m ray.exceptions.RayTaskError(RuntimeError): \u001b[36mray::_Inner.train()\u001b[39m (pid=345, ip=10.128.30.22, repr=HuggingFaceTrainer)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m ray.exceptions.RayActorError: The actor died unexpectedly before finishing this task.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \tclass_name: RayTrainWorker\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \tactor_id: 537034a8f0299b4acc1e1f4e05000000\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \tpid: 378\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \tnamespace: 79e19797-9a9d-4359-9e7e-135e143c02c0\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \tip: 10.128.30.22\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m The actor is dead because its worker process has died. Worker exit type: SYSTEM_ERROR Worker exit detail: Worker unexpectedly exits with a connection error code 2. End of file. There are some potential root causes. (1) The process is killed by SIGKILL by OOM killer due to high memory usage. (2) ray stop --force is called. (3) The worker is crashed unexpectedly due to SIGSEGV or other unexpected errors.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m The above exception was the direct cause of the following exception:\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m \u001b[36mray::_Inner.train()\u001b[39m (pid=345, ip=10.128.30.22, repr=HuggingFaceTrainer)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/trainable.py\", line 355, in train\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m raise skipped from exception_cause(skipped)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py\", line 325, in entrypoint\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m return self._trainable_func(\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 475, in _trainable_func\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m super()._trainable_func(self._merged_config, reporter, checkpoint_dir)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py\", line 651, in _trainable_func\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m output = fn()\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 390, in train_func\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m trainer.training_loop()\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/data_parallel_trainer.py\", line 371, in training_loop\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m self._report(training_iterator)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/data_parallel_trainer.py\", line 320, in _report\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m for results in training_iterator:\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 225, in __next__\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m next_results = self._run_with_error_handling(self._fetch_next_result)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 188, in _run_with_error_handling\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m return func()\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 257, in _fetch_next_result\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m results = self._backend_executor.get_next_results()\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 390, in get_next_results\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m results = self.get_with_failure_handling(futures)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 483, in get_with_failure_handling\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m self._increment_failures()\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 533, in _increment_failures\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m raise exc.with_traceback(None) from self._last_failure\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m RuntimeError: Training has failed after 1 attempts. You can change the number of max failure attempts by setting the `max_retries` arg in your `Trainer`.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m 2023-07-27 08:02:13,782\tERROR tune.py:773 -- Trials did not complete: [HuggingFaceTrainer_06f2f_00000]\n",
+ "\u001b[2m\u001b[36m(train_fn pid=4614)\u001b[0m 2023-07-27 08:02:13,782\tINFO tune.py:777 -- Total run time: 232.95 seconds (232.79 seconds for the tuning loop).\n"
+ ]
+ },
+ {
+ "ename": "RayTaskError(RuntimeError)",
+ "evalue": "\u001b[36mray::train_fn()\u001b[39m (pid=4614, ip=10.128.28.7)\n File \"/tmp/ipykernel_14249/2624701892.py\", line 150, in train_fn\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 360, in fit\n raise result.error\nray.exceptions.RayTaskError(RuntimeError): \u001b[36mray::_Inner.train()\u001b[39m (pid=345, ip=10.128.30.22, repr=HuggingFaceTrainer)\nray.exceptions.RayActorError: The actor died unexpectedly before finishing this task.\n\tclass_name: RayTrainWorker\n\tactor_id: 537034a8f0299b4acc1e1f4e05000000\n\tpid: 378\n\tnamespace: 79e19797-9a9d-4359-9e7e-135e143c02c0\n\tip: 10.128.30.22\nThe actor is dead because its worker process has died. Worker exit type: SYSTEM_ERROR Worker exit detail: Worker unexpectedly exits with a connection error code 2. End of file. There are some potential root causes. (1) The process is killed by SIGKILL by OOM killer due to high memory usage. (2) ray stop --force is called. (3) The worker is crashed unexpectedly due to SIGSEGV or other unexpected errors.\n\nThe above exception was the direct cause of the following exception:\n\n\u001b[36mray::_Inner.train()\u001b[39m (pid=345, ip=10.128.30.22, repr=HuggingFaceTrainer)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/trainable.py\", line 355, in train\n raise skipped from exception_cause(skipped)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py\", line 325, in entrypoint\n return self._trainable_func(\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 475, in _trainable_func\n super()._trainable_func(self._merged_config, reporter, checkpoint_dir)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py\", line 651, in _trainable_func\n output = fn()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 390, in train_func\n 
trainer.training_loop()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/data_parallel_trainer.py\", line 371, in training_loop\n self._report(training_iterator)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/data_parallel_trainer.py\", line 320, in _report\n for results in training_iterator:\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 225, in __next__\n next_results = self._run_with_error_handling(self._fetch_next_result)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 188, in _run_with_error_handling\n return func()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 257, in _fetch_next_result\n results = self._backend_executor.get_next_results()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 390, in get_next_results\n results = self.get_with_failure_handling(futures)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 483, in get_with_failure_handling\n self._increment_failures()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 533, in _increment_failures\n raise exc.with_traceback(None) from self._last_failure\nRuntimeError: Training has failed after 1 attempts. You can change the number of max failure attempts by setting the `max_retries` arg in your `Trainer`.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mRayTaskError(RuntimeError)\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn [8], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#call the above cell as a remote ray function\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m r \u001b[38;5;241m=\u001b[39m ray\u001b[38;5;241m.\u001b[39mget(train_fn\u001b[38;5;241m.\u001b[39mremote())\n",
+ "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/ray/_private/client_mode_hook.py:104\u001b[0m, in \u001b[0;36mclient_mode_hook..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m client_mode_should_convert(auto_init\u001b[38;5;241m=\u001b[39mauto_init):\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# Legacy code\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;66;03m# we only convert init function if RAY_CLIENT_MODE=1\u001b[39;00m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minit\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m is_client_mode_enabled_by_default:\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__name__\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+ "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/ray/util/client/api.py:42\u001b[0m, in \u001b[0;36m_ClientAPI.get\u001b[0;34m(self, vals, timeout)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(\u001b[38;5;28mself\u001b[39m, vals, \u001b[38;5;241m*\u001b[39m, timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124;03m\"\"\"get is the hook stub passed on to replace `ray.get`\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;124;03m vals: [Client]ObjectRef or list of these refs to retrieve.\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;124;03m timeout: Optional timeout in milliseconds\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 42\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvals\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/ray/util/client/worker.py:434\u001b[0m, in \u001b[0;36mWorker.get\u001b[0;34m(self, vals, timeout)\u001b[0m\n\u001b[1;32m 432\u001b[0m op_timeout \u001b[38;5;241m=\u001b[39m max_blocking_operation_time\n\u001b[1;32m 433\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 434\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get\u001b[49m\u001b[43m(\u001b[49m\u001b[43mto_get\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_timeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 435\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 436\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GetTimeoutError:\n",
+ "File \u001b[0;32m/opt/app-root/lib64/python3.8/site-packages/ray/util/client/worker.py:462\u001b[0m, in \u001b[0;36mWorker._get\u001b[0;34m(self, ref, timeout)\u001b[0m\n\u001b[1;32m 460\u001b[0m logger\u001b[38;5;241m.\u001b[39mexception(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to deserialize \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(chunk\u001b[38;5;241m.\u001b[39merror))\n\u001b[1;32m 461\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m--> 462\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunk\u001b[38;5;241m.\u001b[39mtotal_size \u001b[38;5;241m>\u001b[39m OBJECT_TRANSFER_WARNING_SIZE \u001b[38;5;129;01mand\u001b[39;00m log_once(\n\u001b[1;32m 464\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclient_object_transfer_size_warning\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 465\u001b[0m ):\n\u001b[1;32m 466\u001b[0m size_gb \u001b[38;5;241m=\u001b[39m chunk\u001b[38;5;241m.\u001b[39mtotal_size \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m \u001b[38;5;241m30\u001b[39m\n",
+ "\u001b[0;31mRayTaskError(RuntimeError)\u001b[0m: \u001b[36mray::train_fn()\u001b[39m (pid=4614, ip=10.128.28.7)\n File \"/tmp/ipykernel_14249/2624701892.py\", line 150, in train_fn\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 360, in fit\n raise result.error\nray.exceptions.RayTaskError(RuntimeError): \u001b[36mray::_Inner.train()\u001b[39m (pid=345, ip=10.128.30.22, repr=HuggingFaceTrainer)\nray.exceptions.RayActorError: The actor died unexpectedly before finishing this task.\n\tclass_name: RayTrainWorker\n\tactor_id: 537034a8f0299b4acc1e1f4e05000000\n\tpid: 378\n\tnamespace: 79e19797-9a9d-4359-9e7e-135e143c02c0\n\tip: 10.128.30.22\nThe actor is dead because its worker process has died. Worker exit type: SYSTEM_ERROR Worker exit detail: Worker unexpectedly exits with a connection error code 2. End of file. There are some potential root causes. (1) The process is killed by SIGKILL by OOM killer due to high memory usage. (2) ray stop --force is called. 
(3) The worker is crashed unexpectedly due to SIGSEGV or other unexpected errors.\n\nThe above exception was the direct cause of the following exception:\n\n\u001b[36mray::_Inner.train()\u001b[39m (pid=345, ip=10.128.30.22, repr=HuggingFaceTrainer)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/trainable.py\", line 355, in train\n raise skipped from exception_cause(skipped)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py\", line 325, in entrypoint\n return self._trainable_func(\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 475, in _trainable_func\n super()._trainable_func(self._merged_config, reporter, checkpoint_dir)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py\", line 651, in _trainable_func\n output = fn()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/base_trainer.py\", line 390, in train_func\n trainer.training_loop()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/data_parallel_trainer.py\", line 371, in training_loop\n self._report(training_iterator)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/data_parallel_trainer.py\", line 320, in _report\n for results in training_iterator:\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 225, in __next__\n next_results = self._run_with_error_handling(self._fetch_next_result)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 188, in _run_with_error_handling\n return func()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/trainer.py\", line 257, in _fetch_next_result\n results = self._backend_executor.get_next_results()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 390, in get_next_results\n results = 
self.get_with_failure_handling(futures)\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 483, in get_with_failure_handling\n self._increment_failures()\n File \"/home/ray/anaconda3/lib/python3.8/site-packages/ray/train/_internal/backend_executor.py\", line 533, in _increment_failures\n raise exc.with_traceback(None) from self._last_failure\nRuntimeError: Training has failed after 1 attempts. You can change the number of max failure attempts by setting the `max_retries` arg in your `Trainer`."
+ ]
+ }
+ ],
+ "source": [
+ "# Call the train_fn defined above as a remote Ray function and wait for its result\n",
+ "r = ray.get(train_fn.remote())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "25819219-0317-43e5-bc31-d1fddd1fe897",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from ray.train.huggingface.transformers.transformers_checkpoint import TransformersCheckpoint\n",
+ "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
+ "from peft import PeftModel, PeftConfig\n",
+ "\n",
+ "model = AutoModelForSeq2SeqLM.from_pretrained('google/flan-t5-large')\n",
+ "tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-large')\n",
+ "\n",
+ "checkpoint = TransformersCheckpoint.from_checkpoint(r.checkpoint)\n",
+ "\n",
+ "# Save model in a directory\n",
+ "model_output_dir = '../../models/raytune'\n",
+ "checkpoint.to_directory(model_output_dir)\n",
+ "\n",
+ "# Load the Lora model\n",
+ "model = PeftModel.from_pretrained(model, model_output_dir)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5af8cd32",
+ "metadata": {},
+ "source": [
+ "Once training is complete, we can bring our Ray cluster down and log out to clean up:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f995319e-17a1-4e1c-80bb-5cd1014e719a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To do next:\n",
+ "# - Train on ROSA data and add inference code\n",
+ "# - Train a model with more parameters\n",
+ "# - Add bitsandbytes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cluster.down()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0d41b90e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "auth.logout()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.13"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}