diff --git a/images/GCPGenStudio2.png b/images/GCPGenStudio2.png index bbd1c8b..09fe8ba 100644 Binary files a/images/GCPGenStudio2.png and b/images/GCPGenStudio2.png differ diff --git a/images/GCPGenStudio3.png b/images/GCPGenStudio3.png index 927d1b6..ed081f7 100644 Binary files a/images/GCPGenStudio3.png and b/images/GCPGenStudio3.png differ diff --git a/images/GCPGenStudio4.png b/images/GCPGenStudio4.png index 1a5cadc..29f7f89 100644 Binary files a/images/GCPGenStudio4.png and b/images/GCPGenStudio4.png differ diff --git a/notebooks/GenAI/GCP_GenAI_Huggingface.ipynb b/notebooks/GenAI/GCP_GenAI_Huggingface.ipynb index 09b3d8d..467c44b 100644 --- a/notebooks/GenAI/GCP_GenAI_Huggingface.ipynb +++ b/notebooks/GenAI/GCP_GenAI_Huggingface.ipynb @@ -38,219 +38,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "a6e5884b-ac90-42d4-aafd-d34d5495d24d", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting transformers\n", - " Obtaining dependency information for transformers from https://files.pythonhosted.org/packages/c1/bd/f64d67df4d3b05a460f281defe830ffab6d7940b7ca98ec085e94e024781/transformers-4.34.1-py3-none-any.whl.metadata\n", - " Downloading transformers-4.34.1-py3-none-any.whl.metadata (121 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.5/121.5 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting datasets\n", - " Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/7c/55/b3432f43d6d7fee999bb23a547820d74c48ec540f5f7842e41aa5d8d5f3a/datasets-2.14.6-py3-none-any.whl.metadata\n", - " Downloading datasets-2.14.6-py3-none-any.whl.metadata (19 kB)\n", - "Collecting rouge_score\n", - " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", - " Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", - "\u001b[?25hCollecting evaluate\n", - " Obtaining dependency information for evaluate from https://files.pythonhosted.org/packages/70/63/7644a1eb7b0297e585a6adec98ed9e575309bb973c33b394dae66bc35c69/evaluate-0.4.1-py3-none-any.whl.metadata\n", - " Downloading evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)\n", - "Collecting keras_nlp\n", - " Obtaining dependency information for keras_nlp from https://files.pythonhosted.org/packages/37/d4/dfd85606db811af2138e97fc480eb7ed709042dd96dd453868bede0929fe/keras_nlp-0.6.2-py3-none-any.whl.metadata\n", - " Downloading keras_nlp-0.6.2-py3-none-any.whl.metadata (7.2 kB)\n", - "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers) (3.12.4)\n", - "Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)\n", - " Obtaining dependency information for huggingface-hub<1.0,>=0.16.4 from https://files.pythonhosted.org/packages/ef/b5/b6107bd65fa4c96fdf00e4733e2fe5729bb9e5e09997f63074bb43d3ab28/huggingface_hub-0.18.0-py3-none-any.whl.metadata\n", - " Downloading huggingface_hub-0.18.0-py3-none-any.whl.metadata (13 kB)\n", - "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from transformers) (1.23.5)\n", - "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from transformers) (23.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (6.0.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers) (2023.8.8)\n", - "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from transformers) (2.31.0)\n", - "Collecting tokenizers<0.15,>=0.14 (from transformers)\n", - " Obtaining dependency information for tokenizers<0.15,>=0.14 from https://files.pythonhosted.org/packages/a7/7b/c1f643eb086b6c5c33eef0c3752e37624bd23e4cbc9f1332748f1c6252d1/tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", - "Collecting safetensors>=0.3.1 (from transformers)\n", - " Obtaining dependency information for safetensors>=0.3.1 from https://files.pythonhosted.org/packages/20/4e/878b080dbda92666233ec6f316a53969edcb58eab1aa399a64d0521cf953/safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", - "Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.10/site-packages (from transformers) (4.66.1)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (9.0.0)\n", - "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.1.1)\n", - "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.3)\n", - "Collecting xxhash (from datasets)\n", - " Obtaining dependency information for xxhash from https://files.pythonhosted.org/packages/80/8a/1dd41557883b6196f8f092011a5c1f72d4d44cf36d7b67d4a5efe3127949/xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess (from datasets)\n", 
- " Obtaining dependency information for multiprocess from https://files.pythonhosted.org/packages/35/a8/36d8d7b3e46b377800d8dec47891cdf05842d1a2366909ae4a0c89fbc5e6/multiprocess-0.70.15-py310-none-any.whl.metadata\n", - " Downloading multiprocess-0.70.15-py310-none-any.whl.metadata (7.2 kB)\n", - "Requirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2023.9.2)\n", - "Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)\n", - "Requirement already satisfied: absl-py in /opt/conda/lib/python3.10/site-packages (from rouge_score) (1.4.0)\n", - "Collecting nltk (from rouge_score)\n", - " Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m80.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: six>=1.14.0 in /opt/conda/lib/python3.10/site-packages (from rouge_score) (1.16.0)\n", - "Collecting responses<0.19 (from evaluate)\n", - " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", - "Requirement already satisfied: keras-core in /opt/conda/lib/python3.10/site-packages (from keras_nlp) (0.1.7)\n", - "Requirement already satisfied: rich in /opt/conda/lib/python3.10/site-packages (from keras_nlp) (13.5.3)\n", - "Requirement already satisfied: dm-tree in /opt/conda/lib/python3.10/site-packages (from keras_nlp) (0.1.8)\n", - "Collecting tensorflow-text (from keras_nlp)\n", - " Obtaining dependency information for tensorflow-text from https://files.pythonhosted.org/packages/0b/5f/8b301d2d0cea8334c22aaeb8880ce115ec34d7eba20f7b08c64202011a85/tensorflow_text-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading tensorflow_text-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (23.1.0)\n", - "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.5.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (2023.7.22)\n", - "Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)\n", - " Obtaining 
dependency information for huggingface-hub<1.0,>=0.16.4 from https://files.pythonhosted.org/packages/aa/f3/3fc97336a0e90516901befd4f500f08d691034d387406fdbde85bea827cc/huggingface_hub-0.17.3-py3-none-any.whl.metadata\n", - " Downloading huggingface_hub-0.17.3-py3-none-any.whl.metadata (13 kB)\n", - "Requirement already satisfied: namex in /opt/conda/lib/python3.10/site-packages (from keras-core->keras_nlp) (0.0.7)\n", - "Requirement already satisfied: h5py in /opt/conda/lib/python3.10/site-packages (from keras-core->keras_nlp) (3.9.0)\n", - "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n", - " Obtaining dependency information for dill<0.3.8,>=0.3.0 from https://files.pythonhosted.org/packages/f5/3a/74a29b11cf2cdfcd6ba89c0cecd70b37cd1ba7b77978ce611eb7a146a832/dill-0.3.7-py3-none-any.whl.metadata\n", - " Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)\n", - "Requirement already satisfied: click in /opt/conda/lib/python3.10/site-packages (from nltk->rouge_score) (8.1.7)\n", - "Requirement already satisfied: joblib in /opt/conda/lib/python3.10/site-packages (from nltk->rouge_score) (1.3.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3.post1)\n", - "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.10/site-packages (from rich->keras_nlp) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from rich->keras_nlp) (2.16.1)\n", - "Requirement already satisfied: tensorflow-hub>=0.13.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow-text->keras_nlp) (0.14.0)\n", - "Collecting tensorflow<2.15,>=2.14.0 (from tensorflow-text->keras_nlp)\n", - " Obtaining dependency information for tensorflow<2.15,>=2.14.0 from https://files.pythonhosted.org/packages/e2/7a/c7762c698fb1ac41a7e3afee51dc72aa3ec74ae8d2f57ce19a9cded3a4af/tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->keras_nlp) (0.1.2)\n", - "Requirement already satisfied: astunparse>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (1.6.3)\n", - "Requirement already satisfied: flatbuffers>=23.5.26 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (23.5.26)\n", - "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (0.4.0)\n", - "Requirement already satisfied: google-pasta>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (0.2.0)\n", - "Requirement already satisfied: libclang>=13.0.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (16.0.6)\n", - "Collecting ml-dtypes==0.2.0 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp)\n", - " Obtaining dependency information for ml-dtypes==0.2.0 
from https://files.pythonhosted.org/packages/d1/1d/d5cf76e5e40f69dbd273036e3172ae4a614577cb141673427b80cac948df/ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n", - "Requirement already satisfied: opt-einsum>=2.3.2 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (3.3.0)\n", - "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (3.20.3)\n", - "Requirement already satisfied: setuptools in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (68.2.2)\n", - "Requirement already satisfied: termcolor>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (2.3.0)\n", - "Requirement already satisfied: wrapt<1.15,>=1.11.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (1.14.1)\n", - "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (0.31.0)\n", - "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (1.48.0)\n", - "Collecting tensorboard<2.15,>=2.14 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp)\n", - " Obtaining dependency information for tensorboard<2.15,>=2.14 from https://files.pythonhosted.org/packages/73/a2/66ed644f6ed1562e0285fcd959af17670ea313c8f331c46f79ee77187eb9/tensorboard-2.14.1-py3-none-any.whl.metadata\n", - " Downloading tensorboard-2.14.1-py3-none-any.whl.metadata (1.7 kB)\n", - "Collecting tensorflow-estimator<2.15,>=2.14.0 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp)\n", - " Obtaining dependency information for tensorflow-estimator<2.15,>=2.14.0 from https://files.pythonhosted.org/packages/d1/da/4f264c196325bb6e37a6285caec5b12a03def489b57cc1fdac02bb6272cd/tensorflow_estimator-2.14.0-py2.py3-none-any.whl.metadata\n", - " Downloading tensorflow_estimator-2.14.0-py2.py3-none-any.whl.metadata (1.3 kB)\n", - "Collecting keras<2.15,>=2.14.0 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp)\n", - " Obtaining dependency information for keras<2.15,>=2.14.0 from https://files.pythonhosted.org/packages/fe/58/34d4d8f1aa11120c2d36d7ad27d0526164b1a8ae45990a2fede31d0e59bf/keras-2.14.0-py3-none-any.whl.metadata\n", - " Downloading keras-2.14.0-py3-none-any.whl.metadata (2.4 kB)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from astunparse>=1.6.0->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (0.41.2)\n", - "Collecting grpcio<2.0,>=1.24.3 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp)\n", - " Obtaining dependency information for grpcio<2.0,>=1.24.3 from https://files.pythonhosted.org/packages/29/cc/e6883efbbcaa6570a0d2207ba53c796137f11293e47d11e2696f37b66811/grpcio-1.59.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading grpcio-1.59.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n", - "Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/conda/lib/python3.10/site-packages 
(from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (2.23.0)\n", - "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (1.0.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (3.4.4)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (0.7.1)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (2.1.2)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (4.2.4)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (0.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (4.9)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.10/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (1.3.1)\n", - "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/conda/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (0.5.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.10/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp) (3.2.2)\n", - "Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m98.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading datasets-2.14.6-py3-none-any.whl (493 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m493.7/493.7 kB\u001b[0m \u001b[31m57.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading evaluate-0.4.1-py3-none-any.whl (84 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading keras_nlp-0.6.2-py3-none-any.whl (590 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m590.1/590.1 kB\u001b[0m \u001b[31m58.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m84.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - 
"\u001b[?25hDownloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m108.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m46.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading dill-0.3.7-py3-none-any.whl (115 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tensorflow_text-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.5/6.5 MB\u001b[0m \u001b[31m108.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m33.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (489.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m489.8/489.8 MB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m69.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading keras-2.14.0-py3-none-any.whl (1.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m89.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading tensorboard-2.14.1-py3-none-any.whl (5.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m113.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading grpcio-1.59.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.3/5.3 MB\u001b[0m \u001b[31m115.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading tensorflow_estimator-2.14.0-py2.py3-none-any.whl (440 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: rouge_score\n", - " Building wheel for rouge_score (setup.py) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=57aa40a32d8d9171d43b9bc47cc3472fac0fb1192aa80eba9defb8e4ffd2352a\n", - " Stored in directory: /home/jupyter/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n", - "Successfully built rouge_score\n", - "Installing collected packages: xxhash, tensorflow-estimator, safetensors, nltk, ml-dtypes, keras, grpcio, dill, rouge_score, responses, multiprocess, huggingface-hub, tokenizers, transformers, tensorboard, datasets, tensorflow, evaluate, tensorflow-text, keras_nlp\n", - " Attempting uninstall: tensorflow-estimator\n", - " Found existing installation: tensorflow-estimator 2.12.0\n", - " Uninstalling tensorflow-estimator-2.12.0:\n", - " Successfully uninstalled tensorflow-estimator-2.12.0\n", - " Attempting uninstall: ml-dtypes\n", - " Found existing installation: ml-dtypes 0.3.1\n", - " Uninstalling ml-dtypes-0.3.1:\n", - " Successfully uninstalled ml-dtypes-0.3.1\n", - " Attempting uninstall: keras\n", - " Found existing installation: keras 2.12.0\n", - " Uninstalling keras-2.12.0:\n", - " Successfully uninstalled keras-2.12.0\n", - " Attempting uninstall: grpcio\n", - " Found existing installation: grpcio 1.48.0\n", - " Uninstalling grpcio-1.48.0:\n", - " Successfully uninstalled grpcio-1.48.0\n", - " Attempting uninstall: dill\n", - " Found existing installation: dill 0.3.1.1\n", - " Uninstalling dill-0.3.1.1:\n", - " Successfully uninstalled dill-0.3.1.1\n", - " Attempting uninstall: tensorboard\n", - " Found existing installation: tensorboard 2.12.3\n", - " Uninstalling tensorboard-2.12.3:\n", - " Successfully uninstalled tensorboard-2.12.3\n", - " Attempting uninstall: tensorflow\n", - " Found existing installation: tensorflow 2.12.0\n", - " Uninstalling tensorflow-2.12.0:\n", - " Successfully uninstalled tensorflow-2.12.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.7 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed datasets-2.14.6 dill-0.3.7 evaluate-0.4.1 grpcio-1.58.0 huggingface-hub-0.17.3 keras-2.14.0 keras_nlp-0.6.2 ml-dtypes-0.2.0 multiprocess-0.70.15 nltk-3.8.1 responses-0.18.0 rouge_score-0.1.2 safetensors-0.4.0 tensorboard-2.14.1 tensorflow-2.14.0 tensorflow-estimator-2.14.0 tensorflow-text-2.14.0 tokenizers-0.14.1 transformers-4.34.1 xxhash-3.4.1\n" - ] - } - ], - "source": [ - "!pip install \"transformers\" \"datasets\" \"rouge_score\" \"evaluate\" \"keras_nlp\"" + "outputs": [], + "source": [ + "!pip install \"transformers\" \"datasets\" \"rouge_score\" \"evaluate\" \"keras_nlp\" \"tf_keras\"" ] }, { @@ -271,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 211, + "execution_count": null, "id": "8f59e17e-c006-45ee-be0b-766774f9d420", "metadata": { "scrolled": true, @@ -295,21 +91,12 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": null, "id": "760c9128-793a-4bed-a127-b92ef496e33b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataset({\n", - " features: ['article', 'abstract'],\n", - " num_rows: 5996\n", - "})\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "print(train)" ] @@ -343,9 +130,11 @@ }, { "cell_type": "code", - "execution_count": 185, + "execution_count": null, "id": "bfd433b3-9790-4a10-ac08-6c90c194d8b0", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#model name\n", @@ -354,41 +143,13 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": null, "id": "1988cbcb-4bec-4aa2-a356-a211584ceacb", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-11-03 15:13:42.327557: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2023-11-03 15:13:42.327603: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2023-11-03 15:13:42.327636: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2023-11-03 15:13:42.336037: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-11-03 15:13:44.543851: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:44.554372: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. 
See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:44.557202: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:44.560698: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:44.563540: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:44.566113: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:45.308267: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:45.310177: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:45.311838: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", - "2023-11-03 15:13:45.313437: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13589 MB memory: -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\n", - "/opt/conda/lib/python3.10/site-packages/keras/src/initializers/initializers.py:120: UserWarning: The initializer RandomNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). 
Please update your code to provide a seed to the initializer, or avoid using the same initializer instance more than once.\n", - " warnings.warn(\n", - "All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.\n", - "\n", - "All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.\n" - ] - } - ], + "outputs": [], "source": [ "from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer\n", "\n", @@ -413,9 +174,11 @@ }, { "cell_type": "code", - "execution_count": 212, + "execution_count": null, "id": "f101c309-f214-4b3f-b77b-d55491e48a59", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "prefix = \"summarize: \"\n", @@ -443,16 +206,18 @@ }, { "cell_type": "code", - "execution_count": 210, + "execution_count": null, "id": "5e58eb58-a655-4e2b-8665-b4b770bc87a7", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "tokenized_train = train.map(preprocess_function, batched=True)\n", "\n", - "#tokenized_test = test.map(preprocess_function, batched=True)\n", + "tokenized_test = test.map(preprocess_function, batched=True)\n", "\n", - "#tokenized_validation = validation.map(preprocess_function, batched=True)" + "tokenized_validation = validation.map(preprocess_function, batched=True)" ] }, { @@ -469,21 +234,12 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "80a25bc8-00db-4b8d-9b68-d52c5d6ca7fe", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataset({\n", - " features: ['article', 'abstract', 'input_ids', 'attention_mask', 'labels'],\n", - " num_rows: 5996\n", - "})\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "print(tokenized_train)" ] @@ -498,9 +254,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "875ef33d-5ef3-4b07-b1de-6d471743a8ad", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from transformers import DataCollatorForSeq2Seq\n", @@ -517,18 +275,12 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "6fcac5a8-912f-461f-bfab-990e472c01ca", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "You're using a T5TokenizerFast tokenizer. 
Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "tf_train_set = model.prepare_tf_dataset(\n", " tokenized_train,\n", @@ -557,18 +309,12 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "11aaf028-c713-4064-84cc-f699df3151ec", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "<_PrefetchDataset element_spec=({'input_ids': TensorSpec(shape=(10, None), dtype=tf.int64, name=None), 'attention_mask': TensorSpec(shape=(10, None), dtype=tf.int64, name=None)}, TensorSpec(shape=(10, None), dtype=tf.int64, name=None))>\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "print (tf_train_set)" ] @@ -583,9 +329,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "50ed6068-763a-46a6-8aed-4862f84413a9", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from transformers import AdamWeightDecay\n", @@ -611,18 +359,12 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "f9c6b45b-7349-4965-938b-3a334ced3882", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using TensorFlow backend\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "import keras_nlp\n", "\n", @@ -652,9 +394,11 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "06e014b1-e9d2-4d9f-a149-c6c0381f7407", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from transformers.keras_callbacks import KerasMetricCallback\n", @@ -672,61 +416,12 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "b8fd0c64-4d85-4b5e-86fe-538c7dc65da7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/3\n", - "599/599 [==============================] - ETA: 0s - loss: 2.5073" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py:371: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. recommend setting `max_new_tokens` to control the maximum length of the generation.\n", - " return py_builtins.overload_of(f)(*args)\n", - "2023-11-02 13:09:59.053088: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55faf0d80f50 initialized for platform CUDA (this does not guarantee that XLA will be used). 
Devices:\n", - "2023-11-02 13:09:59.053132: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla T4, Compute Capability 7.5\n", - "2023-11-02 13:10:00.019242: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator shared/assert_less/Assert/Assert\n", - "2023-11-02 13:10:00.163195: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", - "2023-11-02 13:10:00.714302: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator shared/assert_less_1/Assert/Assert\n", - "2023-11-02 13:10:01.396732: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator shared/assert_less/Assert/Assert\n", - "2023-11-02 13:10:02.853947: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8900\n", - "warning: Linking two modules of different target triples: 'LLVMDialectModule' is 'nvptx64-nvidia-gpulibs' whereas '' is 'nvptx64-nvidia-cuda'\n", - "\n", - "warning: Linking two modules of different target triples: 'LLVMDialectModule' is 'nvptx64-nvidia-gpulibs' whereas '' is 'nvptx64-nvidia-cuda'\n", - "\n", - "warning: Linking two modules of different target triples: 'LLVMDialectModule' is 'nvptx64-nvidia-gpulibs' whereas '' is 'nvptx64-nvidia-cuda'\n", - "\n", - "2023-11-02 13:10:12.362168: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n", - "2023-11-02 13:10:25.996665: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator shared/assert_less/Assert/Assert\n", - "2023-11-02 13:10:26.174121: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator shared/assert_less_1/Assert/Assert\n", - "2023-11-02 13:10:26.666553: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:38] Ignoring Assert operator shared/assert_less/Assert/Assert\n", - "warning: Linking two modules of different target triples: 'LLVMDialectModule' is 'nvptx64-nvidia-gpulibs' whereas '' is 'nvptx64-nvidia-cuda'\n", - "\n", - "warning: Linking two modules of different target triples: 'LLVMDialectModule' is 'nvptx64-nvidia-gpulibs' whereas '' is 'nvptx64-nvidia-cuda'\n", - "\n", - "warning: Linking two modules of different target triples: 'LLVMDialectModule' is 'nvptx64-nvidia-gpulibs' whereas '' is 'nvptx64-nvidia-cuda'\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "599/599 [==============================] - 731s 1s/step - loss: 2.5073 - val_loss: 2.0886 - RougeL: 0.1196\n", - "Epoch 2/3\n", - "599/599 [==============================] - 662s 1s/step - loss: 2.3710 - val_loss: 2.0231 - RougeL: 0.1191\n", - "Epoch 3/3\n", - "599/599 [==============================] - 662s 1s/step - loss: 2.3102 - val_loss: 1.9996 - RougeL: 0.1172\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=metric_callback)\n", "\n", @@ -751,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": null, "id": "980d0053-0c3b-4d0d-91b9-9dd6e6dd3e64", "metadata": {}, "outputs": [], @@ -791,31 +486,10 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": null, "id": "fc2206c7-1bbf-41eb-8c63-abb17752d00d", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "All model checkpoint layers 
were used when initializing TFT5ForConditionalGeneration.\n", - "\n", - "All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at saved_model.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.\n" - ] - }, - { - "data": { - "text/plain": [ - "'We describe the basic virology of Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) and its role in preventing the pandemic of acute respiratory disease, named ‘coronavirus disease 2019’ (COVID-19), which threatens human health and public safety.'" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from transformers import AutoTokenizer\n", "\n", @@ -855,31 +529,13 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": null, "id": "d69aa008-80c0-4a19-aa0f-8f5798673c47", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-11-02 20:07:00-- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7784226/pdf/12248_2020_Article_532.pdf\n", - "Resolving www.ncbi.nlm.nih.gov (www.ncbi.nlm.nih.gov)... 130.14.29.110, 2607:f220:41e:4290::110\n", - "Connecting to www.ncbi.nlm.nih.gov (www.ncbi.nlm.nih.gov)|130.14.29.110|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 5757370 (5.5M) [application/pdf]\n", - "Saving to: ‘12248_2020_Article_532.pdf’\n", - "\n", - "12248_2020_Article_ 100%[===================>] 5.49M 7.25MB/s in 0.8s \n", - "\n", - "2023-11-02 20:07:01 (7.25 MB/s) - ‘12248_2020_Article_532.pdf’ saved [5757370/5757370]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "! wget --user-agent=\"Chrome\" https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7784226/pdf/12248_2020_Article_532.pdf" ] @@ -894,78 +550,13 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "1347b3cd-5ce0-44c9-864d-a688bcacb1d0", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: fitz in /opt/conda/lib/python3.10/site-packages (0.0.1.dev2)\n", - "Collecting PyMuPDF\n", - " Obtaining dependency information for PyMuPDF from https://files.pythonhosted.org/packages/41/4a/530017aaf0a554aa6d9abd547932a02c0188962d12122fe611bf7a6d0c26/PyMuPDF-1.23.5-cp310-none-manylinux2014_x86_64.whl.metadata\n", - " Downloading PyMuPDF-1.23.5-cp310-none-manylinux2014_x86_64.whl.metadata (3.4 kB)\n", - "Requirement already satisfied: configobj in /opt/conda/lib/python3.10/site-packages (from fitz) (5.0.8)\n", - "Requirement already satisfied: configparser in /opt/conda/lib/python3.10/site-packages (from fitz) (6.0.0)\n", - "Requirement already satisfied: httplib2 in /opt/conda/lib/python3.10/site-packages (from fitz) (0.21.0)\n", - "Requirement already satisfied: nibabel in /opt/conda/lib/python3.10/site-packages (from fitz) (5.1.0)\n", - "Requirement already satisfied: nipype in /opt/conda/lib/python3.10/site-packages (from fitz) (1.8.6)\n", - "Requirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from fitz) (1.23.5)\n", - "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from fitz) (2.0.3)\n", - "Requirement already satisfied: pyxnat in /opt/conda/lib/python3.10/site-packages (from fitz) (1.6)\n", - "Requirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from fitz) (1.11.2)\n", - "Collecting PyMuPDFb==1.23.5 (from PyMuPDF)\n", - " Obtaining dependency information for PyMuPDFb==1.23.5 from https://files.pythonhosted.org/packages/cf/14/de59687368ad2c047b038b5b9b04e40bd5d486d5b36c6aef42c18c35ea2c/PyMuPDFb-1.23.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata\n", - " Downloading PyMuPDFb-1.23.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n", - "Requirement already satisfied: six in /opt/conda/lib/python3.10/site-packages (from configobj->fitz) (1.16.0)\n", - "Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /opt/conda/lib/python3.10/site-packages (from httplib2->fitz) (3.1.1)\n", - "Requirement already satisfied: packaging>=17 in /opt/conda/lib/python3.10/site-packages (from nibabel->fitz) (23.1)\n", - "Requirement already satisfied: click>=6.6.0 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (8.1.7)\n", - "Requirement already satisfied: networkx>=2.0 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (3.1)\n", - "Requirement already satisfied: prov>=1.5.2 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (2.0.0)\n", - "Requirement already satisfied: pydot>=1.2.3 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (1.4.2)\n", - "Requirement already satisfied: python-dateutil>=2.2 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (2.8.2)\n", - "Requirement already satisfied: rdflib>=5.0.0 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (7.0.0)\n", - "Requirement already satisfied: simplejson>=3.8.0 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (3.19.2)\n", - "Requirement already satisfied: traits!=5.0,<6.4,>=4.6 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) 
(6.3.2)\n", - "Requirement already satisfied: filelock>=3.0.0 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (3.12.4)\n", - "Requirement already satisfied: etelemetry>=0.2.0 in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (0.3.1)\n", - "Requirement already satisfied: looseversion in /opt/conda/lib/python3.10/site-packages (from nipype->fitz) (1.3.0)\n", - "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->fitz) (2023.3.post1)\n", - "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->fitz) (2023.3)\n", - "Requirement already satisfied: future>=0.16 in /opt/conda/lib/python3.10/site-packages (from pyxnat->fitz) (0.18.3)\n", - "Requirement already satisfied: lxml>=4.3 in /opt/conda/lib/python3.10/site-packages (from pyxnat->fitz) (4.9.3)\n", - "Requirement already satisfied: pathlib>=1.0 in /opt/conda/lib/python3.10/site-packages (from pyxnat->fitz) (1.0.1)\n", - "Requirement already satisfied: requests>=2.20 in /opt/conda/lib/python3.10/site-packages (from pyxnat->fitz) (2.31.0)\n", - "Requirement already satisfied: ci-info>=0.2 in /opt/conda/lib/python3.10/site-packages (from etelemetry>=0.2.0->nipype->fitz) (0.3.0)\n", - "Requirement already satisfied: isodate<0.7.0,>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from rdflib>=5.0.0->nipype->fitz) (0.6.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.20->pyxnat->fitz) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.20->pyxnat->fitz) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.20->pyxnat->fitz) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.20->pyxnat->fitz) (2023.7.22)\n", - "Downloading PyMuPDF-1.23.5-cp310-none-manylinux2014_x86_64.whl (4.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.3/4.3 MB\u001b[0m \u001b[31m46.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading PyMuPDFb-1.23.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (30.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m30.6/30.6 MB\u001b[0m \u001b[31m42.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hInstalling collected packages: PyMuPDFb, PyMuPDF\n", - "Successfully installed PyMuPDF-1.23.5 PyMuPDFb-1.23.5\n" - ] - } - ], + "outputs": [], "source": [ "!pip install \"fitz\" \"PyMuPDF\"" ] @@ -980,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": null, "id": "d77b6ffe-90e1-4a01-aa52-9cf93a9c5c85", "metadata": {}, "outputs": [], @@ -1006,37 +597,13 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": null, "id": "9a1f5dbd-6a9f-4533-a12e-8a6c4073df74", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.\n", - "\n", - "All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at saved_model.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use 
TFT5ForConditionalGeneration for predictions without further training.\n" - ] - }, - { - "ename": "TypeError", - "evalue": "Cannot convert 'Summary:' to EagerTensor of dtype int32", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[97], line 12\u001b[0m\n\u001b[1;32m 8\u001b[0m model \u001b[38;5;241m=\u001b[39m TFAutoModelForSeq2SeqLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msaved_model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 10\u001b[0m outputs \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mgenerate(inputs, max_new_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, do_sample\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 12\u001b[0m tokenizer\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSummary:\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43moutputs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m, skip_special_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:153\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 152\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[0;32m--> 153\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/tensorflow/python/framework/constant_op.py:102\u001b[0m, in \u001b[0;36mconvert_to_eager_tensor\u001b[0;34m(value, ctx, dtype)\u001b[0m\n\u001b[1;32m 100\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtypes\u001b[38;5;241m.\u001b[39mas_dtype(dtype)\u001b[38;5;241m.\u001b[39mas_datatype_enum\n\u001b[1;32m 101\u001b[0m ctx\u001b[38;5;241m.\u001b[39mensure_initialized()\n\u001b[0;32m--> 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mEagerTensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mctx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mTypeError\u001b[0m: Cannot convert 'Summary:' to EagerTensor of dtype int32" - ] - } - ], + "outputs": [], "source": [ "from transformers import AutoTokenizer\n", "\n", @@ -1080,7 +647,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": null, "id": "d3f49896-b2c1-47e6-a7cc-aca7753bb6c4", "metadata": {}, "outputs": [], @@ -1093,7 +660,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "1b91fe7c-5970-45c1-9401-1db3206a8ce9", "metadata": 
{}, "outputs": [], @@ -1106,32 +673,13 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": null, "id": "0066ad72-e451-41c0-b30a-c3a7dfa5f17c", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "ename": "Conflict", - "evalue": "409 POST https://storage.googleapis.com/storage/v1/b?project=cit-oconnellka-9999&prettyPrint=false: Your previous request to create the named bucket succeeded and you already own it.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mConflict\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[105], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Create bucket\u001b[39;00m\n\u001b[1;32m 2\u001b[0m bucket \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mbucket(BUCKET)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mbucket\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/google/cloud/storage/bucket.py:972\u001b[0m, in \u001b[0;36mBucket.create\u001b[0;34m(self, client, project, location, predefined_acl, predefined_default_object_acl, timeout, retry)\u001b[0m\n\u001b[1;32m 925\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Creates current bucket.\u001b[39;00m\n\u001b[1;32m 926\u001b[0m \n\u001b[1;32m 927\u001b[0m \u001b[38;5;124;03mIf the bucket already exists, will raise\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 968\u001b[0m \u001b[38;5;124;03m (Optional) How to retry the RPC. See: :ref:`configuring_retries`\u001b[39;00m\n\u001b[1;32m 969\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 971\u001b[0m client \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_require_client(client)\n\u001b[0;32m--> 972\u001b[0m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_bucket\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 973\u001b[0m \u001b[43m \u001b[49m\u001b[43mbucket_or_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 974\u001b[0m \u001b[43m \u001b[49m\u001b[43mproject\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproject\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 975\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_project\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_project\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 976\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 977\u001b[0m \u001b[43m \u001b[49m\u001b[43mpredefined_acl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpredefined_acl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 978\u001b[0m \u001b[43m \u001b[49m\u001b[43mpredefined_default_object_acl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpredefined_default_object_acl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 979\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 980\u001b[0m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 981\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m/opt/conda/lib/python3.10/site-packages/google/cloud/storage/client.py:954\u001b[0m, in \u001b[0;36mClient.create_bucket\u001b[0;34m(self, bucket_or_name, requester_pays, project, user_project, location, data_locations, predefined_acl, predefined_default_object_acl, timeout, retry)\u001b[0m\n\u001b[1;32m 951\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data_locations \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 952\u001b[0m properties[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcustomPlacementConfig\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdataLocations\u001b[39m\u001b[38;5;124m\"\u001b[39m: data_locations}\n\u001b[0;32m--> 954\u001b[0m api_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post_resource\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 955\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/b\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperties\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43m_target_object\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 963\u001b[0m bucket\u001b[38;5;241m.\u001b[39m_set_properties(api_response)\n\u001b[1;32m 964\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m bucket\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/google/cloud/storage/client.py:618\u001b[0m, in \u001b[0;36mClient._post_resource\u001b[0;34m(self, path, data, query_params, headers, timeout, retry, _target_object)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_post_resource\u001b[39m(\n\u001b[1;32m 558\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 559\u001b[0m path,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 565\u001b[0m _target_object\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 566\u001b[0m ):\n\u001b[1;32m 567\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Helper for bucket / blob methods making API 'POST' calls.\u001b[39;00m\n\u001b[1;32m 568\u001b[0m \n\u001b[1;32m 569\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 615\u001b[0m \u001b[38;5;124;03m If the bucket is not found.\u001b[39;00m\n\u001b[1;32m 616\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 618\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 619\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPOST\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 620\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 621\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 623\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 624\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 625\u001b[0m \u001b[43m \u001b[49m\u001b[43mretry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 626\u001b[0m \u001b[43m \u001b[49m\u001b[43m_target_object\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_target_object\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 627\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/google/cloud/storage/_http.py:72\u001b[0m, in \u001b[0;36mConnection.api_request\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m retry:\n\u001b[1;32m 71\u001b[0m call \u001b[38;5;241m=\u001b[39m retry(call)\n\u001b[0;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/google/api_core/retry.py:349\u001b[0m, in \u001b[0;36mRetry.__call__..retry_wrapped_func\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 345\u001b[0m target \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mpartial(func, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 346\u001b[0m sleep_generator \u001b[38;5;241m=\u001b[39m exponential_sleep_generator(\n\u001b[1;32m 347\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_initial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_maximum, multiplier\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multiplier\n\u001b[1;32m 348\u001b[0m )\n\u001b[0;32m--> 349\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mretry_target\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 350\u001b[0m \u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 351\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_predicate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 352\u001b[0m \u001b[43m \u001b[49m\u001b[43msleep_generator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 353\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 354\u001b[0m \u001b[43m \u001b[49m\u001b[43mon_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mon_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 355\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m/opt/conda/lib/python3.10/site-packages/google/api_core/retry.py:191\u001b[0m, in \u001b[0;36mretry_target\u001b[0;34m(target, predicate, sleep_generator, timeout, on_error, **kwargs)\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m sleep \u001b[38;5;129;01min\u001b[39;00m sleep_generator:\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 191\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtarget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;66;03m# pylint: disable=broad-except\u001b[39;00m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;66;03m# This function explicitly must deal with broad exceptions.\u001b[39;00m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/google/cloud/_http/__init__.py:494\u001b[0m, in \u001b[0;36mJSONConnection.api_request\u001b[0;34m(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)\u001b[0m\n\u001b[1;32m 482\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_request(\n\u001b[1;32m 483\u001b[0m method\u001b[38;5;241m=\u001b[39mmethod,\n\u001b[1;32m 484\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 490\u001b[0m extra_api_info\u001b[38;5;241m=\u001b[39mextra_api_info,\n\u001b[1;32m 491\u001b[0m )\n\u001b[1;32m 493\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;241m200\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m300\u001b[39m:\n\u001b[0;32m--> 494\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mfrom_http_response(response)\n\u001b[1;32m 496\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m expect_json \u001b[38;5;129;01mand\u001b[39;00m response\u001b[38;5;241m.\u001b[39mcontent:\n\u001b[1;32m 497\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\u001b[38;5;241m.\u001b[39mjson()\n", - "\u001b[0;31mConflict\u001b[0m: 409 POST https://storage.googleapis.com/storage/v1/b?project=cit-oconnellka-9999&prettyPrint=false: Your previous request to create the named bucket succeeded and you already own it." 
- ] - } - ], + "outputs": [], "source": [ "#Create bucket\n", "bucket = client.bucket(BUCKET)\n", @@ -1148,25 +696,10 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": null, "id": "1bfbbd92-4b2c-4e5c-95f8-d4e645a6ab24", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "094bf0b9c0bf44b0859f2b9c5f375e8c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Creating CSV from Arrow format: 0%| | 0/6 [00:00\n", - " import evaluate\n", - " File \"/opt/conda/lib/python3.10/site-packages/evaluate/__init__.py\", line 29, in \n", - " from .evaluation_suite import EvaluationSuite\n", - " File \"/opt/conda/lib/python3.10/site-packages/evaluate/evaluation_suite/__init__.py\", line 10, in \n", - " from ..evaluator import evaluator\n", - " File \"/opt/conda/lib/python3.10/site-packages/evaluate/evaluator/__init__.py\", line 17, in \n", - " from transformers.pipelines import SUPPORTED_TASKS as SUPPORTED_PIPELINE_TASKS\n", - " File \"/opt/conda/lib/python3.10/site-packages/transformers/pipelines/__init__.py\", line 72, in \n", - " from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline\n", - " File \"/opt/conda/lib/python3.10/site-packages/transformers/pipelines/table_question_answering.py\", line 26, in \n", - " import tensorflow_probability as tfp\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/__init__.py\", line 20, in \n", - " from tensorflow_probability import substrates\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/substrates/__init__.py\", line 17, in \n", - " from tensorflow_probability.python.internal import all_util\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/__init__.py\", line 138, in \n", - " dir(globals()[pkg_name]) # Forces loading the package from its lazy loader.\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/internal/lazy_loader.py\", line 57, in __dir__\n", - " module = self._load()\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/internal/lazy_loader.py\", line 40, in _load\n", - " module = importlib.import_module(self.__name__)\n", - " File \"/opt/conda/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n", - " return _bootstrap._gcd_import(name[level:], package, level)\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/experimental/__init__.py\", line 31, in \n", - " from tensorflow_probability.python.experimental import bayesopt\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/experimental/bayesopt/__init__.py\", line 17, in \n", - " from tensorflow_probability.python.experimental.bayesopt import acquisition\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/experimental/bayesopt/acquisition/__init__.py\", line 17, in \n", - " from tensorflow_probability.python.experimental.bayesopt.acquisition.acquisition_function import AcquisitionFunction\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/experimental/bayesopt/acquisition/acquisition_function.py\", line 22, in \n", - " from tensorflow_probability.python.internal import prefer_static as ps\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/internal/prefer_static.py\", line 361, in \n", - " ones_like = 
_copy_docstring(tf.ones_like, _ones_like)\n", - " File \"/opt/conda/lib/python3.10/site-packages/tensorflow_probability/python/internal/prefer_static.py\", line 84, in _copy_docstring\n", - " raise ValueError(\n", - "ValueError: Arg specs do not match: original=FullArgSpec(args=['input', 'dtype', 'name', 'layout'], varargs=None, varkw=None, defaults=(None, None, None), kwonlyargs=[], kwonlydefaults=None, annotations={}), new=FullArgSpec(args=['input', 'dtype', 'name'], varargs=None, varkw=None, defaults=(None, None), kwonlyargs=[], kwonlydefaults=None, annotations={}), fn=\n" - ] - } - ], + "outputs": [], "source": [ "#to view options and defaults you can run the command below\n", "!python autopkg-summarizer/trainer/task.py --help" @@ -1603,7 +1056,7 @@ }, { "cell_type": "code", - "execution_count": 258, + "execution_count": null, "id": "b21c8c79-1709-4052-8522-ae332cfec934", "metadata": {}, "outputs": [], @@ -1629,7 +1082,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "09392ddd-aa9d-4358-95a6-3e64fa1692ad", "metadata": {}, "outputs": [], @@ -1659,406 +1112,13 @@ }, { "cell_type": "code", - "execution_count": 262, + "execution_count": null, "id": "252d8e16-5b3d-409b-bc86-9da0ce996f72", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using endpoint [https://us-central1-aiplatform.googleapis.com/]\n", - "/usr/lib/google-cloud-sdk/platform/bundledpythonunix/lib/python3.9/subprocess.py:935: RuntimeWarning: line buffering (buffering=1) isn't supported in binary mode, the default buffer size will be used\n", - " self.stdin = io.open(p2cwrite, 'wb', bufsize)\n", - "/usr/lib/google-cloud-sdk/platform/bundledpythonunix/lib/python3.9/subprocess.py:941: RuntimeWarning: line buffering (buffering=1) isn't supported in binary mode, the default buffer size will be used\n", - " self.stdout = io.open(c2pread, 'rb', bufsize)\n", - "Sending build context to Docker daemon 18.99kB\n", - "Step 1/10 : FROM us-docker.pkg.dev/vertex-ai/training/tf-gpu.2-12.py310:latest\n", - " ---> bd2bbbab7d71\n", - "Step 2/10 : RUN mkdir -m 777 -p /usr/app /home\n", - " ---> Running in 358dbf3724e8\n", - "Removing intermediate container 358dbf3724e8\n", - " ---> edf7be7209d7\n", - "Step 3/10 : WORKDIR /usr/app\n", - " ---> Running in a23be90e59c5\n", - "Removing intermediate container a23be90e59c5\n", - " ---> c35f2baa964c\n", - "Step 4/10 : ENV HOME=/home\n", - " ---> Running in 0137537b093b\n", - "Removing intermediate container 0137537b093b\n", - " ---> 64af9b387e54\n", - "Step 5/10 : ENV PYTHONDONTWRITEBYTECODE=1\n", - " ---> Running in cc5806ee80a2\n", - "Removing intermediate container cc5806ee80a2\n", - " ---> dfe914f7ecbc\n", - "Step 6/10 : RUN rm -rf /var/sitecustomize\n", - " ---> Running in 3e7c5fa57fe2\n", - "Removing intermediate container 3e7c5fa57fe2\n", - " ---> fa997bc68c88\n", - "Step 7/10 : COPY [\"./requirements.txt\", \"./requirements.txt\"]\n", - " ---> 7c46da48c940\n", - "Step 8/10 : RUN pip3 install --no-cache-dir -r ./requirements.txt\n", - " ---> Running in 
6502f72390d6\n", - "Collecting evaluate (from -r ./requirements.txt (line 1))\n", - " Obtaining dependency information for evaluate from https://files.pythonhosted.org/packages/70/63/7644a1eb7b0297e585a6adec98ed9e575309bb973c33b394dae66bc35c69/evaluate-0.4.1-py3-none-any.whl.metadata\n", - " Downloading evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)\n", - "Collecting nltk (from -r ./requirements.txt (line 2))\n", - " Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.5/1.5 MB 61.9 MB/s eta 0:00:00\n", - "Collecting transformers (from -r ./requirements.txt (line 3))\n", - " Obtaining dependency information for transformers from https://files.pythonhosted.org/packages/9a/06/e4ec2a321e57c03b7e9345d709d554a52c33760e5015fdff0919d9459af0/transformers-4.35.0-py3-none-any.whl.metadata\n", - " Downloading transformers-4.35.0-py3-none-any.whl.metadata (123 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 123.1/123.1 kB 203.6 MB/s eta 0:00:00\n", - "Collecting keras_nlp (from -r ./requirements.txt (line 4))\n", - " Obtaining dependency information for keras_nlp from https://files.pythonhosted.org/packages/37/d4/dfd85606db811af2138e97fc480eb7ed709042dd96dd453868bede0929fe/keras_nlp-0.6.2-py3-none-any.whl.metadata\n", - " Downloading keras_nlp-0.6.2-py3-none-any.whl.metadata (7.2 kB)\n", - "Collecting datasets (from -r ./requirements.txt (line 5))\n", - " Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/7c/55/b3432f43d6d7fee999bb23a547820d74c48ec540f5f7842e41aa5d8d5f3a/datasets-2.14.6-py3-none-any.whl.metadata\n", - " Downloading datasets-2.14.6-py3-none-any.whl.metadata (19 kB)\n", - "Collecting rouge_score (from -r ./requirements.txt (line 6))\n", - " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", - " Preparing metadata (setup.py): started\n", - " Preparing metadata (setup.py): finished with status 'done'\n", - "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from evaluate->-r ./requirements.txt (line 1)) (1.23.5)\n", - "Collecting dill (from evaluate->-r ./requirements.txt (line 1))\n", - " Obtaining dependency information for dill from https://files.pythonhosted.org/packages/f5/3a/74a29b11cf2cdfcd6ba89c0cecd70b37cd1ba7b77978ce611eb7a146a832/dill-0.3.7-py3-none-any.whl.metadata\n", - " Downloading dill-0.3.7-py3-none-any.whl.metadata (9.9 kB)\n", - "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from evaluate->-r ./requirements.txt (line 1)) (2.0.3)\n", - "Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from evaluate->-r ./requirements.txt (line 1)) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from evaluate->-r ./requirements.txt (line 1)) (4.65.0)\n", - "Collecting xxhash (from evaluate->-r ./requirements.txt (line 1))\n", - " Obtaining dependency information for xxhash from https://files.pythonhosted.org/packages/80/8a/1dd41557883b6196f8f092011a5c1f72d4d44cf36d7b67d4a5efe3127949/xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", - "Collecting multiprocess (from evaluate->-r ./requirements.txt (line 1))\n", - " Obtaining dependency information for multiprocess from 
https://files.pythonhosted.org/packages/35/a8/36d8d7b3e46b377800d8dec47891cdf05842d1a2366909ae4a0c89fbc5e6/multiprocess-0.70.15-py310-none-any.whl.metadata\n", - " Downloading multiprocess-0.70.15-py310-none-any.whl.metadata (7.2 kB)\n", - "Requirement already satisfied: fsspec[http]>=2021.05.0 in /opt/conda/lib/python3.10/site-packages (from evaluate->-r ./requirements.txt (line 1)) (2023.6.0)\n", - "Collecting huggingface-hub>=0.7.0 (from evaluate->-r ./requirements.txt (line 1))\n", - " Obtaining dependency information for huggingface-hub>=0.7.0 from https://files.pythonhosted.org/packages/ef/b5/b6107bd65fa4c96fdf00e4733e2fe5729bb9e5e09997f63074bb43d3ab28/huggingface_hub-0.18.0-py3-none-any.whl.metadata\n", - " Downloading huggingface_hub-0.18.0-py3-none-any.whl.metadata (13 kB)\n", - "Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from evaluate->-r ./requirements.txt (line 1)) (23.1)\n", - "Collecting responses<0.19 (from evaluate->-r ./requirements.txt (line 1))\n", - " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", - "Requirement already satisfied: click in /opt/conda/lib/python3.10/site-packages (from nltk->-r ./requirements.txt (line 2)) (8.1.6)\n", - "Requirement already satisfied: joblib in /opt/conda/lib/python3.10/site-packages (from nltk->-r ./requirements.txt (line 2)) (1.3.1)\n", - "Collecting regex>=2021.8.3 (from nltk->-r ./requirements.txt (line 2))\n", - " Obtaining dependency information for regex>=2021.8.3 from https://files.pythonhosted.org/packages/8f/3e/4b8b40eb3c80aeaf360f0361d956d129bb3d23b2a3ecbe3a04a8f3bdd6d3/regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 40.9/40.9 kB 178.9 MB/s eta 0:00:00\n", - "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers->-r ./requirements.txt (line 3)) (3.12.2)\n", - "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers->-r ./requirements.txt (line 3)) (6.0.1)\n", - "Collecting tokenizers<0.15,>=0.14 (from transformers->-r ./requirements.txt (line 3))\n", - " Obtaining dependency information for tokenizers<0.15,>=0.14 from https://files.pythonhosted.org/packages/a7/7b/c1f643eb086b6c5c33eef0c3752e37624bd23e4cbc9f1332748f1c6252d1/tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", - "Collecting safetensors>=0.3.1 (from transformers->-r ./requirements.txt (line 3))\n", - " Obtaining dependency information for safetensors>=0.3.1 from https://files.pythonhosted.org/packages/20/4e/878b080dbda92666233ec6f316a53969edcb58eab1aa399a64d0521cf953/safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", - "Collecting keras-core (from keras_nlp->-r ./requirements.txt (line 4))\n", - " Obtaining dependency information for keras-core from https://files.pythonhosted.org/packages/95/f7/b8dcff937ea64f822f0d3fe8c6010793406b82d14467cd0e9eecea458a40/keras_core-0.1.7-py3-none-any.whl.metadata\n", - " Downloading keras_core-0.1.7-py3-none-any.whl.metadata (4.3 kB)\n", - "Requirement already satisfied: absl-py in 
/opt/conda/lib/python3.10/site-packages (from keras_nlp->-r ./requirements.txt (line 4)) (1.4.0)\n", - "Requirement already satisfied: rich in /opt/conda/lib/python3.10/site-packages (from keras_nlp->-r ./requirements.txt (line 4)) (13.5.1)\n", - "Requirement already satisfied: dm-tree in /opt/conda/lib/python3.10/site-packages (from keras_nlp->-r ./requirements.txt (line 4)) (0.1.8)\n", - "Collecting tensorflow-text (from keras_nlp->-r ./requirements.txt (line 4))\n", - " Obtaining dependency information for tensorflow-text from https://files.pythonhosted.org/packages/0b/5f/8b301d2d0cea8334c22aaeb8880ce115ec34d7eba20f7b08c64202011a85/tensorflow_text-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading tensorflow_text-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets->-r ./requirements.txt (line 5)) (12.0.1)\n", - "Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets->-r ./requirements.txt (line 5)) (3.8.5)\n", - "Requirement already satisfied: six>=1.14.0 in /opt/conda/lib/python3.10/site-packages (from rouge_score->-r ./requirements.txt (line 6)) (1.16.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->-r ./requirements.txt (line 5)) (23.1.0)\n", - "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->-r ./requirements.txt (line 5)) (3.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->-r ./requirements.txt (line 5)) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->-r ./requirements.txt (line 5)) (4.0.2)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->-r ./requirements.txt (line 5)) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->-r ./requirements.txt (line 5)) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets->-r ./requirements.txt (line 5)) (1.3.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.7.0->evaluate->-r ./requirements.txt (line 1)) (4.7.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->evaluate->-r ./requirements.txt (line 1)) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->evaluate->-r ./requirements.txt (line 1)) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->evaluate->-r ./requirements.txt (line 1)) (2023.7.22)\n", - "Collecting huggingface-hub>=0.7.0 (from evaluate->-r ./requirements.txt (line 1))\n", - " Obtaining dependency information for huggingface-hub>=0.7.0 from https://files.pythonhosted.org/packages/aa/f3/3fc97336a0e90516901befd4f500f08d691034d387406fdbde85bea827cc/huggingface_hub-0.17.3-py3-none-any.whl.metadata\n", - " Downloading 
huggingface_hub-0.17.3-py3-none-any.whl.metadata (13 kB)\n", - "Collecting namex (from keras-core->keras_nlp->-r ./requirements.txt (line 4))\n", - " Downloading namex-0.0.7-py3-none-any.whl (5.8 kB)\n", - "Requirement already satisfied: h5py in /opt/conda/lib/python3.10/site-packages (from keras-core->keras_nlp->-r ./requirements.txt (line 4)) (3.9.0)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->evaluate->-r ./requirements.txt (line 1)) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->evaluate->-r ./requirements.txt (line 1)) (2023.3)\n", - "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->evaluate->-r ./requirements.txt (line 1)) (2023.3)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.10/site-packages (from rich->keras_nlp->-r ./requirements.txt (line 4)) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from rich->keras_nlp->-r ./requirements.txt (line 4)) (2.15.1)\n", - "Collecting tensorflow-hub>=0.13.0 (from tensorflow-text->keras_nlp->-r ./requirements.txt (line 4))\n", - " Obtaining dependency information for tensorflow-hub>=0.13.0 from https://files.pythonhosted.org/packages/6e/1a/fbae76f4057b9bcdf9468025d7a8ca952dec14bfafb9fc0b1e4244ce212f/tensorflow_hub-0.15.0-py2.py3-none-any.whl.metadata\n", - " Downloading tensorflow_hub-0.15.0-py2.py3-none-any.whl.metadata (1.3 kB)\n", - "Collecting tensorflow<2.15,>=2.14.0 (from tensorflow-text->keras_nlp->-r ./requirements.txt (line 4))\n", - " Obtaining dependency information for tensorflow<2.15,>=2.14.0 from https://files.pythonhosted.org/packages/e2/7a/c7762c698fb1ac41a7e3afee51dc72aa3ec74ae8d2f57ce19a9cded3a4af/tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", - " Downloading tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->keras_nlp->-r ./requirements.txt (line 4)) (0.1.2)\n", - "Requirement already satisfied: astunparse>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (1.6.3)\n", - "Requirement already satisfied: flatbuffers>=23.5.26 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (23.5.26)\n", - "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (0.4.0)\n", - "Requirement already satisfied: google-pasta>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (0.2.0)\n", - "Requirement already satisfied: libclang>=13.0.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (16.0.6)\n", - "Requirement already satisfied: ml-dtypes==0.2.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (0.2.0)\n", - "Requirement already satisfied: opt-einsum>=2.3.2 in 
/opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (3.3.0)\n", - "Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4))\n", - " Obtaining dependency information for protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 from https://files.pythonhosted.org/packages/ae/32/45b1cf0c5d4a3ba881f5164c26af877c0dabfe6de0019d426aa0e5cf6806/protobuf-4.25.0-cp37-abi3-manylinux2014_x86_64.whl.metadata\n", - " Downloading protobuf-4.25.0-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\n", - "Requirement already satisfied: setuptools in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (68.0.0)\n", - "Requirement already satisfied: termcolor>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (2.3.0)\n", - "Requirement already satisfied: wrapt<1.15,>=1.11.0 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (1.14.1)\n", - "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (0.32.0)\n", - "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.10/site-packages (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (1.56.2)\n", - "Collecting tensorboard<2.15,>=2.14 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4))\n", - " Obtaining dependency information for tensorboard<2.15,>=2.14 from https://files.pythonhosted.org/packages/73/a2/66ed644f6ed1562e0285fcd959af17670ea313c8f331c46f79ee77187eb9/tensorboard-2.14.1-py3-none-any.whl.metadata\n", - " Downloading tensorboard-2.14.1-py3-none-any.whl.metadata (1.7 kB)\n", - "Collecting tensorflow-estimator<2.15,>=2.14.0 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4))\n", - " Obtaining dependency information for tensorflow-estimator<2.15,>=2.14.0 from https://files.pythonhosted.org/packages/d1/da/4f264c196325bb6e37a6285caec5b12a03def489b57cc1fdac02bb6272cd/tensorflow_estimator-2.14.0-py2.py3-none-any.whl.metadata\n", - " Downloading tensorflow_estimator-2.14.0-py2.py3-none-any.whl.metadata (1.3 kB)\n", - "Collecting keras<2.15,>=2.14.0 (from tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4))\n", - " Obtaining dependency information for keras<2.15,>=2.14.0 from https://files.pythonhosted.org/packages/fe/58/34d4d8f1aa11120c2d36d7ad27d0526164b1a8ae45990a2fede31d0e59bf/keras-2.14.0-py3-none-any.whl.metadata\n", - " Downloading keras-2.14.0-py3-none-any.whl.metadata (2.4 kB)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from astunparse>=1.6.0->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (0.41.0)\n", - "Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (2.22.0)\n", - "Requirement already satisfied: 
google-auth-oauthlib<1.1,>=0.5 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (1.0.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (3.4.4)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (0.7.1)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (2.3.6)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (5.3.1)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (0.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (4.9)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.10/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (1.3.1)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/conda/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (2.1.3)\n", - "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/conda/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (0.5.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.10/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow<2.15,>=2.14.0->tensorflow-text->keras_nlp->-r ./requirements.txt (line 4)) (3.2.2)\n", - "Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 84.1/84.1 kB 198.5 MB/s eta 0:00:00\n", - "Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.9/7.9 MB 124.2 MB/s eta 0:00:00\n", - "Downloading keras_nlp-0.6.2-py3-none-any.whl (590 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 590.1/590.1 kB 225.3 MB/s eta 0:00:00\n", - "Downloading datasets-2.14.6-py3-none-any.whl (493 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 493.7/493.7 kB 214.8 MB/s eta 0:00:00\n", - "Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 115.3/115.3 kB 230.1 MB/s eta 0:00:00\n", - "Downloading regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (773 kB)\n", - " 
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 773.9/773.9 kB 233.9 MB/s eta 0:00:00\n", - "Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 228.3 MB/s eta 0:00:00\n", - "Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.8/3.8 MB 223.3 MB/s eta 0:00:00\n", - "Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 295.0/295.0 kB 240.5 MB/s eta 0:00:00\n", - "Downloading keras_core-0.1.7-py3-none-any.whl (950 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 950.8/950.8 kB 236.1 MB/s eta 0:00:00\n", - "Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.8/134.8 kB 216.1 MB/s eta 0:00:00\n", - "Downloading tensorflow_text-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.5 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.5/6.5 MB 158.6 MB/s eta 0:00:00\n", - "Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 194.1/194.1 kB 223.4 MB/s eta 0:00:00\n", - "Downloading tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (489.8 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 489.8/489.8 MB 212.8 MB/s eta 0:00:00\n", - "Downloading tensorflow_hub-0.15.0-py2.py3-none-any.whl (85 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 85.4/85.4 kB 185.0 MB/s eta 0:00:00\n", - "Downloading keras-2.14.0-py3-none-any.whl (1.7 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 219.8 MB/s eta 0:00:00\n", - "Downloading protobuf-4.25.0-cp37-abi3-manylinux2014_x86_64.whl (294 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 294.4/294.4 kB 217.2 MB/s eta 0:00:00\n", - "Downloading tensorboard-2.14.1-py3-none-any.whl (5.5 MB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.5/5.5 MB 212.0 MB/s eta 0:00:00\n", - "Downloading tensorflow_estimator-2.14.0-py2.py3-none-any.whl (440 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 440.7/440.7 kB 223.9 MB/s eta 0:00:00\n", - "Building wheels for collected packages: rouge_score\n", - " Building wheel for rouge_score (setup.py): started\n", - " Building wheel for rouge_score (setup.py): finished with status 'done'\n", - " Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=7fb2b5092b892710a8c128f5633d6f5f22dc260df119b78067900b8c74e972a4\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-sagd5q__/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n", - "Successfully built rouge_score\n", - "Installing collected packages: namex, xxhash, tensorflow-estimator, safetensors, regex, protobuf, keras, dill, tensorflow-hub, responses, nltk, multiprocess, huggingface-hub, tokenizers, rouge_score, keras-core, transformers, tensorboard, datasets, tensorflow, evaluate, tensorflow-text, keras_nlp\n", - " Attempting uninstall: tensorflow-estimator\n", - " Found existing installation: tensorflow-estimator 2.12.0\n", - " Uninstalling tensorflow-estimator-2.12.0:\n", - " Successfully uninstalled tensorflow-estimator-2.12.0\n", - " Attempting uninstall: protobuf\n", - " Found existing installation: protobuf 3.20.1\n", - " Uninstalling protobuf-3.20.1:\n", - " Successfully uninstalled protobuf-3.20.1\n", - " Attempting uninstall: keras\n", - " Found 
existing installation: keras 2.12.0\n", - " Uninstalling keras-2.12.0:\n", - " Successfully uninstalled keras-2.12.0\n", - " Attempting uninstall: tensorboard\n", - " Found existing installation: tensorboard 2.12.3\n", - " Uninstalling tensorboard-2.12.3:\n", - " Successfully uninstalled tensorboard-2.12.3\n", - " Attempting uninstall: tensorflow\n", - " Found existing installation: tensorflow 2.12.0\n", - " Uninstalling tensorflow-2.12.0:\n", - " Successfully uninstalled tensorflow-2.12.0\n", - "Successfully installed datasets-2.14.6 dill-0.3.7 evaluate-0.4.1 huggingface-hub-0.17.3 keras-2.14.0 keras-core-0.1.7 keras_nlp-0.6.2 multiprocess-0.70.15 namex-0.0.7 nltk-3.8.1 protobuf-4.25.0 regex-2023.10.3 responses-0.18.0 rouge_score-0.1.2 safetensors-0.4.0 tensorboard-2.14.1 tensorflow-2.14.0 tensorflow-estimator-2.14.0 tensorflow-hub-0.15.0 tensorflow-text-2.14.0 tokenizers-0.14.1 transformers-4.35.0 xxhash-3.4.1\n", - "\u001b[91mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "google-cloud-datastore 1.15.5 requires protobuf<4.0.0dev, but you have protobuf 4.25.0 which is incompatible.\n", - "\u001b[0m\u001b[91mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", - "\u001b[0mRemoving intermediate container 6502f72390d6\n", - " ---> 97a4b7990a59\n", - "Step 9/10 : COPY [\"trainer\", \"trainer\"]\n", - " ---> dce93f89c146\n", - "Step 10/10 : ENTRYPOINT [\"python3\", \"-m\", \"trainer.task\"]\n", - " ---> Running in beccb40ff5ce\n", - "Removing intermediate container beccb40ff5ce\n", - " ---> 6be133543c75\n", - "Successfully built 6be133543c75\n", - "Successfully tagged gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3:20231103.17.39.12.779660\n", - "\n", - "A custom container image is built locally.\n", - "\n", - "/usr/lib/google-cloud-sdk/platform/bundledpythonunix/lib/python3.9/subprocess.py:935: RuntimeWarning: line buffering (buffering=1) isn't supported in binary mode, the default buffer size will be used\n", - " self.stdin = io.open(p2cwrite, 'wb', bufsize)\n", - "/usr/lib/google-cloud-sdk/platform/bundledpythonunix/lib/python3.9/subprocess.py:941: RuntimeWarning: line buffering (buffering=1) isn't supported in binary mode, the default buffer size will be used\n", - " self.stdout = io.open(c2pread, 'rb', bufsize)\n", - "The push refers to repository [gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3]\n", - "de565aa0e952: Preparing\n", - "8027f564cadd: Preparing\n", - "5bd43a783137: Preparing\n", - "c2cec13eda62: Preparing\n", - "73c814d198fd: Preparing\n", - "e42695c7b436: Preparing\n", - "e42695c7b436: Preparing\n", - "7e34967c8575: Preparing\n", - "19c1ff49a1a3: Preparing\n", - "724eb7d1e386: Preparing\n", - "e7df186da59e: Preparing\n", - "e7df186da59e: Preparing\n", - "d9e5455afa58: Preparing\n", - "a4f1c7b5b5c5: Preparing\n", - "1eeca563762d: Preparing\n", - "b3f8d9df367e: Preparing\n", - "29e2658ae6ea: Preparing\n", - "228616cf4f10: Preparing\n", - "ae32b7336b96: Preparing\n", - "ae32b7336b96: Preparing\n", - "ea7b0ccc272e: Preparing\n", - "01d4173a3960: Preparing\n", - "c235d251a607: Preparing\n", - "f2833e4d69b4: Preparing\n", - "49fc5a524f1f: Preparing\n", - "e175e85d3600: Preparing\n", - "55bfb3527de7: Preparing\n", - "ee67859f37c6: 
Preparing\n", - "ed7e041f0699: Preparing\n", - "0235cf47cbae: Preparing\n", - "724eb7d1e386: Waiting\n", - "2971cdbb4b45: Preparing\n", - "8374b2bc65e7: Preparing\n", - "3b93a6feba89: Preparing\n", - "b15400eb0fa7: Preparing\n", - "29ecaf0c2ae0: Preparing\n", - "a4f1c7b5b5c5: Waiting\n", - "41e673079fce: Preparing\n", - "e7df186da59e: Waiting\n", - "1eeca563762d: Waiting\n", - "cda9215846ee: Preparing\n", - "d9e5455afa58: Waiting\n", - "c5eafb4bee8f: Preparing\n", - "b3f8d9df367e: Waiting\n", - "29e2658ae6ea: Waiting\n", - "81182eb0608d: Preparing\n", - "f2baf76d88ee: Preparing\n", - "228616cf4f10: Waiting\n", - "01d4173a3960: Waiting\n", - "cdd7c7392317: Preparing\n", - "ae32b7336b96: Waiting\n", - "c235d251a607: Waiting\n", - "ea7b0ccc272e: Waiting\n", - "e175e85d3600: Waiting\n", - "f2833e4d69b4: Waiting\n", - "b15400eb0fa7: Waiting\n", - "29ecaf0c2ae0: Waiting\n", - "2971cdbb4b45: Waiting\n", - "49fc5a524f1f: Waiting\n", - "41e673079fce: Waiting\n", - "55bfb3527de7: Waiting\n", - "ee67859f37c6: Waiting\n", - "cda9215846ee: Waiting\n", - "3b93a6feba89: Waiting\n", - "8374b2bc65e7: Waiting\n", - "ed7e041f0699: Waiting\n", - "c5eafb4bee8f: Waiting\n", - "0235cf47cbae: Waiting\n", - "81182eb0608d: Waiting\n", - "cdd7c7392317: Waiting\n", - "f2baf76d88ee: Waiting\n", - "e42695c7b436: Waiting\n", - "7e34967c8575: Waiting\n", - "19c1ff49a1a3: Waiting\n", - "73c814d198fd: Pushed\n", - "5bd43a783137: Pushed\n", - "c2cec13eda62: Pushed\n", - "de565aa0e952: Pushed\n", - "e42695c7b436: Layer already exists\n", - "7e34967c8575: Layer already exists\n", - "19c1ff49a1a3: Layer already exists\n", - "e7df186da59e: Layer already exists\n", - "724eb7d1e386: Layer already exists\n", - "d9e5455afa58: Layer already exists\n", - "a4f1c7b5b5c5: Layer already exists\n", - "1eeca563762d: Layer already exists\n", - "b3f8d9df367e: Layer already exists\n", - "228616cf4f10: Layer already exists\n", - "29e2658ae6ea: Layer already exists\n", - "ae32b7336b96: Layer already exists\n", - "ea7b0ccc272e: Layer already exists\n", - "01d4173a3960: Layer already exists\n", - "c235d251a607: Layer already exists\n", - "f2833e4d69b4: Layer already exists\n", - "49fc5a524f1f: Layer already exists\n", - "e175e85d3600: Layer already exists\n", - "55bfb3527de7: Layer already exists\n", - "ee67859f37c6: Layer already exists\n", - "ed7e041f0699: Layer already exists\n", - "0235cf47cbae: Layer already exists\n", - "2971cdbb4b45: Layer already exists\n", - "8374b2bc65e7: Layer already exists\n", - "3b93a6feba89: Layer already exists\n", - "b15400eb0fa7: Layer already exists\n", - "41e673079fce: Layer already exists\n", - "29ecaf0c2ae0: Layer already exists\n", - "c5eafb4bee8f: Layer already exists\n", - "cda9215846ee: Layer already exists\n", - "81182eb0608d: Layer already exists\n", - "f2baf76d88ee: Layer already exists\n", - "cdd7c7392317: Layer already exists\n", - "8027f564cadd: Pushed\n", - "20231103.17.39.12.779660: digest: sha256:1240e61185c933e273e7bc6b5112358d85942e1f8bcb2cf076b3a144e5b748eb size: 8901\n", - "\n", - "Custom container image [gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3:20231103.17.39.12.779660] is created for your custom job.\n", - "\n", - "CustomJob [projects/144763482491/locations/us-central1/customJobs/6207308081613766656] is submitted successfully.\n", - "\n", - "Your job is still active. 
You may view the status of your job with the command\n", - "\n", - " $ gcloud ai custom-jobs describe projects/144763482491/locations/us-central1/customJobs/6207308081613766656\n", - "\n", - "or continue streaming the logs with the command\n", - "\n", - " $ gcloud ai custom-jobs stream-logs projects/144763482491/locations/us-central1/customJobs/6207308081613766656\n" - ] - } - ], + "outputs": [], "source": [ "!gcloud ai custom-jobs create \\\n", "--region=us-central1 \\\n", @@ -2109,7 +1169,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "26042230-dc95-4b6c-bd32-bf3596e5de52", "metadata": {}, "outputs": [], @@ -2119,22 +1179,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "f5a3ef3b-8080-4e2d-bb8f-7e2f22c59e05", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating Model\n", - "Create Model backing LRO: projects/144763482491/locations/us-central1/models/3296764669607280640/operations/1237604172191236096\n", - "Model created. Resource name: projects/144763482491/locations/us-central1/models/3296764669607280640@1\n", - "To use this Model in another session:\n", - "model = aiplatform.Model('projects/144763482491/locations/us-central1/models/3296764669607280640@1')\n" - ] - } - ], + "outputs": [], "source": [ "from google.cloud import aiplatform as vertexai\n", "from google.cloud import aiplatform\n", @@ -2177,25 +1225,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "74a2c3dd-0e34-4049-804b-940c9a440570", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating Endpoint\n", - "Create Endpoint backing LRO: projects/144763482491/locations/us-central1/endpoints/5468832298092724224/operations/884634551396073472\n", - "Endpoint created. Resource name: projects/144763482491/locations/us-central1/endpoints/5468832298092724224\n", - "To use this Endpoint in another session:\n", - "endpoint = aiplatform.Endpoint('projects/144763482491/locations/us-central1/endpoints/5468832298092724224')\n", - "Deploying model to Endpoint : projects/144763482491/locations/us-central1/endpoints/5468832298092724224\n", - "Deploy Endpoint model backing LRO: projects/144763482491/locations/us-central1/endpoints/5468832298092724224/operations/5601029261159825408\n", - "Endpoint model deployed. 
Resource name: projects/144763482491/locations/us-central1/endpoints/5468832298092724224\n" - ] - } - ], + "outputs": [], "source": [ "ENDPOINT_DISPLAY_NAME = \"summarizer-endpoint\" \n", "endpoint = aiplatform.Endpoint.create(display_name=ENDPOINT_DISPLAY_NAME)\n", @@ -2246,187 +1279,23 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "9721f52d-040f-4dc7-808e-8d1ffb5efb4a", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using endpoint [https://us-central1-aiplatform.googleapis.com/]\n", - "---\n", - "createTime: '2023-11-03T17:43:15.502041Z'\n", - "displayName: flan-t5-training-tf3\n", - "endTime: '2023-11-03T18:03:29Z'\n", - "jobSpec:\n", - " workerPoolSpecs:\n", - " - containerSpec:\n", - " args:\n", - " - --model_name_or_path=google/flan-t5-small\n", - " - --train_file=gs://flan-t5-model-resources/train.csv\n", - " - --test_file=gs://flan-t5-model-resources/test.csv\n", - " - --validation_file=gs://flan-t5-model-resources/validation.csv\n", - " - --text_column=article\n", - " - --summary_column=abstract\n", - " - --output_dir=gs://flan-t5-model-resources/\n", - " - '--source_prefix=summarize:'\n", - " imageUri: gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3:20231103.17.39.12.779660\n", - " diskSpec:\n", - " bootDiskSizeGb: 100\n", - " bootDiskType: pd-ssd\n", - " machineSpec:\n", - " acceleratorCount: 1\n", - " acceleratorType: NVIDIA_TESLA_V100\n", - " machineType: n1-standard-4\n", - " replicaCount: '1'\n", - "name: projects/144763482491/locations/us-central1/customJobs/6207308081613766656\n", - "startTime: '2023-11-03T17:48:23Z'\n", - "state: JOB_STATE_SUCCEEDED\n", - "updateTime: '2023-11-03T18:03:44.992454Z'\n", - "---\n", - "createTime: '2023-11-02T04:29:33.732327Z'\n", - "displayName: flan-t5-training-tf3\n", - "endTime: '2023-11-02T04:34:24Z'\n", - "error:\n", - " code: 3\n", - " message: 'The replica workerpool0-0 exited with a non-zero status of 1. 
To find\n", - " out more about why your job exited please check the logs: https://console.cloud.google.com/logs/viewer?project=144763482491&resource=ml_job%2Fjob_id%2F2998009561996066816&advancedFilter=resource.type%3D%22ml_job%22%0Aresource.labels.job_id%3D%222998009561996066816%22'\n", - "jobSpec:\n", - " workerPoolSpecs:\n", - " - containerSpec:\n", - " args:\n", - " - --model_name_or_path=google/flan-t5-small\n", - " - --train_file=gs://flan-t5-model-resources/train.csv\n", - " - --test_file=gs://flan-t5-model-resources/test.csv\n", - " - --validation_file=gs://flan-t5-model-resources/validation.csv\n", - " - --text_column=article\n", - " - --summary_column=abstract\n", - " - --output_dir=gs://flan-t5-model-resources\n", - " - '--source_prefix=summarize:'\n", - " imageUri: gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3:20231102.04.26.29.256583\n", - " diskSpec:\n", - " bootDiskSizeGb: 100\n", - " bootDiskType: pd-ssd\n", - " machineSpec:\n", - " acceleratorCount: 1\n", - " acceleratorType: NVIDIA_TESLA_V100\n", - " machineType: n1-standard-4\n", - " replicaCount: '1'\n", - "name: projects/144763482491/locations/us-central1/customJobs/2998009561996066816\n", - "startTime: '2023-11-02T04:33:54Z'\n", - "state: JOB_STATE_FAILED\n", - "updateTime: '2023-11-02T04:34:28.106045Z'\n", - "---\n", - "createTime: '2023-10-30T11:24:17.577560Z'\n", - "displayName: flan-t5-training-tf\n", - "endTime: '2023-10-30T11:44:47Z'\n", - "jobSpec:\n", - " workerPoolSpecs:\n", - " - containerSpec:\n", - " args:\n", - " - --job_dir=gs://flan-t5-model-resources\n", - " imageUri: gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf:20231030.11.21.47.379363\n", - " diskSpec:\n", - " bootDiskSizeGb: 100\n", - " bootDiskType: pd-ssd\n", - " machineSpec:\n", - " acceleratorCount: 1\n", - " acceleratorType: NVIDIA_TESLA_V100\n", - " machineType: n1-standard-4\n", - " replicaCount: '1'\n", - "name: projects/144763482491/locations/us-central1/customJobs/612998417047617536\n", - "startTime: '2023-10-30T11:29:12Z'\n", - "state: JOB_STATE_SUCCEEDED\n", - "updateTime: '2023-10-30T11:45:16.382233Z'\n", - "---\n", - "createTime: '2023-10-30T10:53:26.358002Z'\n", - "displayName: flan-t5-training-tf\n", - "endTime: '2023-10-30T11:12:59Z'\n", - "error:\n", - " code: 3\n", - " message: 'The replica workerpool0-0 exited with a non-zero status of 1. 
To find\n", - " out more about why your job exited please check the logs: https://console.cloud.google.com/logs/viewer?project=144763482491&resource=ml_job%2Fjob_id%2F6864276174814576640&advancedFilter=resource.type%3D%22ml_job%22%0Aresource.labels.job_id%3D%226864276174814576640%22'\n", - "jobSpec:\n", - " workerPoolSpecs:\n", - " - containerSpec:\n", - " args:\n", - " - --job_dir=gs://flan-t5-model-resources\n", - " imageUri: gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf:20231030.10.50.08.545796\n", - " diskSpec:\n", - " bootDiskSizeGb: 100\n", - " bootDiskType: pd-ssd\n", - " machineSpec:\n", - " acceleratorCount: 1\n", - " acceleratorType: NVIDIA_TESLA_V100\n", - " machineType: n1-standard-4\n", - " replicaCount: '1'\n", - "name: projects/144763482491/locations/us-central1/customJobs/6864276174814576640\n", - "startTime: '2023-10-30T10:57:55Z'\n", - "state: JOB_STATE_FAILED\n", - "updateTime: '2023-10-30T11:13:29.896168Z'\n", - "---\n", - "createTime: '2023-10-26T21:28:18.991136Z'\n", - "displayName: flan-t5-training\n", - "endTime: '2023-10-26T21:53:59Z'\n", - "jobSpec:\n", - " workerPoolSpecs:\n", - " - containerSpec:\n", - " args:\n", - " - --per_device_train_batch_size=2\n", - " - --per_device_eval_batch_size=4\n", - " - --model_name_or_path=google/flan-t5-small\n", - " - --train_file=gs://flan-t5-model-resources/datasets/train.csv\n", - " - --test_file=gs://flan-t5-model-resources/datasets/test.csv\n", - " - --text_column=article\n", - " - --summary_column=abstract\n", - " - --do_train=True\n", - " - --do_eval=False\n", - " - --do_predict=True\n", - " - --predict_with_generate=True\n", - " - --output_dir=gs://flan-t5-model-resources/model_output\n", - " - --num_train_epochs=3\n", - " - --learning_rate=5e-5\n", - " - --seed=7\n", - " - --fp16=True\n", - " imageUri: gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training:20231026.21.27.25.218708\n", - " diskSpec:\n", - " bootDiskSizeGb: 100\n", - " bootDiskType: pd-ssd\n", - " machineSpec:\n", - " acceleratorCount: 1\n", - " acceleratorType: NVIDIA_TESLA_V100\n", - " machineType: n1-standard-4\n", - " replicaCount: '1'\n", - "name: projects/144763482491/locations/us-central1/customJobs/8666538460460351488\n", - "startTime: '2023-10-26T21:33:52Z'\n", - "state: JOB_STATE_SUCCEEDED\n", - "updateTime: '2023-10-26T21:54:18.730721Z'\n" - ] - } - ], + "outputs": [], "source": [ "!gcloud ai custom-jobs list --project=$project --region=$location" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "f6e10933-3d84-41fd-8785-fa801b97bfb0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Long running operation: projects/144763482491/locations/us-central1/operations/3654348322228928512\n", - "delete_custom_job_response: \n" - ] - } - ], + "outputs": [], "source": [ "from google.cloud import aiplatform\n", "custom_job_id=''\n", @@ -2483,21 +1352,10 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "588af684-4c7e-43d5-a1f5-5510157aa40f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Listed 0 items.\n", - "DIGEST TAGS TIMESTAMP\n", - "1240e61185c9 20231103.17.39.12.779660 2023-11-03T17:42:05\n", - "ca99b71c4661 20231103.16.13.42.102563 2023-11-03T16:21:43\n" - ] - } - ], + "outputs": [], "source": [ "#list the containers\n", "!gcloud container images list-tags gcr.io/$project/cloudai-autogenerated/$display_name" @@ -2505,7 +1363,7 @@ }, { 
"cell_type": "code", - "execution_count": 50, + "execution_count": null, "id": "635cc519-230d-48a0-b9b7-c350c2d62ac4", "metadata": {}, "outputs": [], @@ -2516,26 +1374,10 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": null, "id": "946fa12b-ad77-4d19-a556-c926309a14c4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;33mWARNING:\u001b[0m Successfully resolved tag to sha256, but it is recommended to use sha256 directly.\n", - "Digests:\n", - "- gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3@sha256:ca99b71c466168f467152e04791710a9e269e767985b22a6cd1702e4fac2f691\n", - " Associated tags:\n", - " - 20231103.16.13.42.102563\n", - "Tags:\n", - "- gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3:20231103.16.13.42.102563\n", - "Deleted [gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3:20231103.16.13.42.102563].\n", - "Deleted [gcr.io/cit-oconnellka-9999/cloudai-autogenerated/flan-t5-training-tf3@sha256:ca99b71c466168f467152e04791710a9e269e767985b22a6cd1702e4fac2f691].\n" - ] - } - ], + "outputs": [], "source": [ "#delete \n", "!gcloud container images delete gcr.io/$project/cloudai-autogenerated/$display_name:$tag_id --force-delete-tags --quiet" @@ -2562,15 +1404,15 @@ ], "metadata": { "environment": { - "kernel": "python3", - "name": "tf2-gpu.2-12.m112", + "kernel": "conda-root-py", + "name": "workbench-notebooks.m119", "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-12:m112" + "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m119" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel) (Local)", "language": "python", - "name": "python3" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": { @@ -2582,7 +1424,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/GenAI/Gemini_Intro.ipynb b/notebooks/GenAI/Gemini_Intro.ipynb index 94b8d7a..a81002e 100644 --- a/notebooks/GenAI/Gemini_Intro.ipynb +++ b/notebooks/GenAI/Gemini_Intro.ipynb @@ -36,137 +36,15 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "ed9781dd-9764-4e9c-88ba-fcd7bb95842a", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: google-cloud-aiplatform in /opt/conda/lib/python3.10/site-packages (1.37.0)\n", - "Collecting google-cloud-aiplatform\n", - " Downloading google_cloud_aiplatform-1.39.0-py2.py3-none-any.whl.metadata (28 kB)\n", - "Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0 in /opt/conda/lib/python3.10/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.34.0)\n", - "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform) (1.23.0)\n", - "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform) (3.20.3)\n", - "Requirement already satisfied: packaging>=14.3 in /opt/conda/lib/python3.10/site-packages (from 
google-cloud-aiplatform) (23.2)\n", - "Requirement already satisfied: google-cloud-storage<3.0.0dev,>=1.32.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform) (2.13.0)\n", - "Requirement already satisfied: google-cloud-bigquery<4.0.0dev,>=1.15.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform) (3.13.0)\n", - "Requirement already satisfied: google-cloud-resource-manager<3.0.0dev,>=1.3.3 in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform) (1.11.0)\n", - "Requirement already satisfied: shapely<3.0.0dev in /opt/conda/lib/python3.10/site-packages (from google-cloud-aiplatform) (2.0.2)\n", - "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.62.0)\n", - "Requirement already satisfied: google-auth<3.0dev,>=1.25.0 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (2.25.2)\n", - "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /opt/conda/lib/python3.10/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (2.31.0)\n", - "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /opt/conda/lib/python3.10/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.60.0)\n", - "Requirement already satisfied: grpcio-status<2.0dev,>=1.33.2 in /opt/conda/lib/python3.10/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.48.2)\n", - "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.4.1)\n", - "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.6.0)\n", - "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /opt/conda/lib/python3.10/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.8.2)\n", - "Requirement already satisfied: grpc-google-iam-v1<1.0.0dev,>=0.12.4 in /opt/conda/lib/python3.10/site-packages (from google-cloud-resource-manager<3.0.0dev,>=1.3.3->google-cloud-aiplatform) (0.13.0)\n", - "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /opt/conda/lib/python3.10/site-packages (from google-cloud-storage<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.5.0)\n", - "Requirement already satisfied: numpy>=1.14 in /opt/conda/lib/python3.10/site-packages (from shapely<3.0.0dev->google-cloud-aiplatform) (1.25.2)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from 
google-auth<3.0dev,>=1.25.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (5.3.2)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (0.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.10/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (4.9)\n", - "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil<3.0dev,>=2.7.2->google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (1.16.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.26.18)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (2023.11.17)\n", - "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/conda/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0dev,>=1.25.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (0.5.1)\n", - "Downloading google_cloud_aiplatform-1.39.0-py2.py3-none-any.whl (3.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hInstalling collected packages: google-cloud-aiplatform\n", - " Attempting uninstall: google-cloud-aiplatform\n", - " Found existing installation: 
google-cloud-aiplatform 1.37.0\n", - " Uninstalling google-cloud-aiplatform-1.37.0:\n", - " Successfully uninstalled google-cloud-aiplatform-1.37.0\n", - "Successfully installed google-cloud-aiplatform-1.39.0\n" - ] - } - ], - "source": [ - "!pip install --upgrade google-cloud-aiplatform" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "54499594-1d7b-41cc-858b-ffe6f6c2770b", - "metadata": { - "scrolled": true, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting langchain\n", - " Downloading langchain-0.1.0-py3-none-any.whl.metadata (13 kB)\n", - "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.10/site-packages (from langchain) (6.0.1)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.10/site-packages (from langchain) (2.0.23)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/conda/lib/python3.10/site-packages (from langchain) (3.9.1)\n", - "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /opt/conda/lib/python3.10/site-packages (from langchain) (4.0.3)\n", - "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)\n", - " Downloading dataclasses_json-0.6.3-py3-none-any.whl.metadata (25 kB)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.10/site-packages (from langchain) (1.33)\n", - "Collecting langchain-community<0.1,>=0.0.9 (from langchain)\n", - " Downloading langchain_community-0.0.11-py3-none-any.whl.metadata (7.3 kB)\n", - "Collecting langchain-core<0.2,>=0.1.7 (from langchain)\n", - " Downloading langchain_core-0.1.9-py3-none-any.whl.metadata (4.0 kB)\n", - "Requirement already satisfied: langsmith<0.1.0,>=0.0.77 in /opt/conda/lib/python3.10/site-packages (from langchain) (0.0.77)\n", - "Requirement already satisfied: numpy<2,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain) (1.25.2)\n", - "Requirement already satisfied: pydantic<3,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain) (1.10.13)\n", - "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.10/site-packages (from langchain) (2.31.0)\n", - "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /opt/conda/lib/python3.10/site-packages (from langchain) (8.2.3)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", - "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n", - " Downloading marshmallow-3.20.2-py3-none-any.whl.metadata (7.5 kB)\n", - "Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n", - " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain) (2.4)\n", - "Requirement already satisfied: 
anyio<5,>=3 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.2,>=0.1.7->langchain) (3.7.1)\n", - "Requirement already satisfied: packaging<24.0,>=23.2 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.2,>=0.1.7->langchain) (23.2)\n", - "Requirement already satisfied: typing-extensions>=4.2.0 in /opt/conda/lib/python3.10/site-packages (from pydantic<3,>=1->langchain) (4.8.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (1.26.18)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2023.11.17)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.2)\n", - "Requirement already satisfied: sniffio>=1.1 in /opt/conda/lib/python3.10/site-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1.7->langchain) (1.3.0)\n", - "Requirement already satisfied: exceptiongroup in /opt/conda/lib/python3.10/site-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1.7->langchain) (1.2.0)\n", - "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)\n", - " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", - "Downloading langchain-0.1.0-py3-none-any.whl (797 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m798.0/798.0 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hDownloading dataclasses_json-0.6.3-py3-none-any.whl (28 kB)\n", - "Downloading langchain_community-0.0.11-py3-none-any.whl (1.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m26.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hDownloading langchain_core-0.1.9-py3-none-any.whl (216 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.5/216.5 kB\u001b[0m \u001b[31m29.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading marshmallow-3.20.2-py3-none-any.whl (49 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", - "Installing collected packages: mypy-extensions, marshmallow, typing-inspect, langchain-core, dataclasses-json, langchain-community, langchain\n", - " Attempting uninstall: langchain-core\n", - " Found existing installation: langchain-core 0.1.6\n", - " Uninstalling langchain-core-0.1.6:\n", - " Successfully uninstalled langchain-core-0.1.6\n", - "Successfully installed dataclasses-json-0.6.3 langchain-0.1.0 langchain-community-0.0.11 langchain-core-0.1.9 marshmallow-3.20.2 mypy-extensions-1.0.0 typing-inspect-0.9.0\n" - ] - } - ], - "source": [ - "!pip install langchain" + "outputs": [], + "source": [ + "!pip install --upgrade google-cloud-aiplatform langchain langchain-community" ] }, { @@ -184,7 +62,7 @@ }, { "cell_type": "code", - 
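After an in-notebook upgrade like the one above, it is easy to keep importing a stale version until the kernel is restarted. A quick check such as this sketch (package names taken from the install command, nothing pinned) confirms what actually got installed:

```python
from importlib.metadata import version

# Print the installed versions of the packages upgraded above before restarting the kernel.
for pkg in ("google-cloud-aiplatform", "langchain", "langchain-community"):
    print(pkg, version(pkg))
```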
"execution_count": 6, + "execution_count": null, "id": "47dc9232-383f-405b-b1a8-fab64a80492d", "metadata": { "tags": [] @@ -192,12 +70,12 @@ "outputs": [], "source": [ "from google.cloud import aiplatform\n", - "import vertexai.preview\n", - "from vertexai.preview.generative_models import GenerativeModel, ChatSession, Part, Image, GenerationConfig\n", + "import vertexai\n", + "from vertexai.generative_models import GenerativeModel, Image, GenerativeModel, ChatSession, Part, GenerationConfig\n", "\n", "# TODO(developer): Update and un-comment below lines\n", - "project_id = \n", - "location = \n", + "project_id = \"\"\n", + "location = \"\" #(e.g., us-central1)\n", "vertexai.init(project=project_id, location=location)" ] }, @@ -219,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": null, "id": "70bc5b25-c796-4015-82dc-6bc861bb525f", "metadata": { "tags": [] @@ -255,32 +133,12 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "342a0e3d-fbcb-4562-bb5f-b439a92e80e2", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "**Generative AI use cases that are Life Science or Health Care related:**\n", - "\n", - "* **Drug discovery and development:** Generative AI can be used to generate new molecules with desired properties, design new drugs, and predict how drugs will interact with biological systems. This can help to accelerate the drug discovery and development process and make it more efficient.\n", - "* **Personalized medicine:** Generative AI can be used to generate personalized treatment plans for patients based on their individual genetic and health data. This can help to improve the effectiveness of treatment and reduce side effects.\n", - "* **Disease diagnosis and prognosis:** Generative AI can be used to develop new diagnostic tools and methods, and to predict the course of a disease. This can help to improve patient outcomes and reduce healthcare costs.\n", - "* **Medical imaging:** Generative AI can be used to generate synthetic medical images, which can be used to train medical students and residents, develop new imaging technologies, and improve the accuracy of diagnosis.\n", - "* **Electronic health records (EHRs):** Generative AI can be used to generate synthetic EHRs, which can be used to train machine learning algorithms, develop new clinical decision support tools, and improve the efficiency of healthcare operations.\n", - "* **Healthcare chatbots:** Generative AI can be used to develop healthcare chatbots that can provide patients with information and support, answer questions, and schedule appointments. This can help to improve patient access to care and reduce the burden on healthcare providers.\n", - "* **Drug repurposing:** Generative AI can be used to identify new uses for existing drugs, which can help to expand treatment options for patients and reduce the cost of drug development.\n", - "* **Clinical trial design:** Generative AI can be used to design more efficient and effective clinical trials, which can help to accelerate the development of new treatments and improve patient outcomes.\n", - "* **Healthcare fraud detection:** Generative AI can be used to detect fraudulent healthcare claims, which can help to reduce costs and improve the efficiency of healthcare operations.\n", - "\n", - "These are just a few examples of the many potential use cases for generative AI in the life science and healthcare industries. 
As generative AI technology continues to develop, we can expect to see even more innovative and groundbreaking applications in the years to come.\n" - ] - } - ], + "outputs": [], "source": [ "prompt = \"List gen ai use cases that are Life Science or Health Care related. \"\n", "print(get_chat_response(chat, prompt))" @@ -296,57 +154,12 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": null, "id": "f0b917b2-22b5-4011-a9c4-d8a667cf6b1d", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sure, here's a Python script that will replace all null values (empty cells) with zeros within a CSV file:\n", - "\n", - "\n", - "```python\n", - "import csv\n", - "\n", - "# Open the CSV file for reading and writing.\n", - "with open('input.csv', 'r+', newline='') as csvfile:\n", - " # Create a CSV reader and writer.\n", - " reader = csv.reader(csvfile)\n", - " writer = csv.writer(csvfile)\n", - "\n", - " # Read the header row.\n", - " header = next(reader)\n", - "\n", - " # Replace null values with zeros in the remaining rows.\n", - " for row in reader:\n", - " for i, cell in enumerate(row):\n", - " if cell == '':\n", - " row[i] = '0'\n", - "\n", - " # Write the updated row to the CSV file.\n", - " writer.writerow(row)\n", - "```\n", - "\n", - "\n", - "To use this script, save it as a file (e.g. `replace_nulls.py`) and run it from the command line:\n", - "\n", - "\n", - "```\n", - "python replace_nulls.py\n", - "```\n", - "\n", - "\n", - "This will replace all null values in the 'input.csv' file with zeros and create a new CSV file called 'output.csv'.\n", - "\n", - "\n", - "**Note:** Make sure to replace `input.csv` with the actual name of your input CSV file. You can also change the output file name by modifying the `output.csv` part of the script.\n" - ] - } - ], + "outputs": [], "source": [ "prompt = \"create a python code that will replace all null values to zero within a csv file\"\n", "print(get_chat_response(chat, prompt))" @@ -378,16 +191,29 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": null, + "id": "ee4cf184-c815-425c-9742-7625123e02bf", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#download the article\n", + "!wget --user-agent \"Chrome\" https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10954554/pdf/41586_2024_Article_7159.pdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "3becd6c2-daf0-4287-80e5-06cf419287bd", "metadata": { "tags": [] }, "outputs": [], "source": [ - "from langchain_community.document_loaders import TextLoader\n", + "from langchain_community.document_loaders import PyPDFLoader\n", "\n", - "loader = TextLoader(\"./PMC10000003.txt\")\n", + "loader = PyPDFLoader(\"41586_2024_Article_7159.pdf\")\n", "ex_file=loader.load()" ] }, @@ -405,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": null, "id": "c4228e44-9639-40da-8f69-343be93b65b0", "metadata": {}, "outputs": [], @@ -443,18 +269,10 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": null, "id": "97e7ea82-c58e-42ee-b01e-6aa51e324b05", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The article \"Mechanical Treatment of Inflammation of the Knee Joint\" published in the Chicago Medical Examiner on January 1867, discusses the use of mechanical treatments for inflammation of the knee joint. 
The author emphasizes the importance of overcoming the reflex contraction of muscles surrounding the joint to prevent or correct deformities. Tenotomy of the flexor tendons may be necessary to achieve this. Additionally, the relief of pressure on the inflamed joint surfaces is crucial for recovery. This can be achieved through various methods such as adhesive strap dressings, application of an air cushion, or evacuation of pus from the joint. The author also introduces a new apparatus for making extension, which allows for optimal counter-extension and can be used in both acute and chronic cases. The advantages of this apparatus include its large counter-extending surface, security, and patient comfort. By utilizing this principle, various instruments can be crafted to address knee deformities.\n" - ] - } - ], + "outputs": [], "source": [ "print(summarizer(ex_file[0].page_content))" ] @@ -502,7 +320,7 @@ " image1=Part.from_uri(image_path, mime_type=\"image/jpeg\")\n", " else: \n", " image1=Image.load_from_file(image_path)\n", - " #image1=Image.load_from_file(image_path)\n", + " \n", " responses = multimodal_model.generate_content(\n", " [image1, img_prompt],\n", " generation_config={\n", @@ -527,30 +345,12 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": null, "id": "b939f105-89c2-4c38-80f8-2cddf8dcb0ca", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2024-01-11 05:24:55-- https://phil.cdc.gov//PHIL_Images/23312/23312_lores.jpg\n", - "Resolving phil.cdc.gov (phil.cdc.gov)... 198.246.102.26\n", - "Connecting to phil.cdc.gov (phil.cdc.gov)|198.246.102.26|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 31823 (31K) [image/jpeg]\n", - "Saving to: ‘example_image_covid.jpg’\n", - "\n", - "example_image_covid 100%[===================>] 31.08K --.-KB/s in 0.07s \n", - "\n", - "2024-01-11 05:24:55 (455 KB/s) - ‘example_image_covid.jpg’ saved [31823/31823]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "! wget -O example_image_covid.jpg \"https://phil.cdc.gov//PHIL_Images/23312/23312_lores.jpg\" " ] @@ -565,20 +365,12 @@ }, { "cell_type": "code", - "execution_count": 180, + "execution_count": null, "id": "34e81656-4943-439d-9fbe-df439e0e30df", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " The image is a 3D rendering of a coronavirus. The virus is round and has a spiky outer coat. The spikes are made of proteins that help the virus attach to and infect cells. The virus is colored gray and red.None\n" - ] - } - ], + "outputs": [], "source": [ "print(img2text(\"example_image_covid.jpg\", \"describe this image.\"))" ] @@ -593,20 +385,12 @@ }, { "cell_type": "code", - "execution_count": 181, + "execution_count": null, "id": "81197d53-dd3d-4358-9835-ef513ec11d33", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " The image shows a table with a pink peony bouquet, two cups of coffee, a bowl of blueberries, and a silver spoon with the words \"Let's Jam\" on it. There are also five scones with blueberries on them. 
The table is covered with a white tablecloth with purple stains.None\n" - ] - } - ], + "outputs": [], "source": [ "print(img2text(\"gs://generativeai-downloads/images/scones.jpg\", \"describe this image.\"))" ] @@ -621,26 +405,12 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": null, "id": "d8395784-ea68-4a95-a0bb-b3d618f68054", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Preheat oven to 375 degrees F (190 degrees C). Grease a baking sheet.\n", - "\n", - "In a large bowl, combine the flour, sugar, baking powder, and salt. Cut in butter until mixture resembles coarse crumbs. Stir in blueberries.\n", - "\n", - "Turn out onto a lightly floured surface; knead for 10 to 12 times. Pat into a 1/2-in.-thick circle. Cut with a 3-in. floured biscuit cutter. Place 2 in. apart on the prepared baking sheet.\n", - "\n", - "Bake for 12-15 minutes or until golden brown. Cool for 2 minutes before removing to a wire rack to cool completely.None\n" - ] - } - ], + "outputs": [], "source": [ "img_prompt=\"How do you make whats in this image?\"\n", "image=\"gs://generativeai-downloads/images/scones.jpg\"\n", @@ -690,6 +460,7 @@ "source": [ "def video2text(video_path: str, video_prompt: str) -> str:\n", " # Query the model\n", + " multimodal_model = GenerativeModel(\"gemini-pro-vision\")\n", " response = multimodal_model.generate_content(\n", " [\n", " # Add an example image\n", @@ -715,21 +486,12 @@ }, { "cell_type": "code", - "execution_count": 191, + "execution_count": null, "id": "55990074-0365-45f5-9fa6-bedbe93c9932", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " This video is about a messy world. It shows a bunch of different things that are messy, like a messy room, a messy desk, and a messy\n", - "None\n" - ] - } - ], + "outputs": [], "source": [ "video_prompt=\"What is this video about in detail?\"\n", "video=\"gs://cloud-samples-data/video/Machine Learning Solving Problems Big, Small, and Prickly.mp4\"\n", @@ -757,7 +519,7 @@ "id": "8a634e11-3bd5-4048-b6e4-46d5aac6ce34", "metadata": {}, "source": [ - "![Gemini1](../../../images/Gemini_1.png)" + "![Gemini1](../../images/Gemini_1.png)" ] }, { @@ -775,7 +537,7 @@ "tags": [] }, "source": [ - "![Gemini3](../../../images/Gemini_3.png)" + "![Gemini3](../../images/Gemini_3.png)" ] }, { @@ -793,7 +555,7 @@ "tags": [] }, "source": [ - "![Gemini2](../../../images/Gemini_2.png)" + "![Gemini2](../../images/Gemini_2.png)" ] }, { @@ -811,7 +573,7 @@ "tags": [] }, "source": [ - "![Gemini4](../../../images/Gemini_4.png)" + "![Gemini4](../../images/Gemini_4.png)" ] }, { @@ -825,15 +587,15 @@ ], "metadata": { "environment": { - "kernel": "python3", - "name": "common-cpu.m114", + "kernel": "conda-root-py", + "name": "workbench-notebooks.m119", "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/base-cpu:m114" + "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m119" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel) (Local)", "language": "python", - "name": "python3" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": { @@ -845,7 +607,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/GenAI/Pubmed_RAG_chatbot.ipynb b/notebooks/GenAI/Pubmed_RAG_chatbot.ipynb index 9798d2c..2ba7d90 100644 --- 
a/notebooks/GenAI/Pubmed_RAG_chatbot.ipynb +++ b/notebooks/GenAI/Pubmed_RAG_chatbot.ipynb @@ -13,7 +13,7 @@ "id": "3ecea2ad-7c65-4367-87e1-b021167c3a1d", "metadata": {}, "source": [ - "For this tutorial we are creating a PubMed chatbot that will answer questions by gathering information from documents we have provided via an index. The model we will be using today is a pretrained 'text-bison@001' model from GCP.\n", + "For this tutorial we are creating a PubMed chatbot that will answer questions by gathering information from documents we have provided via an index. The model we will be using today is a pretrained 'chat-bison@002' model from GCP.\n", "\n", "This tutorial will go over the following topics:\n", "- Introduce langchain\n", @@ -67,6 +67,18 @@ "We will be exploring both methods!" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c39e2160-660a-40cd-886d-e4179fbe6c13", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!pip install langchain langchain-google-vertexai langchain-community unstructured" + ] + }, { "cell_type": "markdown", "id": "bcf1690d-e93d-4cd3-89c6-8d06b5a071a8", @@ -97,18 +109,6 @@ "bucket = ''" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "a7a349bb-2853-4028-972d-af7f3e857867", - "metadata": {}, - "outputs": [], - "source": [ - "project_id='cit-oconnellka-9999'\n", - "location='us-central1'\n", - "bucket = 'pubmed-chatbot-resources'" - ] - }, { "cell_type": "markdown", "id": "02053f4d-fad7-44ab-a7c3-cfa1c218240f", @@ -131,7 +131,9 @@ "cell_type": "code", "execution_count": null, "id": "99d49432-cf03-4f19-aa82-ef7f8bad5bde", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#make bucket\n", @@ -150,7 +152,9 @@ "cell_type": "code", "execution_count": null, "id": "7b395e34-062d-4f77-afee-3601d471954a", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#download the metadata file\n", @@ -162,7 +166,7 @@ "id": "93a8595a-767f-4cad-9273-62d8e2cf60d1", "metadata": {}, "source": [ - "We only want the metadata of the first 100 files." + "We only want the metadata of the first 50 files." ] }, { @@ -178,8 +182,8 @@ "import pandas as pd\n", "\n", "df = pd.read_csv('oa_comm.filelist.csv')\n", - "#first 100 files\n", - "first_100=df[0:100]" + "#first 50 files\n", + "first_50=df[0:50]" ] }, { "cell_type": "markdown", "id": "abd1ae93-450e-4c79-83cc-ea46a1b507c1", "metadata": {}, "source": [ - "Lets look at our metadata! We can see that the bucket path to the files are under the **Key** column this is what we will use to loop through the PMC bucket and copy the first 100 files to our bucket." + "Let's look at our metadata! We can see that the bucket paths to the files are under the **Key** column; this is what we will use to loop through the PMC bucket and copy the first 50 files to our bucket."
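Because the **Key** values are full object paths, it can be worth previewing what the copied file names will look like before running the copy loop; a small sketch (the column name comes from the metadata file, and the split mirrors the loop used below):

```python
# Preview the object names that will be created under docs/ in our bucket,
# derived from the Key paths the same way the copy loop splits them.
doc_names = first_50['Key'].str.split('/').str[-1]
print(doc_names.head())
```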
] }, { "cell_type": "code", "execution_count": null, "id": "ff77b2aa-ed1b-4d27-8163-fdaa7a304582", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "first_100" + "first_50.head()" ] }, { @@ -211,16 +217,34 @@ { "cell_type": "code", "execution_count": null, - "id": "7d63a7e2-dbf1-49ec-bc84-b8c2c8bde62d", - "metadata": {}, + "id": "b3cca24a-59d0-4dc7-b887-c8cc8547774f", + "metadata": { + "scrolled": true, + "tags": [] + }, "outputs": [], "source": [ + "from google.cloud import storage\n", "import os\n", - "from io import BytesIO\n", - "#gather path to files in bucket\n", - "for i in first_100['Key']:\n", + "import requests\n", + "\n", + "def upload_blob_from_memory(bucket_name, contents, destination_blob_name):\n", + " \"\"\"Uploads a file to the bucket.\"\"\"\n", + " storage_client = storage.Client()\n", + " bucket = storage_client.bucket(bucket_name)\n", + " blob = bucket.blob(destination_blob_name)\n", + "\n", + " blob.upload_from_string(contents)\n", + "\n", + " return print(\n", + " f\"{destination_blob_name} uploaded to {bucket_name}.\"\n", + " )\n", + "\n", + "for i in first_50['Key']:\n", " doc_name=i.split(r'/')[-1]\n", - " os.system(f'curl http://pmc-oa-opendata.s3.amazonaws.com/{i} | curl -T - -v -H \"Authorization: Bearer `gcloud auth print-access-token`\" \"https://storage.googleapis.com/{bucket}/docs/{doc_name} \"')" + " x = requests.get(f'https://pmc-oa-opendata.s3.amazonaws.com/{i}')\n", + " doc = x.text\n", + " upload_blob_from_memory(bucket, doc, f'docs/{doc_name}')" ] }, { @@ -243,7 +267,9 @@ "cell_type": "code", "execution_count": null, "id": "6cf5092c-23f3-4f28-9308-f34b8d90c62b", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import uuid\n", @@ -260,7 +286,9 @@ "cell_type": "code", "execution_count": null, "id": "8e8a4c42-dc17-48a3-a0bb-0cbea527ee7f", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#move inital embeddings file to bucket\n", @@ -281,7 +309,9 @@ "cell_type": "code", "execution_count": null, "id": "39aa7bba-3d15-4a3f-86c2-59d2c92a95ef", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from google.cloud import aiplatform\n", @@ -320,7 +350,9 @@ "cell_type": "code", "execution_count": null, "id": "55596202-13b9-4e35-8099-0602a2b13e72", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#Create the endpoint\n", @@ -329,16 +361,37 @@ " public_endpoint_enabled = True,\n", " location = location\n", ")\n", - "\n", + "\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f771328-31c6-4da2-9d7d-8a548abd12a1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#save endpoint id\n", - "endpoint_id = endpoint.name" + "endpoint_id = index_endpoint.name" + ] + }, + { + "cell_type": "markdown", + "id": "aa79b19a-ca28-4610-824a-44f5bc6b72ab", + "metadata": {}, + "source": [ + "Here we are deploying our index to our endpoint, which can take up to an hour. It's also okay if this cell stops or gets interrupted because the actions are carried out in the console."
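If the deploy cell below does get interrupted, there is no need to start over; a sketch like this (assuming the `endpoint_id` saved above and the current `aiplatform` SDK surface) re-attaches to the endpoint and checks whether the index finished deploying:

```python
from google.cloud import aiplatform

# Re-attach to the existing endpoint using the ID saved above and list what is
# already deployed; an empty list means the deployment has not completed yet.
index_endpoint = aiplatform.MatchingEngineIndexEndpoint(index_endpoint_name=endpoint_id)
for deployed in index_endpoint.deployed_indexes:
    print(deployed.id, deployed.index)
```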
] }, { "cell_type": "code", "execution_count": null, "id": "51412f2f-f32b-44a9-93bc-3e2f6185cada", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#deploy our index to our endpoint\n", @@ -369,14 +422,16 @@ "cell_type": "code", "execution_count": null, "id": "b9016f15-db02-4073-b4c7-288d919bbb55", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import pandas as pd\n", "#Remove the Key column to be replaced later\n", - "first_100.pop('Key')\n", + "first_50.pop('Key')\n", "#convert the metadata to dict\n", - "first_100_dict = first_100.to_dict('records')" + "first_50_dict = first_50.to_dict('records')" ] }, { @@ -391,10 +446,12 @@ "cell_type": "code", "execution_count": null, "id": "69ce004e-ab8d-4b9c-91d8-9320e1679fcd", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "first_100_dict" + "first_50_dict[0]" ] }, { @@ -402,32 +459,34 @@ "id": "2a607a48-31b8-4081-a347-bb1528f8e725", "metadata": {}, "source": [ - "Now we can load in our documents, add in the location of our docs in our bucket and the document name to our metadata, and finally attach that metadata to our documents. At the end we should have 100 documents before splitting the data." + "Now we can load in our documents, add in the location of our docs in our bucket and the document name to our metadata, and finally attach that metadata to our documents. At the end we should have 50 documents before splitting the data." ] }, { "cell_type": "code", "execution_count": null, "id": "47170e83-3e9e-48e6-ab0f-cabdd39507e1", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#add metadata\n", - "from langchain.document_loaders import GCSDirectoryLoader\n", + "from langchain_community.document_loaders import GCSDirectoryLoader\n", "print(f\"Processing documents from {bucket}\")\n", "loader = GCSDirectoryLoader(\n", - " project_name=project_id, bucket=bucket, prefix='docs'\n", + " project_name=project_id, bucket=bucket, prefix = 'docs'\n", ")\n", "documents = loader.load()\n", "\n", "# loop through docs to add metadata to each one\n", - "for i in range(len(documents)):\n", + "for i in range(50):\n", " doc_md = documents[i].metadata\n", " document_name = doc_md[\"source\"].split(\"/\")[-1]\n", " source = f\"{bucket}/docs/{document_name}\"\n", " # Add document name and source to the metadata\n", " documents[i].metadata = {\"source\": source, \"document_name\": document_name}\n", - " documents[i].metadata.update(first_100_dict[i])# attached other metadata to doc\n", + " documents[i].metadata.update(first_50_dict[i])# attached other metadata to doc\n", "print(f\"# of documents loaded (pre-chunking) = {len(documents)}\")" ] }, @@ -472,7 +531,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", "# split the documents into chunks\n", "text_splitter = RecursiveCharacterTextSplitter(\n", " chunk_size=1000,\n", @@ -503,7 +562,7 @@ "metadata": {}, "outputs": [], "source": [ - "doc_splits[0].metadata" + "doc_splits[0]" ] }, { @@ -529,12 +588,12 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.vectorstores import MatchingEngine\n", - "from langchain.embeddings import VertexAIEmbeddings\n", - "embeddings = VertexAIEmbeddings()\n", + "from langchain_google_vertexai import VectorSearchVectorStore\n", + "from langchain_google_vertexai import VertexAIEmbeddings\n", + "embeddings = 
VertexAIEmbeddings(model_name=\"textembedding-gecko@003\")\n", "\n", "# initialize vector store\n", - "vector_store = MatchingEngine.from_components(\n", + "vector_store = VectorSearchVectorStore.from_components(\n", " project_id=project_id,\n", " region=location,\n", " gcs_bucket_name=bucket,\n", @@ -544,6 +603,16 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb060a0d-45c5-4ba3-98de-9e73bdb0a70d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "4e3bfb5b-a3a6-4156-bca3-394774a94565", @@ -562,22 +631,7 @@ "# Store docs as embeddings in Matching Engine index\n", "# It may take a while since API is rate limited\n", "texts = [doc.page_content for doc in doc_splits]\n", - "metadatas = [\n", - " [\n", - " {\"namespace\": \"source\", \"allow_list\": [doc.metadata[\"source\"]]},\n", - " {\"namespace\": \"document_name\", \"allow_list\": [doc.metadata[\"document_name\"]]},\n", - " {\"namespace\": \"ETag\", \"allow_list\": [doc.metadata[\"ETag\"]]},\n", - " {\"namespace\": \"Article Citation\", \"allow_list\": [doc.metadata[\"Article Citation\"]]},\n", - " {\"namespace\": \"AccessionID\", \"allow_list\": [doc.metadata[\"AccessionID\"]]},\n", - " {\"namespace\": \"Last Updated UTC (YYYY-MM-DD HH:MM:SS)\", \"allow_list\": [doc.metadata[\"Last Updated UTC (YYYY-MM-DD HH:MM:SS)\"]]},\n", - " {\"namespace\": \"PMID\", \"allow_list\": [str(doc.metadata[\"PMID\"])]},\n", - " {\"namespace\": \"License\", \"allow_list\": [doc.metadata[\"License\"]]},\n", - " {\"namespace\": \"Retracted\", \"allow_list\": [doc.metadata[\"Retracted\"]]},\n", - " {\"namespace\": \"chunk\", \"allow_list\": [str(doc.metadata[\"chunk\"])]}\n", - " \n", - " ]\n", - " for doc in doc_splits\n", - "]" + "metadatas = [doc.metadata for doc in doc_splits]" ] }, { @@ -655,7 +709,7 @@ "- **Connect to sources of context** (e.g. providing our model with tasks and examples)\n", "- **Rely on reason** (e.g. instruct our model how to answer based on provided context)\n", "\n", - "**Warning**: The following tools must be installed via your terminal `pip install \"langchain\" \"xmltodict\"` and the over all inference script must be run on the terminal via the command `python YOUR_SCRIPT.py`." + "**Warning**: The following tools must be installed via your terminal `pip install \"langchain\" \"xmltodict\" \"langchain-google-vertexai\" \"langchain-community\" \"unstructured\"` and the over all inference script must be run on the terminal via the command `python YOUR_SCRIPT.py`." 
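Before dropping into the full chatbot script, it can be worth confirming that the embeddings actually landed in the index with a quick retrieval test against the vector store built above; this is a sketch, and the query string and `k` value are arbitrary examples:

```python
# Quick sanity check: run a similarity search against the vector store and
# print which source documents the top matches came from.
hits = vector_store.similarity_search("clinical trial results", k=3)
for doc in hits:
    print(doc.metadata.get("document_name"), "->", doc.page_content[:150])
```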
] }, { @@ -680,13 +734,13 @@ }, "source": [ "```python\n", - "from langchain.retrievers import PubMedRetriever\n", - "from langchain.vectorstores import MatchingEngine\n", - "#from langchain.llms import VertexAIModelGarden #uncomment if utilizing models from Model Garden\n", + "from langchain_community.retrievers import PubMedRetriever\n", "from langchain.chains import ConversationalRetrievalChain\n", "from langchain.prompts import PromptTemplate\n", - "from langchain.embeddings import VertexAIEmbeddings\n", - "from langchain.llms import VertexAI\n", + "#from langchain_google_vertexai import VertexAIModelGarden\n", + "from langchain_google_vertexai import VertexAIEmbeddings\n", + "from langchain_google_vertexai import VectorSearchVectorStore\n", + "from langchain_google_vertexai import ChatVertexAI\n", "import sys\n", "import json\n", "import os\n", @@ -773,7 +827,7 @@ "source": [ "```python\n", "llm = VertexAI(\n", - " model_name=\"text-bison@001\",\n", + " model_name=\"chat-bison@002\",\n", " max_output_tokens=1024,\n", " temperature=0.2,\n", " top_p=0.8,\n", @@ -808,9 +862,9 @@ "\n", "#only if using Vector Search as a retriever\n", "\n", - "embeddings = VertexAIEmbeddings() #Make sure embedding model is compatible with model\n", + "embeddings = VertexAIEmbeddings(model_name=\"textembedding-gecko@003\") #Make sure embedding model is compatible with model\n", "\n", - " vector_store = MatchingEngine.from_components(\n", + "vector_store = VectorSearchVectorStore.from_components(\n", " project_id=PROJECT_ID,\n", " region=LOCATION_ID,\n", " gcs_bucket_name=BUCKET,\n", @@ -818,6 +872,7 @@ " index_id=VC_INDEX_ID,\n", " endpoint_id=VC_ENDPOINT_ID\n", " )\n", + "\n", "retriever = vector_store.as_retriever(\n", " search_type=\"similarity\",\n", " search_kwargs={\"k\":3}\n", @@ -1058,7 +1113,7 @@ "id": "80c8fb4b-e74f-4e8d-892b-0f913eff747d", "metadata": {}, "source": [ - "![PubMed Chatbot Results](../../../images/GCP_chatbot_results.png)" + "![PubMed Chatbot Results](../../images/GCP_chatbot_results.png)" ] }, { @@ -1123,15 +1178,15 @@ ], "metadata": { "environment": { - "kernel": "python3", - "name": "common-cpu.m113", + "kernel": "conda-root-py", + "name": "workbench-notebooks.m119", "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/base-cpu:m113" + "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m119" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel) (Local)", "language": "python", - "name": "python3" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": { @@ -1143,7 +1198,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/GenAI/VertexAIStudioGCP.ipynb b/notebooks/GenAI/VertexAIStudioGCP.ipynb index 3366fff..d8bfd84 100644 --- a/notebooks/GenAI/VertexAIStudioGCP.ipynb +++ b/notebooks/GenAI/VertexAIStudioGCP.ipynb @@ -56,15 +56,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Go to the Generative AI Studio console [here](https://console.cloud.google.com/vertex-ai/generative/language?_ga=2.182664366.923116401.1692009977-1042353744.1691708677).\n", + "Go to the Vertex AI Studio console by navigating to Vertex AI via the search bar on the console. On the left side menu scroll down to Vertex AI Studio, click **Language**.\n", "\n", - "Scroll down to **Summarization** and click on the model **Article Summary**. 
You will see a prompt session were you will need to enter in the contents of your article as the console does not allow you to upload files. For this tutorial this article is about how gut microbiota affects Alzeheimer's disease because of the gut-brain-microbiota axis network [here](https://www.aging-us.com/article/102930/pdf).\n", + " \n", "\n", - " \n", "\n", - "To the left you can control the parameters that we have been using before this is a great way to test what each parameter does and how they effect each other. Once you are done click **submit**, you should have a similar output as below. For explainations on the parameters **temperature, token limit max, top p, and top k** see the following article [here](https://cloud.google.com/vertex-ai/docs/generative-ai/text/test-text-prompts#generative-ai-test-text-prompt-drest).\n", + "Scroll down **\"Prompt examples\"** then to **Summarization** and click **\"Open\"** on **Article Summary**. You will see a prompt session were you will need to enter in the contents of your article as the console does not allow you to upload files. For this tutorial this article is about how gut microbiota affects Alzeheimer's disease because of the gut-brain-microbiota axis network [here](https://www.aging-us.com/article/102930/pdf).\n", "\n", - " \n", + " \n", + "\n", + "To the right you can control the parameters that we have been using before this is a great way to test what each parameter does and how they effect each other. Once you are done click **submit**, you should have a similar output as below. For explainations on the parameters **temperature, Output token limit, top p, and top k** see the following article [here](https://cloud.google.com/vertex-ai/docs/generative-ai/text/test-text-prompts#generative-ai-test-text-prompt-drest).\n", + "\n", + " \n", " \n" ] }, @@ -79,7 +82,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "" + "" ] }, { @@ -125,15 +128,15 @@ "toc_visible": true }, "environment": { - "kernel": "python3", - "name": "tf2-gpu.2-11.m108", + "kernel": "conda-root-py", + "name": "workbench-notebooks.m119", "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-11:m108" + "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m119" }, "kernelspec": { - "display_name": "Python (Local)", + "display_name": "Python 3 (ipykernel) (Local)", "language": "python", - "name": "local-base" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": { @@ -145,7 +148,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/GenAI/example_scripts/example_langchain_chat_llama_2_zeroshot.py b/notebooks/GenAI/example_scripts/example_langchain_chat_llama_2_zeroshot.py index 1756c90..e554caf 100644 --- a/notebooks/GenAI/example_scripts/example_langchain_chat_llama_2_zeroshot.py +++ b/notebooks/GenAI/example_scripts/example_langchain_chat_llama_2_zeroshot.py @@ -1,8 +1,8 @@ -from langchain.retrievers import PubMedRetriever +from langchain_community.retrievers import PubMedRetriever from langchain.chains import ConversationalRetrievalChain from langchain.prompts import PromptTemplate #from langchain.llms import VertexAIModelGarden -from langchain.llms import VertexAI +from langchain_google_vertexai import ChatVertexAI import sys import json import os @@ -30,7 +30,7 @@ def build_chain(): #llm = VertexAIModelGarden(project=PROJECT_ID, endpoint_id=ENDPOINT_ID, location=LOCATION_ID) 
llm = VertexAI( - model_name="text-bison@001", + model_name="chat-bison@002", max_output_tokens=1024, temperature=0.2, top_p=0.8, diff --git a/notebooks/GenAI/example_scripts/example_vectorsearch_chat_llama_2_zeroshot.py b/notebooks/GenAI/example_scripts/example_vectorsearch_chat_llama_2_zeroshot.py index bcc8acb..85a9033 100644 --- a/notebooks/GenAI/example_scripts/example_vectorsearch_chat_llama_2_zeroshot.py +++ b/notebooks/GenAI/example_scripts/example_vectorsearch_chat_llama_2_zeroshot.py @@ -1,9 +1,9 @@ from langchain.chains import ConversationalRetrievalChain from langchain.prompts import PromptTemplate -#from langchain.llms import VertexAIModelGarden -from langchain.embeddings import VertexAIEmbeddings -from langchain.vectorstores import MatchingEngine -from langchain.llms import VertexAI +#from langchain_google_vertexai import VertexAIModelGarden +from langchain_google_vertexai import VertexAIEmbeddings +from langchain_google_vertexai import VectorSearchVectorStore +from langchain_google_vertexai import ChatVertexAI import sys import json import os @@ -35,16 +35,16 @@ def build_chain(): #llm = VertexAIModelGarden(project=PROJECT_ID, endpoint_id=ENDPOINT_ID, location=LOCATION_ID) llm = VertexAI( - model_name="text-bison@001", + model_name="chat-bison@002", max_output_tokens=1024, temperature=0.2, top_p=0.8, top_k=40, verbose=True, ) - embeddings = VertexAIEmbeddings() + embeddings = VertexAIEmbeddings(model_name="textembedding-gecko@003") - vector_store = MatchingEngine.from_components( + vector_store = VectorSearchVectorStore.from_components( project_id=PROJECT_ID, region=LOCATION_ID, gcs_bucket_name=BUCKET, @@ -52,6 +52,7 @@ def build_chain(): index_id=VC_INDEX_ID, endpoint_id=VC_ENDPOINT_ID ) + retriever = vector_store.as_retriever( search_type="similarity", diff --git a/notebooks/GenAI/langchain_on_vertex.ipynb b/notebooks/GenAI/langchain_on_vertex.ipynb index ebfec60..716bfda 100644 --- a/notebooks/GenAI/langchain_on_vertex.ipynb +++ b/notebooks/GenAI/langchain_on_vertex.ipynb @@ -9,14 +9,6 @@ "This tutorial is designed to give you the basics of using langchain to work with Large Language Models (LLMs) for document summarization and basic chat bot functionality. You could take what we have here to build a front end application using something like streamlit, or other further iterations." 
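Since the introduction mentions building a front end with something like Streamlit, here is a minimal sketch of what that could look like; Streamlit is not installed by this notebook, and the loader and chain simply mirror the summarization cells that follow, so treat it as a starting point rather than part of the tutorial:

```python
# save as app.py and launch with: streamlit run app.py
import streamlit as st
from langchain_google_vertexai import ChatVertexAI
from langchain_community.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain

st.title("Article summarizer (sketch)")
url = st.text_input("Article URL to summarize")

if url:
    # Load the page, summarize it with the same chain style used later in the notebook,
    # and render the result in the browser.
    docs = WebBaseLoader(url).load()
    chain = load_summarize_chain(ChatVertexAI(), chain_type="stuff")
    st.write(chain.invoke(docs))
```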
] }, - { - "cell_type": "markdown", - "id": "5ebc47a9-958b-4250-a3bf-688e627f2c6a", - "metadata": {}, - "source": [ - "**Increase Max Tokens**" - ] - }, { "cell_type": "markdown", "id": "d2f0d198-f1cf-40ec-b813-4b1e8d50ab80", @@ -29,10 +21,13 @@ "cell_type": "code", "execution_count": null, "id": "8662e8f8-66ce-4ca6-a121-d087c499390f", - "metadata": {}, + "metadata": { + "scrolled": true, + "tags": [] + }, "outputs": [], "source": [ - "!pip install google-cloud-aiplatform==1.34.0 langchain==0.0.310 pypdf faiss-cpu --user" + "!pip install -U google-cloud-aiplatform langchain langchain-community langchain-google-vertexai pypdf faiss-cpu --user" ] }, { @@ -47,20 +42,22 @@ "cell_type": "code", "execution_count": null, "id": "27e6851a-f15d-4881-8173-9b788a009201", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "import langchain\n", - "from langchain.llms import VertexAI\n", - "from langchain.vectorstores import FAISS\n", - "from langchain.prompts import PromptTemplate\n", - "from langchain.schema import StrOutputParser\n", - "from langchain.document_loaders import PyPDFLoader\n", - "from langchain.embeddings import VertexAIEmbeddings\n", - "from langchain.document_loaders import WebBaseLoader\n", + "import bs4\n", + "from langchain_google_vertexai import ChatVertexAI\n", + "from langchain_community.vectorstores import FAISS\n", + "from langchain_core.prompts import PromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_community.document_loaders import PyPDFLoader\n", + "from langchain_google_vertexai import VertexAIEmbeddings\n", + "from langchain_community.document_loaders import WebBaseLoader\n", "from langchain.chains.summarize import load_summarize_chain\n", "from langchain.schema.prompt_template import format_document\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter" + "from langchain_text_splitters import RecursiveCharacterTextSplitter" ] }, { @@ -75,7 +72,9 @@ "cell_type": "code", "execution_count": null, "id": "46d1b6cc-862e-4a67-a755-fbc4f7595c6f", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "loader = WebBaseLoader(\"https://pubmed.ncbi.nlm.nih.gov/37883540/\")\n", @@ -86,10 +85,12 @@ "cell_type": "code", "execution_count": null, "id": "e34bd138-d852-40ba-87bd-ee559483aa20", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "llm = VertexAI()\n", + "llm = ChatVertexAI()\n", "print('the LLM and default params are : ', llm)\n", "\n", "chain = load_summarize_chain(llm, chain_type=\"stuff\")\n", @@ -101,12 +102,14 @@ "cell_type": "code", "execution_count": null, "id": "dee2c20d-7678-4f6d-81c7-0b2a2b62d055", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "print('the summary of the document in a single paragraph is: ')\n", "\n", - "chain.run(docs)\n" + "print(chain.invoke(docs))\n" ] }, { @@ -129,7 +132,9 @@ "cell_type": "code", "execution_count": null, "id": "0ad234c3-47c4-4aaf-a5b1-a3323555a8a5", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "template = \"\"\"Question: {question}\n", @@ -143,7 +148,9 @@ "cell_type": "code", "execution_count": null, "id": "126cdbda-6446-4bbb-8018-f24fce5a7216", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "chain = prompt | llm" @@ -153,7 +160,9 @@ "cell_type": "code", "execution_count": null, "id": "7323a512-5826-4498-baa6-65dca1dc6a6f", - "metadata": {}, + "metadata": { + "tags": [] + }, 
"outputs": [], "source": [ "question = \"What evidence do we have for chimpanzees going through menopause?\"\n", @@ -188,11 +197,23 @@ { "cell_type": "code", "execution_count": null, - "id": "2c5bcbbb-8e24-424d-931d-c9b6c09fb888", + "id": "332b3383-a8ea-45f7-95bb-a5da4055c806", "metadata": {}, "outputs": [], "source": [ - "loader = PyPDFLoader(\"articles/science.add5473.pdf\")\n", + "!wget --user-agent \"Chrome\" https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10954554/pdf/41586_2024_Article_7159.pdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c5bcbbb-8e24-424d-931d-c9b6c09fb888", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "loader = PyPDFLoader(\"41586_2024_Article_7159.pdf\")\n", "pages = loader.load_and_split()" ] }, @@ -235,7 +256,8 @@ "outputs": [], "source": [ "# index the document using FAISS\n", - "faiss_index = FAISS.from_documents(pages, VertexAIEmbeddings())" + "embeddings = VertexAIEmbeddings(model_name=\"textembedding-gecko@003\")\n", + "faiss_index = FAISS.from_documents(pages, embeddings)" ] }, { @@ -253,7 +275,7 @@ "metadata": {}, "outputs": [], "source": [ - "query = 'What evidence is there that chimpanzees go through menopause'" + "query = 'What evidence is there that toothed whales go through menopause'" ] }, { @@ -263,8 +285,8 @@ "metadata": {}, "outputs": [], "source": [ - "docs = faiss_index.similarity_search(query, k=5)\n", - "docs[0]" + "pages = faiss_index.similarity_search(query, k=5)\n", + "pages[0]" ] }, { @@ -286,12 +308,12 @@ "\n", "chain = (\n", " {\n", - " \"content\": lambda docs: \"\\n\\n\".join(\n", - " format_document(doc, doc_prompt) for doc in docs\n", + " \"content\": lambda pages: \"\\n\\n\".join(\n", + " format_document(page, doc_prompt) for page in pages\n", " )\n", " }\n", " | PromptTemplate.from_template(\"Summarize the following content in around 200 words:\\n\\n{content}\")\n", - " | VertexAI()\n", + " | ChatVertexAI()\n", " | StrOutputParser()\n", ")" ] @@ -303,28 +325,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(chain.invoke(docs))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "412cff1c-f70d-4cc6-95ea-047f882de6ec", - "metadata": {}, - "outputs": [], - "source": [ - "doc_prompt = PromptTemplate.from_template(\"{page_content}\")\n", - "\n", - "chain = (\n", - " {\n", - " \"content\": lambda docs: \"\\n\\n\".join(\n", - " format_document(doc, doc_prompt) for doc in docs\n", - " )\n", - " }\n", - " | PromptTemplate.from_template(\"Summarize the following content:\\n\\n{content}\")\n", - " | VertexAI()\n", - " | StrOutputParser()\n", - ")" + "print(chain.invoke(pages))" ] }, { @@ -350,43 +351,7 @@ " ONLY use information that is based on the documents. \\\n", " \\\n", " Document number: \\\n", - " Documents: {content}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "193f50a7-f5bb-4384-8326-1074750bcb70", - "metadata": {}, - "outputs": [], - "source": [ - "prompt_str = \"Instructions: You are about to receive text from several documents. \\\n", - " Based on the documents, give me five ideas for follow up studies that could be conducted. \\\n", - " Be professional, factual, and succinct in your response. \\\n", - " Your answer is ONLY based on information in the documents above. \\\n", - " If you can not answer the question, answer \\\n", - " I am sorry, I am unable to answer the question based on the information provided \\\n", - " ONLY use information that is based on the documents. 
\\\n", - " \\\n", - " Documents: {content}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d1b8228-5af6-400e-b2aa-4448d3334241", - "metadata": {}, - "outputs": [], - "source": [ - "prompt_str = \"Instructions: You are about to receive text from several documents. \\\n", - " Based on the documents, describe to me what materials would be needed to recreate the study in question. \\\n", - " Be professional, factual, and succinct in your response. \\\n", - " Your answer is ONLY based on information in the documents above. \\\n", - " If you can not answer the question, answer \\\n", - " I am sorry, I am unable to answer the question based on the information provided \\\n", - " ONLY use information that is based on the documents. \\\n", - " \\\n", - " Documents: {content}\"" + " Documents: {page_content}\"" ] }, { @@ -400,12 +365,12 @@ "\n", "chain = (\n", " {\n", - " \"content\": lambda docs: \"\\n\\n\".join(\n", - " format_document(doc, doc_prompt) for doc in docs\n", + " \"page_content\": lambda pages: \"\\n\\n\".join(\n", + " format_document(page, doc_prompt) for page in pages\n", " )\n", " }\n", " | PromptTemplate.from_template(prompt_str) \n", - " | VertexAI()\n", + " | ChatVertexAI()\n", " | StrOutputParser()\n", ")" ] @@ -417,7 +382,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(chain.invoke(docs))" + "print(chain.invoke(pages))" ] }, { @@ -438,7 +403,7 @@ "source": [ "#model garden\n", "#https://cloud.google.com/vertex-ai/docs/general/deployment#what_happens_when_you_deploy_a_model\n", - "from langchain.llms import VertexAIModelGarden" + "from langchain_google_vertexai import VertexAIModelGarden" ] }, { @@ -454,6 +419,19 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5df24a4-0a2e-4077-a831-b8194b8c312a", + "metadata": {}, + "outputs": [], + "source": [ + "llm = VertexAIModelGarden(\n", + " project=\"YOUR PROJECT ID\",\n", + " endpoint_id=\"YOUR ENDPOINT ID\"\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -461,7 +439,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(llm(\"What are the greatest questions left to answer in biomedical research?\"))" + "print(llm.invoke(\"What are the greatest questions left to answer in biomedical research?\"))" ] }, { @@ -487,7 +465,7 @@ "metadata": {}, "outputs": [], "source": [ - "llm = VertexAI(model_name=\"code-bison\", max_output_tokens=1000, temperature=0.3)" + "llm = ChatVertexAI(model_name=\"codechat-bison@002\", max_output_tokens=1000, temperature=0.3)" ] }, { @@ -507,7 +485,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(llm(question))" + "print(llm.invoke(question))" ] }, { @@ -527,7 +505,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(llm(question))" + "print(llm.invoke(question))" ] }, { @@ -547,23 +525,29 @@ "metadata": {}, "outputs": [], "source": [ - "print(llm(question))" + "print(llm.invoke(question))" ] }, { "cell_type": "code", "execution_count": null, - "id": "8bc7c91c-0403-4568-9e6b-1d1767e905d3", + "id": "4165b6d6-8293-45b8-aedc-93395c884659", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { + "environment": { + "kernel": "conda-root-py", + "name": "workbench-notebooks.m119", + "type": "gcloud", + "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m119" + }, "kernelspec": { - "display_name": "Python (Local)", + "display_name": "Python 3 (ipykernel) (Local)", "language": "python", - "name": "local-base" + "name": "conda-root-py" }, "language_info": { "codemirror_mode": { @@ 
-575,7 +559,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.14" } }, "nbformat": 4,