Commit: Fix ExLlama script and update to ExLlamaV2; Fix ChatDocs by switching to GGUF (may need to deprecate ChatDocs soon)
Showing 2 changed files with 346 additions and 262 deletions.
ChatDocs notebook (@@ -1,140 +1,166 @@)

Before (GPTQ via auto-gptq, local Jupyter):

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bc32353a-3506-4319-9051-f1822d455aae",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install chatdocs auto-gptq xformers ipywidgets --quiet\n",
    "\n",
    "# note: if you see the message 'CUDA extension not installed' while running chatdocs, try installing auto-gptq from a wheel (see https://github.com/PanQiWei/AutoGPTQ/releases/)\n",
    "# !pip install https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.2/auto_gptq-0.3.2+cu118-cp310-cp310-linux_x86_64.whl --quiet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b1062256-93dc-4ba5-be74-17407bebf867",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load INSTRUCTOR_Transformer\n",
      "[2023-08-22 11:56:55,183] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "max_seq_length 512\n",
      "skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n"
     ]
    }
   ],
   "source": [
    "with open('chatdocs.yml', 'w') as f:\n",
    "    f.write('''\n",
    "embeddings:\n",
    "  model: hkunlp/instructor-large\n",
    "\n",
    "llm: gptq\n",
    "\n",
    "gptq:\n",
    "  model: TheBloke/Vigogne-2-13B-Instruct-GPTQ\n",
    "  model_file: model.safetensors\n",
    "  device: 0\n",
    "\n",
    "retriever:\n",
    "  search_kwargs:\n",
    "    k: 5\n",
    "    ''')\n",
    "\n",
    "!chatdocs download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4448c264-1894-4884-8e85-0d020fd68d16",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating new vectorstore\n",
      "Loading documents from db\n",
      "Loading new documents: 0it [00:00, ?it/s]\n",
      "No new documents to load\n"
     ]
    }
   ],
   "source": [
    "# note: add files via the file browser upload feature and re-run this cell if needed\n",
    "!mkdir db\n",
    "!chatdocs add db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load INSTRUCTOR_Transformer\n",
      "[2023-08-22 11:57:08,373] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "max_seq_length 512\n",
      "skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n",
      " * Serving Quart app 'chatdocs.ui'\n",
      " * Environment: production\n",
      " * Please use an ASGI server (e.g. Hypercorn) directly in production\n",
      " * Debug mode: False\n",
      " * Running on http://localhost:5000 (CTRL + C to quit)\n",
      "[2023-08-22 11:57:13 -0600] [7793] [INFO] Running on http://127.0.0.1:5000 (CTRL + C to quit)\n",
      "^C\n"
     ]
    }
   ],
   "source": [
    "# note: Colab allows for entering input directly into a running code cell (Jupyter Lab does not)\n",
    "# to use the chat mode elsewhere, run it directly from a terminal; otherwise, try the ui mode\n",
    "if 'google.colab' in str(get_ipython()):\n",
    "    !chatdocs chat\n",
    "else:\n",
    "    !chatdocs ui"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

After (GGUF via ctransformers, Colab with a T4 GPU):

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3wypY9rCa0iY",
   "metadata": {
    "id": "3wypY9rCa0iY"
   },
   "outputs": [],
   "source": [
    "!pip install langchain==0.0.354 sentence-transformers==2.2.2 chatdocs ipywidgets --quiet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b1062256-93dc-4ba5-be74-17407bebf867",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "b1062256-93dc-4ba5-be74-17407bebf867",
    "outputId": "ddb6b2d4-03e1-4cc1-e8a2-f44965dda4a7"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "load INSTRUCTOR_Transformer\n",
      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
      "  return self.fget.__get__(instance, owner)()\n",
      "max_seq_length 512\n",
      "Fetching 1 files: 100% 1/1 [00:00<00:00, 17623.13it/s]\n",
      "Fetching 1 files: 100% 1/1 [00:00<00:00, 3566.59it/s]\n"
     ]
    }
   ],
   "source": [
    "with open('chatdocs.yml', 'w') as f:\n",
    "    f.write('''\n",
    "embeddings:\n",
    "  model: hkunlp/instructor-large\n",
    "\n",
    "llm: ctransformers\n",
    "\n",
    "ctransformers:\n",
    "  model: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF\n",
    "  model_file: openhermes-2.5-mistral-7b.Q4_K_M.gguf\n",
    "  model_type: llama\n",
    "  config:\n",
    "    gpu_layers: 50\n",
    "\n",
    "retriever:\n",
    "  search_kwargs:\n",
    "    k: 5\n",
    "    ''')\n",
    "\n",
    "!chatdocs download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4448c264-1894-4884-8e85-0d020fd68d16",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "4448c264-1894-4884-8e85-0d020fd68d16",
    "outputId": "6acd06fe-4f6d-4d8e-d9a9-e52181c77125"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "mkdir: cannot create directory ‘db’: File exists\n",
      "Creating new vectorstore\n",
      "Loading documents from db\n",
      "Loading new documents: 0it [00:00, ?it/s]\n",
      "No new documents to load\n"
     ]
    }
   ],
   "source": [
    "# note: add files via the file browser upload feature and re-run this cell if needed\n",
    "!mkdir db\n",
    "!chatdocs add db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
    "outputId": "e07ba948-569a-4807-e3f6-3b039713a090"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "load INSTRUCTOR_Transformer\n",
      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
      "  return self.fget.__get__(instance, owner)()\n",
      "max_seq_length 512\n",
      "\n",
      "Type your query below and press Enter.\n",
      "Type \u001b[32m'exit'\u001b[0m or \u001b[32m'quit'\u001b[0m or \u001b[32m'q'\u001b[0m to exit the application.\n",
      "\n",
      "\u001b[1mQ: \u001b[0m"
     ]
    }
   ],
   "source": [
    "# note: Colab allows for entering input directly into a running code cell (Jupyter Lab does not)\n",
    "# to use the chat mode elsewhere, run it directly from a terminal; otherwise, try the ui mode\n",
    "if 'google.colab' in str(get_ipython()):\n",
    "    !chatdocs chat\n",
    "else:\n",
    "    !chatdocs ui"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f",
   "metadata": {
    "id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "colab": {
   "provenance": [],
   "gpuType": "T4"
  },
  "accelerator": "GPU"
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
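The core of the change is the chatdocs.yml cell: the gptq backend and its safetensors model are replaced by the ctransformers backend loading a GGUF file, with gpu_layers controlling GPU offload. As a sanity check outside ChatDocs, the same model can be loaded directly with the ctransformers Python API. This is a minimal sketch, not part of the commit; it reuses the model repo, model file, and gpu_layers value from the chatdocs.yml above, and the prompt is an arbitrary placeholder.

# sanity check (not part of the commit): load the same GGUF model directly with ctransformers
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf",  # same file as in chatdocs.yml
    model_type="llama",
    gpu_layers=50,  # same offload setting as the config above
)
print(llm("What is retrieval-augmented generation?", max_new_tokens=64))

If this loads and generates on the T4, the gpu_layers: 50 setting should behave the same way when chatdocs drives the model.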
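Since the config cell writes chatdocs.yml from a triple-quoted string, a YAML indentation slip would otherwise only surface when chatdocs runs. A quick parse catches it earlier; this sketch assumes PyYAML is available (chatdocs itself reads the file, so this check is optional):

# not part of the commit: verify the generated chatdocs.yml parses as expected
import yaml

with open("chatdocs.yml") as f:
    cfg = yaml.safe_load(f)

assert cfg["llm"] == "ctransformers"
print(cfg["ctransformers"]["model_file"])  # openhermes-2.5-mistral-7b.Q4_K_M.gguf
print(cfg["retriever"]["search_kwargs"])   # {'k': 5}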