Skip to content

Commit

Permalink
Fix ExLlama script and update to ExLlamaV2; Fix ChatDocs by switching…
Browse files Browse the repository at this point in the history
… to GGUF (may need to deprecate ChatDocs soon)
  • Loading branch information
davedgd committed Feb 22, 2024
1 parent ca87da0 commit 81b3f4c
Show file tree
Hide file tree
Showing 2 changed files with 346 additions and 262 deletions.
294 changes: 160 additions & 134 deletions ChatDocs.ipynb
Original file line number Diff line number Diff line change
@@ -1,140 +1,166 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "bc32353a-3506-4319-9051-f1822d455aae",
"metadata": {},
"outputs": [],
"source": [
"# %pip (rather than !pip) installs into the environment of the running kernel\n",
"%pip install chatdocs auto-gptq xformers ipywidgets --quiet\n",
"\n",
"# note: if you see the message 'CUDA extension not installed' while running chatdocs, try installing auto-gptq from a wheel (see https://github.com/PanQiWei/AutoGPTQ/releases/)\n",
"# %pip install https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.2/auto_gptq-0.3.2+cu118-cp310-cp310-linux_x86_64.whl --quiet"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b1062256-93dc-4ba5-be74-17407bebf867",
"metadata": {},
"outputs": [
"cells": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"load INSTRUCTOR_Transformer\n",
"[2023-08-22 11:56:55,183] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
"max_seq_length 512\n",
"skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n"
]
}
],
"source": [
"with open('chatdocs.yml', 'w') as f:\n",
" f.write('''\n",
"embeddings:\n",
" model: hkunlp/instructor-large\n",
"\n",
"llm: gptq\n",
"\n",
"gptq:\n",
" model: TheBloke/Vigogne-2-13B-Instruct-GPTQ\n",
" model_file: model.safetensors\n",
" device: 0\n",
"\n",
"retriever:\n",
" search_kwargs:\n",
" k: 5\n",
" ''')\n",
"\n",
"!chatdocs download"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4448c264-1894-4884-8e85-0d020fd68d16",
"metadata": {},
"outputs": [
"cell_type": "code",
"source": [
"!pip install langchain==0.0.354 sentence-transformers==2.2.2 chatdocs ipywidgets --quiet"
],
"metadata": {
"id": "3wypY9rCa0iY"
},
"id": "3wypY9rCa0iY",
"execution_count": 1,
"outputs": []
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating new vectorstore\n",
"Loading documents from db\n",
"Loading new documents: 0it [00:00, ?it/s]\n",
"No new documents to load\n"
]
}
],
"source": [
"# note: add files via the file browser upload feature and re-run this cell if needed\n",
"!mkdir db\n",
"!chatdocs add db"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
"metadata": {},
"outputs": [
"cell_type": "code",
"execution_count": 2,
"id": "b1062256-93dc-4ba5-be74-17407bebf867",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "b1062256-93dc-4ba5-be74-17407bebf867",
"outputId": "ddb6b2d4-03e1-4cc1-e8a2-f44965dda4a7"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"load INSTRUCTOR_Transformer\n",
"/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
" return self.fget.__get__(instance, owner)()\n",
"max_seq_length 512\n",
"Fetching 1 files: 100% 1/1 [00:00<00:00, 17623.13it/s]\n",
"Fetching 1 files: 100% 1/1 [00:00<00:00, 3566.59it/s]\n"
]
}
],
"source": [
"with open('chatdocs.yml', 'w') as f:\n",
" f.write('''\n",
"embeddings:\n",
" model: hkunlp/instructor-large\n",
"\n",
"llm: ctransformers\n",
"\n",
"ctransformers:\n",
" model: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF\n",
" model_file: openhermes-2.5-mistral-7b.Q4_K_M.gguf\n",
" model_type: llama\n",
" config:\n",
" gpu_layers: 50\n",
"\n",
"retriever:\n",
" search_kwargs:\n",
" k: 5\n",
" ''')\n",
"\n",
"!chatdocs download"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4448c264-1894-4884-8e85-0d020fd68d16",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4448c264-1894-4884-8e85-0d020fd68d16",
"outputId": "6acd06fe-4f6d-4d8e-d9a9-e52181c77125"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"mkdir: cannot create directory ‘db’: File exists\n",
"Creating new vectorstore\n",
"Loading documents from db\n",
"Loading new documents: 0it [00:00, ?it/s]\n",
"No new documents to load\n"
]
}
],
"source": [
"# note: add files via the file browser upload feature and re-run this cell if needed\n",
"!mkdir db\n",
"!chatdocs add db"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"load INSTRUCTOR_Transformer\n",
"[2023-08-22 11:57:08,373] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
"max_seq_length 512\n",
"skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n",
" * Serving Quart app 'chatdocs.ui'\n",
" * Environment: production\n",
" * Please use an ASGI server (e.g. Hypercorn) directly in production\n",
" * Debug mode: False\n",
" * Running on http://localhost:5000 (CTRL + C to quit)\n",
"[2023-08-22 11:57:13 -0600] [7793] [INFO] Running on http://127.0.0.1:5000 (CTRL + C to quit)\n",
"^C\n"
]
"cell_type": "code",
"execution_count": null,
"id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
"outputId": "e07ba948-569a-4807-e3f6-3b039713a090"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"load INSTRUCTOR_Transformer\n",
"/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
" return self.fget.__get__(instance, owner)()\n",
"max_seq_length 512\n",
"\n",
"Type your query below and press Enter.\n",
"Type \u001b[32m'exit'\u001b[0m or \u001b[32m'quit'\u001b[0m or \u001b[32m'q'\u001b[0m to exit the application.\n",
"\n",
"\u001b[1mQ: \u001b[0m"
]
}
],
"source": [
"# note: Colab allows for entering input directly into a running code cell (Jupyter Lab does not)\n",
"# to use the chat mode elsewhere, run it directly from a terminal; otherwise, try the ui mode\n",
"if 'google.colab' in str(get_ipython()):\n",
" !chatdocs chat\n",
"else:\n",
" !chatdocs ui"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f",
"metadata": {
"id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f"
},
"outputs": [],
"source": []
}
],
"source": [
"# note: Colab allows for entering input directly into a running code cell (Jupyter Lab does not)\n",
"# to use the chat mode elsewhere, run it directly from a terminal; otherwise, try the ui mode\n",
"if 'google.colab' in str(get_ipython()):\n",
" !chatdocs chat\n",
"else:\n",
" !chatdocs ui"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"colab": {
"provenance": [],
"gpuType": "T4"
},
"accelerator": "GPU"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 81b3f4c

Please sign in to comment.