Commit: Fix ExLlama script and update to ExLlamaV2; Fix ChatDocs by switching to GGUF (may need to deprecate ChatDocs soon)
Showing 2 changed files with 346 additions and 262 deletions.
ChatDocs notebook (@@ -1,140 +1,166 @@)

Before (GPTQ via auto-gptq, local Jupyter):

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bc32353a-3506-4319-9051-f1822d455aae",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install chatdocs auto-gptq xformers ipywidgets --quiet\n",
    "\n",
    "# note: if you see the message 'CUDA extension not installed' while running chatdocs, try installing auto-gptq from a wheel (see https://github.com/PanQiWei/AutoGPTQ/releases/)\n",
    "# !pip install https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.2/auto_gptq-0.3.2+cu118-cp310-cp310-linux_x86_64.whl --quiet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b1062256-93dc-4ba5-be74-17407bebf867",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load INSTRUCTOR_Transformer\n",
      "[2023-08-22 11:56:55,183] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "max_seq_length 512\n",
      "skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n"
     ]
    }
   ],
   "source": [
    "with open('chatdocs.yml', 'w') as f:\n",
    "    f.write('''\n",
    "embeddings:\n",
    "  model: hkunlp/instructor-large\n",
    "\n",
    "llm: gptq\n",
    "\n",
    "gptq:\n",
    "  model: TheBloke/Vigogne-2-13B-Instruct-GPTQ\n",
    "  model_file: model.safetensors\n",
    "  device: 0\n",
    "\n",
    "retriever:\n",
    "  search_kwargs:\n",
    "    k: 5\n",
    "    ''')\n",
    "\n",
    "!chatdocs download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4448c264-1894-4884-8e85-0d020fd68d16",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating new vectorstore\n",
      "Loading documents from db\n",
      "Loading new documents: 0it [00:00, ?it/s]\n",
      "No new documents to load\n"
     ]
    }
   ],
   "source": [
    "# note: add files via the file browser upload feature and re-run this cell if needed\n",
    "!mkdir db\n",
    "!chatdocs add db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "load INSTRUCTOR_Transformer\n",
      "[2023-08-22 11:57:08,373] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "max_seq_length 512\n",
      "skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n",
      " * Serving Quart app 'chatdocs.ui'\n",
      " * Environment: production\n",
      " * Please use an ASGI server (e.g. Hypercorn) directly in production\n",
      " * Debug mode: False\n",
      " * Running on http://localhost:5000 (CTRL + C to quit)\n",
      "[2023-08-22 11:57:13 -0600] [7793] [INFO] Running on http://127.0.0.1:5000 (CTRL + C to quit)\n",
      "^C\n"
     ]
    }
   ],
   "source": [
    "# note: Colab allows for entering input directly into a running code cell (Jupyter Lab does not)\n",
    "# to use the chat mode elsewhere, run it directly from a terminal; otherwise, try the ui mode\n",
    "if 'google.colab' in str(get_ipython()):\n",
    "    !chatdocs chat\n",
    "else:\n",
    "    !chatdocs ui"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

After (GGUF via ctransformers, Colab with a T4 GPU):

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3wypY9rCa0iY",
   "metadata": {
    "id": "3wypY9rCa0iY"
   },
   "outputs": [],
   "source": [
    "!pip install langchain==0.0.354 sentence-transformers==2.2.2 chatdocs ipywidgets --quiet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b1062256-93dc-4ba5-be74-17407bebf867",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "b1062256-93dc-4ba5-be74-17407bebf867",
    "outputId": "ddb6b2d4-03e1-4cc1-e8a2-f44965dda4a7"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "load INSTRUCTOR_Transformer\n",
      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
      "  return self.fget.__get__(instance, owner)()\n",
      "max_seq_length 512\n",
      "Fetching 1 files: 100% 1/1 [00:00<00:00, 17623.13it/s]\n",
      "Fetching 1 files: 100% 1/1 [00:00<00:00, 3566.59it/s]\n"
     ]
    }
   ],
   "source": [
    "with open('chatdocs.yml', 'w') as f:\n",
    "    f.write('''\n",
    "embeddings:\n",
    "  model: hkunlp/instructor-large\n",
    "\n",
    "llm: ctransformers\n",
    "\n",
    "ctransformers:\n",
    "  model: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF\n",
    "  model_file: openhermes-2.5-mistral-7b.Q4_K_M.gguf\n",
    "  model_type: llama\n",
    "  config:\n",
    "    gpu_layers: 50\n",
    "\n",
    "retriever:\n",
    "  search_kwargs:\n",
    "    k: 5\n",
    "    ''')\n",
    "\n",
    "!chatdocs download"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4448c264-1894-4884-8e85-0d020fd68d16",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "4448c264-1894-4884-8e85-0d020fd68d16",
    "outputId": "6acd06fe-4f6d-4d8e-d9a9-e52181c77125"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "mkdir: cannot create directory ‘db’: File exists\n",
      "Creating new vectorstore\n",
      "Loading documents from db\n",
      "Loading new documents: 0it [00:00, ?it/s]\n",
      "No new documents to load\n"
     ]
    }
   ],
   "source": [
    "# note: add files via the file browser upload feature and re-run this cell if needed\n",
    "!mkdir db\n",
    "!chatdocs add db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
    "outputId": "e07ba948-569a-4807-e3f6-3b039713a090"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "load INSTRUCTOR_Transformer\n",
      "/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
      "  return self.fget.__get__(instance, owner)()\n",
      "max_seq_length 512\n",
      "\n",
      "Type your query below and press Enter.\n",
      "Type \u001b[32m'exit'\u001b[0m or \u001b[32m'quit'\u001b[0m or \u001b[32m'q'\u001b[0m to exit the application.\n",
      "\n",
      "\u001b[1mQ: \u001b[0m"
     ]
    }
   ],
   "source": [
    "# note: Colab allows for entering input directly into a running code cell (Jupyter Lab does not)\n",
    "# to use the chat mode elsewhere, run it directly from a terminal; otherwise, try the ui mode\n",
    "if 'google.colab' in str(get_ipython()):\n",
    "    !chatdocs chat\n",
    "else:\n",
    "    !chatdocs ui"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f",
   "metadata": {
    "id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "colab": {
   "provenance": [],
   "gpuType": "T4"
  },
  "accelerator": "GPU"
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
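The core of the change is the chatdocs.yml cell: the gptq backend and its safetensors model are replaced by the ctransformers backend loading a GGUF file, with gpu_layers controlling GPU offload. As a sanity check outside ChatDocs, the same model can be loaded directly with the ctransformers Python API. This is a minimal sketch, not part of the commit; it reuses the model repo, model file, and gpu_layers value from the chatdocs.yml above, and the prompt is an arbitrary placeholder.

# sanity check (not part of the commit): load the same GGUF model directly with ctransformers
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf",  # same file as in chatdocs.yml
    model_type="llama",
    gpu_layers=50,  # same offload setting as the config above
)
print(llm("What is retrieval-augmented generation?", max_new_tokens=64))

If this loads and generates on the T4, the gpu_layers: 50 setting should behave the same way when chatdocs drives the model.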
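Since the config cell writes chatdocs.yml from a triple-quoted string, a YAML indentation slip would otherwise only surface when chatdocs runs. A quick parse catches it earlier; this sketch assumes PyYAML is available (chatdocs itself reads the file, so this check is optional):

# not part of the commit: verify the generated chatdocs.yml parses as expected
import yaml

with open("chatdocs.yml") as f:
    cfg = yaml.safe_load(f)

assert cfg["llm"] == "ctransformers"
print(cfg["ctransformers"]["model_file"])  # openhermes-2.5-mistral-7b.Q4_K_M.gguf
print(cfg["retriever"]["search_kwargs"])   # {'k': 5}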