Skip to content

Commit 81b3f4c

Browse files
committed
Fix ExLlama script and update to ExLlamaV2; Fix ChatDocs by switching to GGUF (may need to deprecate ChatDocs soon)
1 parent ca87da0 commit 81b3f4c

File tree

2 files changed

+346
-262
lines changed

ChatDocs.ipynb

Lines changed: 160 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,140 +1,166 @@
1 1
{
2-
"cells": [
3-
{
4-
"cell_type": "code",
5-
"execution_count": 1,
6-
"id": "bc32353a-3506-4319-9051-f1822d455aae",
7-
"metadata": {},
8-
"outputs": [],
9-
"source": [
10-
"!pip install chatdocs auto-gptq xformers ipywidgets --quiet\n",
11-
"\n",
12-
"# note: if you see the message 'CUDA extension not installed' while running chatdocs, try installing auto-gptq from a wheel (see https://github.com/PanQiWei/AutoGPTQ/releases/)\n",
13-
"# !pip install https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.2/auto_gptq-0.3.2+cu118-cp310-cp310-linux_x86_64.whl --quiet"
14-
]
15-
},
16-
{
17-
"cell_type": "code",
18-
"execution_count": 2,
19-
"id": "b1062256-93dc-4ba5-be74-17407bebf867",
20-
"metadata": {},
21-
"outputs": [
2+
"cells": [
22 3
{
23-
"name": "stdout",
24-
"output_type": "stream",
25-
"text": [
26-
"load INSTRUCTOR_Transformer\n",
27-
"[2023-08-22 11:56:55,183] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
28-
"max_seq_length 512\n",
29-
"skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n"
30-
]
31-
}
32-
],
33-
"source": [
34-
"with open('chatdocs.yml', 'w') as f:\n",
35-
" f.write('''\n",
36-
"embeddings:\n",
37-
" model: hkunlp/instructor-large\n",
38-
"\n",
39-
"llm: gptq\n",
40-
"\n",
41-
"gptq:\n",
42-
" model: TheBloke/Vigogne-2-13B-Instruct-GPTQ\n",
43-
" model_file: model.safetensors\n",
44-
" device: 0\n",
45-
"\n",
46-
"retriever:\n",
47-
" search_kwargs:\n",
48-
" k: 5\n",
49-
" ''')\n",
50-
"\n",
51-
"!chatdocs download"
52-
]
53-
},
54-
{
55-
"cell_type": "code",
56-
"execution_count": 3,
57-
"id": "4448c264-1894-4884-8e85-0d020fd68d16",
58-
"metadata": {},
59-
"outputs": [
4+
"cell_type": "code",
5+
"source": [
6+
"!pip install langchain==0.0.354 sentence-transformers==2.2.2 chatdocs ipywidgets --quiet"
7+
],
8+
"metadata": {
9+
"id": "3wypY9rCa0iY"
10+
},
11+
"id": "3wypY9rCa0iY",
12+
"execution_count": 1,
13+
"outputs": []
14+
},
60 15
{
61-
"name": "stdout",
62-
"output_type": "stream",
63-
"text": [
64-
"Creating new vectorstore\n",
65-
"Loading documents from db\n",
66-
"Loading new documents: 0it [00:00, ?it/s]\n",
67-
"No new documents to load\n"
68-
]
69-
}
70-
],
71-
"source": [
72-
"# note: add files via the file browser upload feature and re-run this cell if needed\n",
73-
"!mkdir db\n",
74-
"!chatdocs add db"
75-
]
76-
},
77-
{
78-
"cell_type": "code",
79-
"execution_count": 4,
80-
"id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
81-
"metadata": {},
82-
"outputs": [
16+
"cell_type": "code",
17+
"execution_count": 2,
18+
"id": "b1062256-93dc-4ba5-be74-17407bebf867",
19+
"metadata": {
20+
"colab": {
21+
"base_uri": "https://localhost:8080/"
22+
},
23+
"id": "b1062256-93dc-4ba5-be74-17407bebf867",
24+
"outputId": "ddb6b2d4-03e1-4cc1-e8a2-f44965dda4a7"
25+
},
26+
"outputs": [
27+
{
28+
"output_type": "stream",
29+
"name": "stdout",
30+
"text": [
31+
"load INSTRUCTOR_Transformer\n",
32+
"/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
33+
" return self.fget.__get__(instance, owner)()\n",
34+
"max_seq_length 512\n",
35+
"Fetching 1 files: 100% 1/1 [00:00<00:00, 17623.13it/s]\n",
36+
"Fetching 1 files: 100% 1/1 [00:00<00:00, 3566.59it/s]\n"
37+
]
38+
}
39+
],
40+
"source": [
41+
"with open('chatdocs.yml', 'w') as f:\n",
42+
" f.write('''\n",
43+
"embeddings:\n",
44+
" model: hkunlp/instructor-large\n",
45+
"\n",
46+
"llm: ctransformers\n",
47+
"\n",
48+
"ctransformers:\n",
49+
" model: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF\n",
50+
" model_file: openhermes-2.5-mistral-7b.Q4_K_M.gguf\n",
51+
" model_type: llama\n",
52+
" config:\n",
53+
" gpu_layers: 50\n",
54+
"\n",
55+
"retriever:\n",
56+
" search_kwargs:\n",
57+
" k: 5\n",
58+
" ''')\n",
59+
"\n",
60+
"!chatdocs download"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": 3,
66+
"id": "4448c264-1894-4884-8e85-0d020fd68d16",
67+
"metadata": {
68+
"colab": {
69+
"base_uri": "https://localhost:8080/"
70+
},
71+
"id": "4448c264-1894-4884-8e85-0d020fd68d16",
72+
"outputId": "6acd06fe-4f6d-4d8e-d9a9-e52181c77125"
73+
},
74+
"outputs": [
75+
{
76+
"output_type": "stream",
77+
"name": "stdout",
78+
"text": [
79+
"mkdir: cannot create directory ‘db’: File exists\n",
80+
"Creating new vectorstore\n",
81+
"Loading documents from db\n",
82+
"Loading new documents: 0it [00:00, ?it/s]\n",
83+
"No new documents to load\n"
84+
]
85+
}
86+
],
87+
"source": [
88+
"# note: add files via the file browser upload feature and re-run this cell if needed\n",
89+
"!mkdir db\n",
90+
"!chatdocs add db"
91+
]
92+
},
83 93
{
84-
"name": "stdout",
85-
"output_type": "stream",
86-
"text": [
87-
"load INSTRUCTOR_Transformer\n",
88-
"[2023-08-22 11:57:08,373] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
89-
"max_seq_length 512\n",
90-
"skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n",
91-
" * Serving Quart app 'chatdocs.ui'\n",
92-
" * Environment: production\n",
93-
" * Please use an ASGI server (e.g. Hypercorn) directly in production\n",
94-
" * Debug mode: False\n",
95-
" * Running on http://localhost:5000 (CTRL + C to quit)\n",
96-
"[2023-08-22 11:57:13 -0600] [7793] [INFO] Running on http://127.0.0.1:5000 (CTRL + C to quit)\n",
97-
"^C\n"
98-
]
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
97+
"metadata": {
98+
"colab": {
99+
"base_uri": "https://localhost:8080/"
100+
},
101+
"id": "e7833375-5b6b-4ff2-afdd-8f6cf04d6a4d",
102+
"outputId": "e07ba948-569a-4807-e3f6-3b039713a090"
103+
},
104+
"outputs": [
105+
{
106+
"output_type": "stream",
107+
"name": "stdout",
108+
"text": [
109+
"load INSTRUCTOR_Transformer\n",
110+
"/usr/local/lib/python3.10/dist-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
111+
" return self.fget.__get__(instance, owner)()\n",
112+
"max_seq_length 512\n",
113+
"\n",
114+
"Type your query below and press Enter.\n",
115+
"Type \u001b[32m'exit'\u001b[0m or \u001b[32m'quit'\u001b[0m or \u001b[32m'q'\u001b[0m to exit the application.\n",
116+
"\n",
117+
"\u001b[1mQ: \u001b[0m"
118+
]
119+
}
120+
],
121+
"source": [
122+
"# note: Colab allows for entering input directly into a running code cell (Jupyter Lab does not)\n",
123+
"# to use the chat mode elsewhere, run it directly from a terminal; otherwise, try the ui mode\n",
124+
"if 'google.colab' in str(get_ipython()):\n",
125+
" !chatdocs chat\n",
126+
"else:\n",
127+
" !chatdocs ui"
128+
]
129+
},
130+
{
131+
"cell_type": "code",
132+
"execution_count": null,
133+
"id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f",
134+
"metadata": {
135+
"id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f"
136+
},
137+
"outputs": [],
138+
"source": []
99 139
}
100-
],
101-
"source": [
102-
"# note: Colab allows for entering input directly into a running code cell (Jupyter Lab does not)\n",
103-
"# to use the chat mode elsewhere, run it directly from a terminal; otherwise, try the ui mode\n",
104-
"if 'google.colab' in str(get_ipython()):\n",
105-
" !chatdocs chat\n",
106-
"else:\n",
107-
" !chatdocs ui"
108-
]
109-
},
110-
{
111-
"cell_type": "code",
112-
"execution_count": null,
113-
"id": "ff66f540-2c44-47cb-90ee-04b8debf5e5f",
114-
"metadata": {},
115-
"outputs": [],
116-
"source": []
117-
}
118-
],
119-
"metadata": {
120-
"kernelspec": {
121-
"display_name": "Python 3 (ipykernel)",
122-
"language": "python",
123-
"name": "python3"
140+
],
141+
"metadata": {
142+
"kernelspec": {
143+
"display_name": "Python 3",
144+
"name": "python3"
145+
},
146+
"language_info": {
147+
"codemirror_mode": {
148+
"name": "ipython",
149+
"version": 3
150+
},
151+
"file_extension": ".py",
152+
"mimetype": "text/x-python",
153+
"name": "python",
154+
"nbconvert_exporter": "python",
155+
"pygments_lexer": "ipython3",
156+
"version": "3.10.12"
157+
},
158+
"colab": {
159+
"provenance": [],
160+
"gpuType": "T4"
161+
},
162+
"accelerator": "GPU"
124 163
},
125-
"language_info": {
126-
"codemirror_mode": {
127-
"name": "ipython",
128-
"version": 3
129-
},
130-
"file_extension": ".py",
131-
"mimetype": "text/x-python",
132-
"name": "python",
133-
"nbconvert_exporter": "python",
134-
"pygments_lexer": "ipython3",
135-
"version": "3.10.12"
136-
}
137-
},
138-
"nbformat": 4,
139-
"nbformat_minor": 5
140-
}
164+
"nbformat": 4,
165+
"nbformat_minor": 5
166+
}

0 commit comments

Comments
 (0)