""" This example shows how to build a local chatbot prototype using llmware and Streamlit. The example shows
how to use several GGUF chat models in the LLMWare catalog, along with using the model.stream method which
provides a real time generator for displaying the bot response in real-time.
This is purposefully super simple script (but surprisingly fun) to provide the core of the recipe.
The Streamlit code below is derived from Streamlit tutorials available at:
https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps
If you are new to using Steamlit, to run this example:
1. pip3 install streamlit
2. to run, go to the command line: streamlit run "path/to/gguf_streaming_chatbot.py"
"""
import streamlit as st

from llmware.models import ModelCatalog
from llmware.gguf_configs import GGUFConfigs

# cap the maximum number of tokens the GGUF engine will generate per response
GGUFConfigs().set_config("max_output_tokens", 500)
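
# for reference, model.stream can also be consumed directly, outside of Streamlit -
# a minimal sketch (assumes the same catalog model names used at the bottom of this script):
#
#   model = ModelCatalog().load_model("phi-3-gguf")
#   for token in model.stream("What is the capital of France?"):
#       print(token, end="", flush=True)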
def simple_chat_ui_app(model_name):

    st.title(f"Simple Chat with {model_name}")

    model = ModelCatalog().load_model(model_name, temperature=0.3, sample=True, max_output=450)
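
    # the same loaded model also supports one-shot (non-streaming) generation - a minimal
    # sketch, assuming llmware's standard inference() method, which returns a dict
    # that includes an "llm_response" key:
    #
    #   response = model.inference("What is the capital of France?")
    #   print(response["llm_response"])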
    # initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # accept user input
    prompt = st.chat_input("Say something")

    if prompt:

        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):

            # note that the st.write_stream method consumes a generator - so pass model.stream(prompt) directly
            bot_response = st.write_stream(model.stream(prompt))
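            # st.write_stream requires a recent Streamlit release (it was added in
            # Streamlit 1.31) - on older versions, a rough fallback sketch would
            # accumulate tokens manually into a placeholder, e.g.:
            #
            #   placeholder = st.empty()
            #   bot_response = ""
            #   for token in model.stream(prompt):
            #       bot_response += token
            #       placeholder.markdown(bot_response)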

        st.session_state.messages.append({"role": "user", "content": prompt})
        st.session_state.messages.append({"role": "assistant", "content": bot_response})

    return 0

if __name__ == "__main__":

    # a few representative good chat models that can run locally
    # note: each model will take a minute to download and cache locally the first time it is used

    chat_models = ["phi-3-gguf",
                   "llama-2-7b-chat-gguf",
                   "llama-3-instruct-bartowski-gguf",
                   "openhermes-mistral-7b-gguf",
                   "zephyr-7b-gguf",
                   "tiny-llama-chat-gguf"]

    model_name = chat_models[0]

    simple_chat_ui_app(model_name)