Skip to content

Commit

Permalink
Merge pull request #163 from nulib/prototype-streaming
Browse files Browse the repository at this point in the history
Add websocket-based streaming chat to prototype
  • Loading branch information
bmquinn authored Aug 31, 2023
2 parents 72ac444 + 069a01b commit 5c44515
Show file tree
Hide file tree
Showing 14 changed files with 640 additions and 58 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ $RECYCLE.BIN/

.vscode
/samconfig.toml
/samconfig.yaml
/env.json
/env.*.json
/*.parameters
Expand Down
Empty file added chat/src/__init__.py
Empty file.
117 changes: 117 additions & 0 deletions chat/src/handlers/chat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import boto3
import json
import os
import setup
from helpers.apitoken import ApiToken
from helpers.prompts import document_template, prompt_template
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import PromptTemplate
from openai.error import InvalidRequestError

# Index name used when the request payload does not supply "index".
DEFAULT_INDEX = "Work"
# Text key used when the request payload does not supply "text_key".
DEFAULT_KEY = "title"
# Comma-separated attribute names used when the payload does not supply
# "attributes"; the handler splits this string on "," before use.
DEFAULT_ATTRIBUTES = ("title,alternate_title,collection,contributor,creator,"
                      "date_created,description,genre,language,library_unit,"
                      "location,physical_description_material,physical_description_size,"
                      "published,rights_statement,scope_and_contents,series,source,"
                      "style_period,subject,table_of_contents,technique,visibility,"
                      "work_type")

class Websocket:
    """Sends JSON messages to one API Gateway websocket connection.

    Every outgoing message is tagged with the ``ref`` value given at
    construction time.
    """

    def __init__(self, endpoint_url, connection_id, ref):
        """Create the management API client for *endpoint_url*.

        Args:
            endpoint_url: URL passed to the ``apigatewaymanagementapi`` client.
            connection_id: Id of the connection that messages are posted to.
            ref: Value stored under the ``"ref"`` key of every message sent.
        """
        self.client = boto3.client('apigatewaymanagementapi', endpoint_url=endpoint_url)
        self.connection_id = connection_id
        self.ref = ref

    def send(self, data):
        """Post *data* (a dict) to the connection as UTF-8 encoded JSON.

        The ``"ref"`` key is added to a copy of *data*, so the caller's
        dict is left untouched. An existing ``"ref"`` key is overridden.
        """
        message = {**data, 'ref': self.ref}
        encoded = json.dumps(message).encode('utf-8')
        self.client.post_to_connection(Data=encoded, ConnectionId=self.connection_id)

class StreamingSocketCallbackHandler(BaseCallbackHandler):
    """Callback handler that forwards each new LLM token to a websocket."""

    def __init__(self, socket: Websocket):
        """Remember the *socket* that tokens are sent through."""
        self.socket = socket

    def on_llm_new_token(self, token: str, **kwargs):
        """Send *token* to the socket as a ``{'token': ...}`` message."""
        message = {'token': token}
        self.socket.send(message)

def _requested_attributes(payload, text_key):
    """Return the metadata attributes requested in *payload*, de-duplicated.

    The text key and "source" are excluded because the caller handles them
    separately. First-seen order is kept so that the generated document
    prompt is the same for the same request on every invocation.
    """
    requested = payload.get("attributes", DEFAULT_ATTRIBUTES).split(",")
    excluded = {text_key, "source"}
    return [item for item in dict.fromkeys(requested) if item not in excluded]


def handler(event, context):
    """Answer a question sent over a websocket, streaming the reply back.

    The JSON body of *event* is read for the keys "auth", "ref", "question",
    "index", "text_key" and "attributes". Source documents, streamed tokens
    and the final answer are sent to the originating connection.

    Args:
        event: Invocation event; "body" and "requestContext" are read.
        context: Unused.

    Returns:
        A dict with "statusCode" 200 on success, or "statusCode" and "body"
        when the request is rejected (401 unauthorized, 400 missing question).

    Raises:
        Exception: any unexpected error is re-raised after the event is printed.
    """
    try:
        # A body key that is present but None would make json.loads raise
        # TypeError, so fall back to an empty object for any falsy body.
        payload = json.loads(event.get('body') or '{}')

        request_context = event.get('requestContext', {})
        connection_id = request_context.get('connectionId')
        endpoint_url = f'https://{request_context.get("domainName")}/{request_context.get("stage")}'
        ref = payload.get('ref')
        socket = Websocket(connection_id=connection_id, endpoint_url=endpoint_url, ref=ref)

        api_token = ApiToken(signed_token=payload.get("auth"))
        if not api_token.is_logged_in():
            socket.send({"statusCode": 401, "body": "Unauthorized"})
            return {
                "statusCode": 401,
                "body": "Unauthorized"
            }

        question = payload.get("question")
        if not question:
            # Reject early instead of running a search and an LLM call on nothing.
            socket.send({"statusCode": 400, "body": "Missing question"})
            return {
                "statusCode": 400,
                "body": "Missing question"
            }

        index_name = payload.get("index", DEFAULT_INDEX)
        text_key = payload.get("text_key", DEFAULT_KEY)
        attributes = _requested_attributes(payload, text_key)

        weaviate = setup.weaviate_vector_store(index_name=index_name,
                                               text_key=text_key,
                                               attributes=attributes + ["source"])

        client = setup.openai_chat_client(callbacks=[StreamingSocketCallbackHandler(socket)], streaming=True)

        prompt = PromptTemplate(
            template=prompt_template(),
            input_variables=["question", "context"]
        )

        document_prompt = PromptTemplate(
            template=document_template(attributes),
            input_variables=["page_content", "source"] + attributes,
        )

        docs = weaviate.similarity_search(question, k=10, additional="certainty")
        chain = load_qa_with_sources_chain(
            client,
            chain_type="stuff",
            prompt=prompt,
            document_prompt=document_prompt,
            document_variable_name="context",
            verbose=to_bool(os.getenv("VERBOSE"))
        )

        try:
            # Send the retrieved documents first; tokens stream through the
            # callback handler while the chain runs, then the full answer follows.
            doc_response = [doc.__dict__ for doc in docs]
            socket.send({"question": question, "source_documents": doc_response})
            response = chain({"question": question, "input_documents": docs})
            response = {
                "answer": response["output_text"],
            }
            socket.send(response)
        except InvalidRequestError as err:
            # Report the OpenAI request error to the client as the answer.
            response = {
                "question": question,
                "answer": str(err),
                "source_documents": []
            }
            socket.send(response)

        return {'statusCode': 200}
    except Exception:
        # NOTE(review): the event body carries the caller's "auth" token, so
        # this print writes it to the logs - consider redacting it.
        print(event)
        raise

def to_bool(val):
    """Interpret *val* as a boolean flag.

    Strings are false when, ignoring case and surrounding whitespace, they
    are empty or one of "no", "false" or "0"; any other string is true.
    Non-string values use ordinary truthiness, so ``None`` is false.
    """
    if isinstance(val, str):
        # Strip first so values such as " false " or "0\n" are not read as true.
        return val.strip().lower() not in ("", "no", "false", "0")
    return bool(val)
28 changes: 28 additions & 0 deletions chat/src/helpers/apitoken.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from datetime import datetime
import jwt
import os

class ApiToken:
    """Wraps the claims of an API token, falling back to an anonymous token."""

    @classmethod
    def empty_token(cls):
        """Return the claims of a not-logged-in token valid for 12 hours.

        ``iat`` and ``exp`` are both integer epoch seconds.
        """
        issued_at = int(datetime.now().timestamp())
        return {
            'iss': os.getenv('DC_API_ENDPOINT'),
            'exp': issued_at + 12 * 60 * 60,  # 12 hours
            'iat': issued_at,
            'entitlements': [],
            'isLoggedIn': False,
        }

    def __init__(self, signed_token=None):
        """Decode *signed_token* with the ``API_TOKEN_SECRET`` env var (HS256).

        When no token is given, or decoding fails for any reason, the
        instance holds the claims from :meth:`empty_token` instead.
        """
        if signed_token is None:
            self.token = ApiToken.empty_token()
            return
        try:
            secret = os.getenv("API_TOKEN_SECRET")
            self.token = jwt.decode(signed_token, secret, algorithms=["HS256"])
        except Exception:
            # Deliberate best effort: any undecodable token is treated as anonymous.
            self.token = ApiToken.empty_token()

    def is_logged_in(self):
        """Return the token's ``isLoggedIn`` claim, or False when absent."""
        return self.token.get("isLoggedIn", False)
153 changes: 153 additions & 0 deletions chat/src/helpers/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# ruff: noqa: E501
def prompt_template():
    """Return the question-answering prompt template text.

    The text holds the instructions, four worked example documents with a
    sample question and answer, and the ``{context}`` and ``{question}``
    placeholders. Literal braces in the examples are doubled (``{{`` and
    ``}}``) so that only those two placeholders remain for substitution.
    """
    return """Using all of the provided source documents, create a helpful and thorough answer to the supplied question.
If you don't know the answer, just say that you don't know. Don't try to make up an answer, but you should use the documents provided in order to ground your response.
It may be helpful to explain why a provided document does not pertain to the query as well.
Feel free to reference various aspects of the sources in your explanation, but please don't include the full sources in the answer.
The Content field represents the title of each document, and the Metadata fields are the attributes. The Source field is the unique identifier for each document.
'certainty' is an opinionated measure of the distance between the query vector and the document embedding vector. Certainty always returns a number between 0 and 1, with 1 indicating identical vectors and 0 indicating opposing angles.
Content: Purchase order and note
Metadata:
_additional: {{'certainty': 0.8744078576564789, 'id': '29389b8d-a85d-46d1-9a6d-a738c6f81c88'}}
alternate_title: None
collection: Berkeley Folk Music Festival
contributor: ['University of California, Berkeley. Associated Students', 'Berkeley Folk Music Festival']
creator: None
date_created: ['October 7, 1970', '1970?']
description: ['Purchase order for costs related to security for the 1970 Berkeley Folk Music Festival and a handwritten note containing calculations and the heading "Police"']
genre: ['notes (documents)', 'purchase orders']
language: ['English']
library_unit: Charles Deering McCormick Library of Special Collections
location: None
physical_description_material: None
physical_description_size: ['5 inches (height) x 3 inches (width)', '7 inches (height) x 8.5 inches (width)']
published: True
rights_statement: In Copyright
scope_and_contents: None
series: ['Berkeley Folk Music Festival Archive--3. Festivals: Records, Budgets, Publicity']
source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88
style_period: None
subject: ['Berkeley Folk Music Festival (15th : 1970 : Berkeley, Calif.)']
table_of_contents: None
technique: None
visibility: Public
work_type: Image
Source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88
Content: Berkeley Folk Music Festival, 1966 June 26-30
Metadata:
_additional: {{'certainty': 0.869585394859314, 'id': '477e3f63-fc06-4bfc-8734-0b6100c0d1c3'}}
alternate_title: None
collection: Berkeley Folk Music Festival
contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students']
creator: None
date_created: ['1966']
description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.']
genre: ['posters']
language: ['English']
library_unit: Charles Deering McCormick Library of Special Collections
location: None
physical_description_material: None
physical_description_size: ['12.75 inches (height) x 12.75 inches (width)']
published: True
rights_statement: In Copyright
scope_and_contents: None
series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters']
source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3
style_period: None
subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Hawes, Bess Lomax, 1921-2009']
table_of_contents: None
technique: None
visibility: Public
work_type: Image
Source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3
Content: Berkeley Folk Music Festival, 1966 June 26-30
Metadata:
_additional: {{'certainty': 0.8694239258766174, 'id': 'bddeb375-762b-45e3-9e4e-5a4084ac5955'}}
alternate_title: None
collection: Berkeley Folk Music Festival
contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students']
creator: None
date_created: ['1966']
description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.']
genre: ['posters']
language: ['English']
library_unit: Charles Deering McCormick Library of Special Collections
location: None
physical_description_material: None
physical_description_size: ['13.75 inches (height) x 21.75 inches (width)']
published: True
rights_statement: In Copyright
scope_and_contents: None
series: ['Berkeley Folk Music Festival Archive--9. Posters of Berkeley Folk Music Festivals']
source: bddeb375-762b-45e3-9e4e-5a4084ac5955
style_period: None
subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival']
table_of_contents: None
technique: None
visibility: Public
work_type: Image
Source: bddeb375-762b-45e3-9e4e-5a4084ac5955
Content: Berkeley Folk Music Festival, 1966 June 30-July 4
Metadata:
_additional: {{'certainty': 0.8693937957286835, 'id': 'aab0bb76-ab02-429a-843a-5be56e31ba67'}}
alternate_title: None
collection: Berkeley Folk Music Festival
contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students']
creator: None
date_created: ['1966']
description: ['Poster for the 9th Annual Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger. Originally found in box 28, folder 3.']
genre: ['posters']
language: ['English']
library_unit: Charles Deering McCormick Library of Special Collections
location: None
physical_description_material: None
physical_description_size: ['24.25 inches (height) x 37.5 inches (width)']
published: True
rights_statement: In Copyright
scope_and_contents: None
series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters']
source: aab0bb76-ab02-429a-843a-5be56e31ba67
style_period: None
subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival']
table_of_contents: None
technique: None
visibility: Public
work_type: Image
Source: aab0bb76-ab02-429a-843a-5be56e31ba67
QUESTION: Which musicians played at the Berkeley Folk Music Festival?
HELPFUL ANSWER: For the 1966 Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, the following musicians and groups were listed as performers:
Pete Seeger
Jefferson Airplane
Sam Hinton
Greenbriar Boys
Shlomo Carlebach
John Fahey
Los Halcones de Salitrillos
Charley Marshall
Phil Ochs
Ralph J. Gleason
Malvina Reynolds
Robert Pete Williams
Alice Stuart Thomas
Bess Lomax Hawes
Charles Seeger
Unfortunately, the documents provided do not include information about musicians who performed at the Berkeley Folk Music Festival in other years during the 1960s or 1970s. Therefore, I can only confirm the musicians for the 1966 festival.
{context}
QUESTION: {question}
=========
HELPFUL ANSWER:"""

def document_template(attributes):
    """Build the per-document template text for the given attribute names.

    The result starts with the content and "Metadata:" lines, has one
    `` name: {name}`` line per attribute, and ends with the source line.
    """
    metadata_rows = "".join(f" {name}: {{{name}}}\n" for name in attributes)
    header = "Content: {page_content}\nMetadata:\n"
    footer = "Source: {source}"
    return header + metadata_rows + footer
9 changes: 9 additions & 0 deletions chat/src/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
langchain~=0.0.208
nbformat~=5.9.0
openai~=0.27.8
pandas~=2.0.2
pyjwt~=2.6.0
python-dotenv~=1.0.0
tiktoken~=0.4.0
weaviate-client~=3.19.2
wheel~=0.40.0
35 changes: 35 additions & 0 deletions chat/src/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
from typing import List, Optional

import weaviate
from langchain.chat_models import AzureChatOpenAI
from langchain.vectorstores import Weaviate

def openai_chat_client(**kwargs):
    """Create an ``AzureChatOpenAI`` client configured from the environment.

    Reads ``AZURE_OPENAI_LLM_DEPLOYMENT_ID``, ``AZURE_OPENAI_API_KEY`` and
    ``AZURE_OPENAI_RESOURCE_NAME``; any *kwargs* are passed through to the
    client constructor.
    """
    resource_name = os.getenv("AZURE_OPENAI_RESOURCE_NAME")
    base_url = f"https://{resource_name}.openai.azure.com/"
    return AzureChatOpenAI(
        deployment_name=os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID"),
        openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        openai_api_base=base_url,
        openai_api_version="2023-07-01-preview",
        **kwargs,
    )



def weaviate_vector_store(index_name: str, text_key: str, attributes: Optional[List[str]] = None):
    """Create a langchain ``Weaviate`` vector store backed by an API-key client.

    Args:
        index_name: Index name handed to the vector store.
        text_key: Text key handed to the vector store.
        attributes: Attribute names handed to the vector store; defaults to
            an empty list.

    Raises:
        KeyError: if ``WEAVIATE_URL`` or ``WEAVIATE_API_KEY`` is not set.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if attributes is None:
        attributes = []

    weaviate_url = os.environ['WEAVIATE_URL']
    weaviate_api_key = os.environ['WEAVIATE_API_KEY']

    auth_config = weaviate.AuthApiKey(api_key=weaviate_api_key)

    client = weaviate.Client(
        url=weaviate_url,
        auth_client_secret=auth_config
    )
    return Weaviate(client=client,
                    index_name=index_name,
                    text_key=text_key,
                    attributes=attributes)
Loading

0 comments on commit 5c44515

Please sign in to comment.