update llama index and langchain, some fixes
Kav-K committed Oct 21, 2023
1 parent 02c3bb8 commit ee3e7b9
Showing 8 changed files with 53 additions and 110 deletions.
7 changes: 4 additions & 3 deletions cogs/search_service_cog.py
@@ -43,8 +43,8 @@
     SimpleDirectoryReader,
     ServiceContext,
     OpenAIEmbedding,
-    ResponseSynthesizer,
 )
+from llama_index.response_synthesizers import get_response_synthesizer, ResponseMode
 from llama_index.retrievers import VectorIndexRetriever
 from llama_index.query_engine import RetrieverQueryEngine
 from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
@@ -224,8 +224,8 @@ def get(self, url: str, **kwargs: Any) -> str:
         retriever = VectorIndexRetriever(
             index=index, similarity_top_k=4, service_context=service_context
         )
-        response_synthesizer = ResponseSynthesizer.from_args(
-            response_mode="compact",
+        response_synthesizer = get_response_synthesizer(
+            response_mode=ResponseMode.COMPACT,
             refine_template=CHAT_REFINE_PROMPT,
             service_context=service_context,
             use_async=True,
@@ -745,4 +745,5 @@ async def callback(self, interaction: discord.Interaction):
             from_followup=FollowupData(message_link, self.children[0].value),
             response_mode=self.search_cog.redo_users[self.ctx.user.id].response_mode,
             followup_user=interaction.user,
+            model="gpt-4-32k",
         )
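For orientation, the hunks above track the llama_index 0.8 API migration: ResponseSynthesizer.from_args is gone, replaced by get_response_synthesizer and a ResponseMode enum. Below is a minimal, self-contained sketch of that query path under llama-index 0.8.x; the data directory, query string, and the direct RetrieverQueryEngine construction are illustrative assumptions, not code from this repository.

```python
from llama_index import GPTVectorStoreIndex, ServiceContext, SimpleDirectoryReader
from llama_index.response_synthesizers import get_response_synthesizer, ResponseMode
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine

service_context = ServiceContext.from_defaults()
documents = SimpleDirectoryReader("./data").load_data()  # illustrative path
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)

# Retrieval and synthesis are wired together explicitly in 0.8.
retriever = VectorIndexRetriever(index=index, similarity_top_k=4)
response_synthesizer = get_response_synthesizer(
    response_mode=ResponseMode.COMPACT,  # enum replaces the old "compact" string
    service_context=service_context,
)
query_engine = RetrieverQueryEngine(
    retriever=retriever, response_synthesizer=response_synthesizer
)
print(query_engine.query("What does this project do?"))  # illustrative query
```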
2 changes: 1 addition & 1 deletion gpt3discord.py
@@ -33,7 +33,7 @@
 from models.openai_model import Model
 
 
-__version__ = "11.8.2"
+__version__ = "11.8.3"
 
 
 PID_FILE = Path("bot.pid")
130 changes: 36 additions & 94 deletions models/index_model.py
@@ -23,14 +23,14 @@
 from langchain.llms import OpenAIChat
 from langchain.memory import ConversationBufferMemory
 from llama_index.callbacks import CallbackManager, TokenCountingHandler
+from llama_index.node_parser import SimpleNodeParser
 from llama_index.schema import NodeRelationship
 from llama_index.indices.query.query_transform import StepDecomposeQueryTransform
 from llama_index.langchain_helpers.agents import (
     IndexToolConfig,
     LlamaToolkit,
     create_llama_chat_agent,
 )
-from llama_index.optimization import SentenceEmbeddingOptimizer
 from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
 
 from llama_index.readers import YoutubeTranscriptReader
@@ -55,8 +55,7 @@
     LLMPredictor,
     ServiceContext,
     StorageContext,
-    ResponseSynthesizer,
-    load_index_from_storage,
+    load_index_from_storage, get_response_synthesizer,
 )
 
 from llama_index.schema import TextNode
@@ -77,6 +76,19 @@
 RemoteReader = download_loader("RemoteReader")
 RemoteDepthReader = download_loader("RemoteDepthReader")
 
+embedding_model = OpenAIEmbedding()
+token_counter = TokenCountingHandler(
+    tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
+    verbose=False,
+)
+node_parser = SimpleNodeParser.from_defaults(
+    text_splitter=TokenTextSplitter(chunk_size=256, chunk_overlap=20)
+)
+callback_manager = CallbackManager([token_counter])
+service_context = ServiceContext.from_defaults(
+    embed_model=embedding_model, callback_manager=callback_manager, node_parser=node_parser
+)
+
 
 def get_and_query(
     user_id,
@@ -103,11 +115,10 @@ def get_and_query(
         index=index, similarity_top_k=nodes, service_context=service_context
     )
 
-    response_synthesizer = ResponseSynthesizer.from_args(
+    response_synthesizer = get_response_synthesizer(
         response_mode=response_mode,
         use_async=True,
         refine_template=CHAT_REFINE_PROMPT,
-        optimizer=SentenceEmbeddingOptimizer(threshold_cutoff=0.7),
         service_context=service_context,
     )
 
@@ -211,6 +222,18 @@ def reset_indexes(self, user_id):
 
 
 class Index_handler:
+    embedding_model = OpenAIEmbedding()
+    token_counter = TokenCountingHandler(
+        tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
+        verbose=False,
+    )
+    node_parser = SimpleNodeParser.from_defaults(
+        text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20)
+    )
+    callback_manager = CallbackManager([token_counter])
+    service_context = ServiceContext.from_defaults(
+        embed_model=embedding_model, callback_manager=callback_manager, node_parser=node_parser
+    )
     def __init__(self, bot, usage_service):
         self.bot = bot
         self.openai_key = os.getenv("OPENAI_TOKEN")
@@ -467,7 +490,7 @@ async def index_webpage(self, url, service_context) -> GPTVectorStoreIndex:
             index = await self.loop.run_in_executor(
                 None,
                 functools.partial(
-                    GPTVectorStoreIndex,
+                    GPTVectorStoreIndex.from_documents,
                     documents=documents,
                     service_context=service_context,
                     use_async=True,
@@ -476,6 +499,7 @@
 
             return index
         except:
+            traceback.print_exc()
             raise ValueError("Could not load webpage")
 
         documents = BeautifulSoupWebReader(
@@ -566,17 +590,6 @@ async def set_file_index(
                     suffix=suffix, dir=temp_path, delete=False
                 ) as temp_file:
                     await file.save(temp_file.name)
-                    embedding_model = OpenAIEmbedding()
-                    token_counter = TokenCountingHandler(
-                        tokenizer=tiktoken.encoding_for_model(
-                            "text-davinci-003"
-                        ).encode,
-                        verbose=False,
-                    )
-                    callback_manager = CallbackManager([token_counter])
-                    service_context = ServiceContext.from_defaults(
-                        embed_model=embedding_model, callback_manager=callback_manager
-                    )
                     index = await self.loop.run_in_executor(
                         None,
                         partial(
@@ -620,15 +633,6 @@ async def set_link_index_recurse(
 
         response = await ctx.respond(embed=EmbedStatics.build_index_progress_embed())
         try:
-            embedding_model = OpenAIEmbedding()
-            token_counter = TokenCountingHandler(
-                tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
-                verbose=False,
-            )
-            callback_manager = CallbackManager([token_counter])
-            service_context = ServiceContext.from_defaults(
-                embed_model=embedding_model, callback_manager=callback_manager
-            )
 
             # Pre-emptively connect and get the content-type of the response
             try:
@@ -719,15 +723,6 @@ async def set_link_index(
 
         response = await ctx.respond(embed=EmbedStatics.build_index_progress_embed())
         try:
-            embedding_model = OpenAIEmbedding()
-            token_counter = TokenCountingHandler(
-                tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
-                verbose=False,
-            )
-            callback_manager = CallbackManager([token_counter])
-            service_context = ServiceContext.from_defaults(
-                embed_model=embedding_model, callback_manager=callback_manager
-            )
 
             # Pre-emptively connect and get the content-type of the response
             try:
@@ -820,15 +815,6 @@ async def set_discord_index(
             document = await self.load_data(
                 channel_ids=[channel.id], limit=message_limit, oldest_first=False
             )
-            embedding_model = OpenAIEmbedding()
-            token_counter = TokenCountingHandler(
-                tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
-                verbose=False,
-            )
-            callback_manager = CallbackManager([token_counter])
-            service_context = ServiceContext.from_defaults(
-                embed_model=embedding_model, callback_manager=callback_manager
-            )
             index = await self.loop.run_in_executor(
                 None, partial(self.index_discord, document, service_context)
             )
@@ -931,7 +917,7 @@ async def compose_indexes(self, user_id, indexes, name, deep_compose):
 
             embedding_model = OpenAIEmbedding()
 
-            llm_predictor_mock = MockLLMPredictor(4096)
+            llm_predictor_mock = MockLLMPredictor()
             embedding_model_mock = MockEmbedding(1536)
 
             token_counter_mock = TokenCountingHandler(
@@ -968,34 +954,21 @@ async def compose_indexes(self, user_id, indexes, name, deep_compose):
                     "Doing this deep search would be prohibitively expensive. Please try a narrower search scope."
                 )
 
-            token_counter = TokenCountingHandler(
-                tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
-                verbose=False,
-            )
-
-            callback_manager = CallbackManager([token_counter])
-
-            service_context = ServiceContext.from_defaults(
-                llm_predictor=llm_predictor,
-                embed_model=embedding_model,
-                callback_manager=callback_manager,
-            )
-
             tree_index = await self.loop.run_in_executor(
                 None,
                 partial(
                     GPTTreeIndex.from_documents,
                     documents=documents,
-                    service_context=service_context,
+                    service_context=self.service_context,
                     use_async=True,
                 ),
             )
 
             await self.usage_service.update_usage(
-                token_counter.total_llm_token_count, "turbo"
+                self.token_counter.total_llm_token_count, "turbo"
             )
             await self.usage_service.update_usage(
-                token_counter.total_embedding_token_count, "embedding"
+                self.token_counter.total_embedding_token_count, "embedding"
             )
 
             # Now we have a list of tree indexes, we can compose them
@@ -1015,19 +988,6 @@
             for _index in index_objects:
                 documents.extend(await self.index_to_docs(_index))
 
-            embedding_model = OpenAIEmbedding()
-
-            token_counter = TokenCountingHandler(
-                tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
-                verbose=False,
-            )
-
-            callback_manager = CallbackManager([token_counter])
-
-            service_context = ServiceContext.from_defaults(
-                embed_model=embedding_model, callback_manager=callback_manager
-            )
-
             simple_index = await self.loop.run_in_executor(
                 None,
                 partial(
@@ -1076,15 +1036,7 @@ async def backup_discord(
         document = await self.load_data(
             channel_ids=channel_ids, limit=message_limit, oldest_first=False
         )
-        embedding_model = OpenAIEmbedding()
-        token_counter = TokenCountingHandler(
-            tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
-            verbose=False,
-        )
-        callback_manager = CallbackManager([token_counter])
-        service_context = ServiceContext.from_defaults(
-            embed_model=embedding_model, callback_manager=callback_manager
-        )
+
         index = await self.loop.run_in_executor(
             None, partial(self.index_discord, document, service_context)
         )
@@ -1137,17 +1089,6 @@ async def query(
             )
 
         try:
-            embedding_model = OpenAIEmbedding()
-            token_counter = TokenCountingHandler(
-                tokenizer=tiktoken.encoding_for_model(model).encode, verbose=False
-            )
-
-            callback_manager = CallbackManager([token_counter])
-            service_context = ServiceContext.from_defaults(
-                llm_predictor=llm_predictor,
-                embed_model=embedding_model,
-                callback_manager=callback_manager,
-            )
 
             token_counter.reset_counts()
             response = await self.loop.run_in_executor(
@@ -1450,6 +1391,7 @@ async def interaction_check(self, interaction: discord.Interaction) -> bool:
                 )
                 return False
         except Exception as e:
+            traceback.print_exc()
             await interaction.followup.send(
                 embed=EmbedStatics.get_index_compose_failure_embed(
                     "An error occurred while composing the indexes: " + str(e)
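Taken together, the deletions in this file replace per-call construction of the embedding model, token counter, and service context with the shared module-level and class-level objects added at the top. A condensed standalone sketch of that pattern follows; it mirrors the values in the diff, and the import paths reflect the llama-index 0.8 layout as best understood here.

```python
import tiktoken
from llama_index import OpenAIEmbedding, ServiceContext
from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter

# Built once at import time and reused by every indexing/query call.
token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
    verbose=False,
)
service_context = ServiceContext.from_defaults(
    embed_model=OpenAIEmbedding(),
    callback_manager=CallbackManager([token_counter]),
    node_parser=SimpleNodeParser.from_defaults(
        text_splitter=TokenTextSplitter(chunk_size=256, chunk_overlap=20)
    ),
)

# Because the handler is shared, accumulated usage is read and then cleared
# between calls, as query() does with reset_counts():
print(token_counter.total_llm_token_count)
print(token_counter.total_embedding_token_count)
token_counter.reset_counts()
```

One trade-off of this consolidation: a single shared TokenCountingHandler accumulates counts across operations, so per-call usage numbers stay accurate only if the counter is reset between calls.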
7 changes: 2 additions & 5 deletions models/search_model.py
@@ -21,15 +21,13 @@
     OpenAIEmbedding,
     SimpleDirectoryReader,
     MockEmbedding,
-    ServiceContext,
-    ResponseSynthesizer,
+    ServiceContext, get_response_synthesizer,
 )
 from llama_index.callbacks import CallbackManager, TokenCountingHandler
 from llama_index.composability import QASummaryQueryEngineBuilder
 from llama_index.retrievers import VectorIndexRetriever
 from llama_index.query_engine import RetrieverQueryEngine, MultiStepQueryEngine
 from llama_index.indices.query.query_transform import StepDecomposeQueryTransform
-from llama_index.optimization import SentenceEmbeddingOptimizer
 from llama_index.prompts.chat_prompts import CHAT_REFINE_PROMPT
 from llama_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
 from langchain import OpenAI
@@ -439,12 +437,11 @@ async def search(
                 similarity_top_k=nodes or DEFAULT_SEARCH_NODES,
             )
 
-            response_synthesizer = ResponseSynthesizer.from_args(
+            response_synthesizer = get_response_synthesizer(
                 response_mode=response_mode,
                 use_async=True,
                 refine_template=CHAT_REFINE_PROMPT,
                 text_qa_template=self.qaprompt,
-                optimizer=SentenceEmbeddingOptimizer(threshold_cutoff=0.7),
                 service_context=service_context,
             )
 
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -34,13 +34,13 @@ dependencies = [
     "backoff==2.2.1",
     "flask==2.2.3",
     "werkzeug==2.2.2",
-    "llama-index==0.6.38",
+    "llama-index==0.8.48",
     "pypdf==3.11.1",
     "youtube_transcript_api==0.5.0",
     "sentencepiece==0.1.99",
     "protobuf==3.20.2",
     "python-pptx==0.6.21",
-    "langchain==0.0.268",
+    "langchain==0.0.319",
    "unidecode==1.3.6",
     "tqdm==4.64.1",
     "docx2txt==0.8",
4 changes: 2 additions & 2 deletions requirements.txt
@@ -14,14 +14,14 @@ sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
 werkzeug==2.2.2
-llama-index==0.6.38
+llama-index==0.8.48
 pypdf==3.11.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.99
 protobuf==3.20.2
 python-pptx==0.6.21
 sentence-transformers==2.2.2
-langchain==0.0.268
+langchain==0.0.319
 openai-whisper
 unidecode==1.3.6
 tqdm==4.64.1
4 changes: 2 additions & 2 deletions requirements_base.txt
@@ -14,13 +14,13 @@ sqlitedict==2.1.0
 backoff==2.2.1
 flask==2.2.3
 werkzeug==2.2.2
-llama-index==0.6.38
+llama-index==0.8.48
 pypdf==3.11.1
 youtube_transcript_api==0.5.0
 sentencepiece==0.1.99
 protobuf==3.20.2
 python-pptx==0.6.21
-langchain==0.0.268
+langchain==0.0.319
 unidecode==1.3.6
 tqdm==4.64.1
 docx2txt==0.8
5 changes: 4 additions & 1 deletion services/usage_service.py
@@ -49,7 +49,10 @@ async def get_cost_name(model) -> str:
         return UsageService.MODEL_COST_MAP.get(model, "davinci")
 
     async def get_price(self, tokens_used, mode: ModeType = None):
-        tokens_used = int(tokens_used)
+        if isinstance(tokens_used, str) or isinstance(tokens_used, int):
+            tokens_used = int(tokens_used)
+        else:
+            tokens_used = int(len(tokens_used))
         price = (tokens_used / 1000) * await self.get_model_cost(
             mode
         )  # This is a very rough estimate
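The reworked get_price accepts either a numeric count (or numeric string) or a token sequence, whose length is used instead; the formula itself is unchanged: tokens divided by 1000, times the per-1k cost for the mode. A small standalone illustration of that arithmetic, with a hypothetical cost figure:

```python
def estimate_price(tokens_used, cost_per_1k_tokens: float) -> float:
    """Mirror of the branching above; cost figure supplied by the caller."""
    if isinstance(tokens_used, (str, int)):
        tokens_used = int(tokens_used)   # already a count (or numeric string)
    else:
        tokens_used = len(tokens_used)   # a sequence of tokens: count them
    return (tokens_used / 1000) * cost_per_1k_tokens

# 4,500 tokens at a hypothetical $0.002 per 1k tokens:
print(estimate_price(4500, 0.002))            # 0.009
print(estimate_price(["tok"] * 4500, 0.002))  # same: 0.009
```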
