
Commit 91d637a

Upstream changes for 0.4.0 release (#53)
0.4.0 release
1 parent: 27d5c7c

84 files changed: +9645 −322 lines


CHANGELOG.md

Lines changed: 23 additions & 1 deletion
@@ -3,6 +3,28 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.0] - 2024-02-22
+
+### Added
+
+- [New dedicated notebooks](./docs/rag/jupyter_server.md) showcasing cloud-based NVIDIA AI Playground models via LangChain connectors, as well as local model deployment using Hugging Face.
+- Upgraded the Milvus container version to enable GPU-accelerated vector search.
+- Added support for interacting with models behind NeMo Inference Microservices using the new model engines `nemo-embed` and `nemo-infer`.
+- Added support for an example-specific collection name for vector databases via an environment variable named `COLLECTION_NAME`.
+- Added `faiss` as a generic vector database option behind `utils.py`.
+
+### Changed
+
+- Upgraded and changed the base containers for all components to PyTorch `23.12-py3`.
+- Added a LangChain-specific vector database connector in `utils.py`.
+- Changed speech support to use a single channel for Riva ASR and TTS.
+- Changed the `get_llm` utility in `utils.py` to return LangChain wrappers instead of LlamaIndex wrappers.
+
+### Fixed
+
+- Fixed a bug causing an empty rating in the evaluation notebook.
+- Fixed the document search implementation of the query decomposition example.
+
 ## [0.3.0] - 2024-01-22
 
 ### Added
@@ -53,4 +75,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ### Fixed
 
 - [Fixed issue #13](https://github.com/NVIDIA/GenerativeAIExamples/issues/13) of the pipeline not being able to answer questions unrelated to the knowledge base
-- [Fixed issue #12](https://github.com/NVIDIA/GenerativeAIExamples/issues/12) typechecking while uploading PDF files
+- [Fixed issue #12](https://github.com/NVIDIA/GenerativeAIExamples/issues/12) typechecking while uploading PDF files
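
Note: the `COLLECTION_NAME` variable introduced above is only consulted when no explicit collection name is passed in (see the `utils.py` changes below). A minimal sketch of the intended usage; the collection name here is illustrative:

    import os

    # Illustrative only: select a per-example collection before the index is built.
    os.environ["COLLECTION_NAME"] = "developer_rag_docs"

    from RetrievalAugmentedGeneration.common.utils import get_vector_index

    # With no explicit argument, get_vector_index() falls back to COLLECTION_NAME,
    # and finally to the default "vector_db".
    index = get_vector_index()
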
RetrievalAugmentedGeneration/Dockerfile

Lines changed: 4 additions & 2 deletions

@@ -1,22 +1,24 @@
 ARG BASE_IMAGE_URL=nvcr.io/nvidia/pytorch
-ARG BASE_IMAGE_TAG=23.08-py3
+ARG BASE_IMAGE_TAG=23.12-py3
 
 FROM ${BASE_IMAGE_URL}:${BASE_IMAGE_TAG}
 
 ARG EXAMPLE_NAME
 COPY RetrievalAugmentedGeneration/__init__.py /opt/RetrievalAugmentedGeneration/
 COPY RetrievalAugmentedGeneration/common /opt/RetrievalAugmentedGeneration/common
-COPY RetrievalAugmentedGeneration/examples/${EXAMPLE_NAME} /opt/RetrievalAugmentedGeneration/example
 COPY integrations /opt/integrations
 COPY tools /opt/tools
 RUN apt-get update && apt-get install -y libpq-dev
 RUN --mount=type=bind,source=RetrievalAugmentedGeneration/requirements.txt,target=/opt/requirements.txt \
     python3 -m pip install --no-cache-dir -r /opt/requirements.txt
 
+COPY RetrievalAugmentedGeneration/examples/${EXAMPLE_NAME} /opt/RetrievalAugmentedGeneration/example
 RUN if [ -f "/opt/RetrievalAugmentedGeneration/example/requirements.txt" ] ; then \
     python3 -m pip install --no-cache-dir -r /opt/RetrievalAugmentedGeneration/example/requirements.txt ; else \
     echo "Skipping example dependency installation, since requirements.txt was not found" ; \
     fi
 
+RUN apt-get remove python3-pip
+
 WORKDIR /opt
 ENTRYPOINT ["uvicorn", "RetrievalAugmentedGeneration.common.server:app"]

RetrievalAugmentedGeneration/common/configuration.py

Lines changed: 5 additions & 0 deletions
@@ -114,6 +114,11 @@ class EmbeddingConfig(ConfigWizard):
         default=1024,
         help_txt="The required dimensions of the embedding model. Currently utilized for vector DB indexing.",
     )
+    server_url: str = configfield(
+        "server_url",
+        default="localhost:9080",
+        help_txt="The URL of the server hosting the NeMo embedding model.",
+    )
 
 
 @configclass
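
Note: a short sketch of how the new field is consumed at runtime, mirroring the `utils.py` change below; the override mechanism named in the comment is an assumption based on the repo's usual ConfigWizard conventions:

    from RetrievalAugmentedGeneration.common.utils import get_config

    config = get_config()
    # Defaults to "localhost:9080"; deployments would typically override it
    # (assumed: via a ConfigWizard environment variable such as APP_EMBEDDINGS_SERVERURL).
    embedding_endpoint = f"http://{config.embeddings.server_url}/v1/embeddings"
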

RetrievalAugmentedGeneration/common/utils.py

Lines changed: 97 additions & 12 deletions
@@ -18,6 +18,7 @@
 import base64
 import logging
 from functools import lru_cache
+from urllib.parse import urlparse
 from typing import TYPE_CHECKING, List, Optional
 
 logger = logging.getLogger(__name__)
@@ -33,7 +34,7 @@
     logger.error(f"psycopg2 import failed with error: {e}")
 
 try:
-    from sqlalchemy import make_url
+    from sqlalchemy.engine.url import make_url
 except Exception as e:
     logger.error(f"SQLAlchemy import failed with error: {e}")
 
@@ -55,15 +56,33 @@
 try:
     from langchain.text_splitter import SentenceTransformersTokenTextSplitter
     from langchain.embeddings import HuggingFaceEmbeddings
+    from langchain.vectorstores import FAISS
 except Exception as e:
     logger.error(f"Langchain import failed with error: {e}")
 
+try:
+    from langchain_core.vectorstores import VectorStore
+except Exception as e:
+    logger.error(f"Langchain core import failed with error: {e}")
+
+try:
+    from langchain_community.vectorstores import PGVector
+    from langchain_community.vectorstores import Milvus
+except Exception as e:
+    logger.error(f"Langchain community import failed with error: {e}")
+
 try:
     from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
+    from langchain_community.chat_models import ChatOpenAI
 except Exception as e:
     logger.error(f"NVIDIA AI connector import failed with error: {e}")
 
+from langchain_core.embeddings import Embeddings
+from langchain_core.language_models.chat_models import SimpleChatModel
+from langchain.llms.base import LLM
 from integrations.langchain.llms.triton_trt_llm import TensorRTLLM
+from integrations.langchain.llms.nemo_infer import NemoInfer
+from integrations.langchain.embeddings.nemo_embed import NemoEmbeddings
 from RetrievalAugmentedGeneration.common import configuration
 
 if TYPE_CHECKING:
@@ -102,8 +121,10 @@ def _postprocess_nodes(
 @lru_cache
 def set_service_context() -> None:
     """Set the global service context."""
+    llm = LangChainLLM(get_llm())
+    embedding = LangchainEmbedding(get_embedding_model())
     service_context = ServiceContext.from_defaults(
-        llm=get_llm(), embed_model=get_embedding_model()
+        llm=llm, embed_model=embedding
     )
     set_global_service_context(service_context)
 
@@ -119,15 +140,19 @@ def get_config() -> "ConfigWizard":
 
 
 @lru_cache
-def get_vector_index() -> VectorStoreIndex:
+def get_vector_index(collection_name: str = "") -> VectorStoreIndex:
     """Create the vector db index."""
     config = get_config()
     vector_store = None
 
     logger.info(f"Using {config.vector_store.name} as vector store")
+
     if config.vector_store.name == "pgvector":
-        db_name = os.getenv('POSTGRES_DB', 'vector_db')
+        db_name = os.getenv('POSTGRES_DB', None)
+        if not collection_name:
+            collection_name = os.getenv('COLLECTION_NAME', "vector_db")
         connection_string = f"postgresql://{os.getenv('POSTGRES_USER', '')}:{os.getenv('POSTGRES_PASSWORD', '')}@{config.vector_store.url}/{db_name}"
+        logger.info(f"Using PGVector collection: {collection_name}")
 
         conn = psycopg2.connect(connection_string)
         conn.autocommit = True
@@ -146,21 +171,60 @@ def get_vector_index() -> VectorStoreIndex:
             password=url.password,
             port=url.port,
             user=url.username,
-            table_name="document_store",
-            embed_dim=config.embeddings.dimensions,
+            table_name=collection_name,
+            embed_dim=config.embeddings.dimensions
         )
     elif config.vector_store.name == "milvus":
+        if not collection_name:
+            collection_name = os.getenv('COLLECTION_NAME', "vector_db")
+        logger.info(f"Using milvus collection: {collection_name}")
         vector_store = MilvusVectorStore(uri=config.vector_store.url,
             dim=config.embeddings.dimensions,
-            collection_name="document_store_ivfflat",
-            index_config={"index_type": "IVF_FLAT", "nlist": config.vector_store.nlist},
+            collection_name=collection_name,
+            index_config={"index_type": "GPU_IVF_FLAT", "nlist": config.vector_store.nlist},
            search_config={"nprobe": config.vector_store.nprobe},
            overwrite=False)
     else:
         raise RuntimeError("Unable to find any supported Vector Store DB. Supported engines are milvus and pgvector.")
     return VectorStoreIndex.from_vector_store(vector_store)
 
 
+def get_vectorstore_langchain(documents, document_embedder, collection_name: str = "") -> VectorStore:
+    """Create the vector db index for langchain."""
+
+    config = get_config()
+
+    if config.vector_store.name == "faiss":
+        vectorstore = FAISS.from_documents(documents, document_embedder)
+    elif config.vector_store.name == "pgvector":
+        db_name = os.getenv('POSTGRES_DB', None)
+        if not collection_name:
+            collection_name = os.getenv('COLLECTION_NAME', "vector_db")
+        logger.info(f"Using PGVector collection: {collection_name}")
+        connection_string = f"postgresql://{os.getenv('POSTGRES_USER', '')}:{os.getenv('POSTGRES_PASSWORD', '')}@{config.vector_store.url}/{db_name}"
+        vectorstore = PGVector.from_documents(
+            embedding=document_embedder,
+            documents=documents,
+            collection_name=collection_name,
+            connection_string=connection_string,
+        )
+    elif config.vector_store.name == "milvus":
+        if not collection_name:
+            collection_name = os.getenv('COLLECTION_NAME', "vector_db")
+        logger.info(f"Using milvus collection: {collection_name}")
+        url = urlparse(config.vector_store.url)
+        vectorstore = Milvus.from_documents(
+            documents,
+            document_embedder,
+            collection_name=collection_name,
+            connection_args={"host": url.hostname, "port": url.port}
+        )
+    else:
+        raise ValueError(f"{config.vector_store.name} vector database is not supported")
+    logger.info("Vector store created and saved.")
+    return vectorstore
+
+
 @lru_cache
 def get_doc_retriever(num_nodes: int = 4) -> "BaseRetriever":
     """Create the document retriever."""
@@ -169,7 +233,7 @@ def get_doc_retriever(num_nodes: int = 4) -> "BaseRetriever":
 
 
 @lru_cache
-def get_llm() -> LangChainLLM:
+def get_llm() -> LLM | SimpleChatModel:
     """Create the LLM connection."""
     settings = get_config()
 
@@ -180,15 +244,30 @@
             model_name=settings.llm.model_name,
             tokens=DEFAULT_NUM_TOKENS,
         )
-        return LangChainLLM(llm=trtllm)
+        return trtllm
     elif settings.llm.model_engine == "nv-ai-foundation":
         return ChatNVIDIA(model=settings.llm.model_name)
+    elif settings.llm.model_engine == "nemo-infer":
+        nemo_infer = NemoInfer(
+            server_url=f"http://{settings.llm.server_url}/v1/completions",
+            model=settings.llm.model_name,
+            tokens=DEFAULT_NUM_TOKENS,
+        )
+        return nemo_infer
+    elif settings.llm.model_engine == "nemo-infer-openai":
+        nemo_infer = ChatOpenAI(
+            openai_api_base=f"http://{settings.llm.server_url}/v1/",
+            openai_api_key="xyz",
+            model_name=settings.llm.model_name,
+            max_tokens=DEFAULT_NUM_TOKENS,
+        )
+        return nemo_infer
     else:
         raise RuntimeError("Unable to find any supported Large Language Model server. Supported engines are triton-trt-llm and nv-ai-foundation.")
 
 
 @lru_cache
-def get_embedding_model() -> LangchainEmbedding:
+def get_embedding_model() -> Embeddings:
     """Create the embedding model."""
     model_kwargs = {"device": "cpu"}
     if torch.cuda.is_available():
@@ -205,9 +284,15 @@
             encode_kwargs=encode_kwargs,
         )
         # Load in a specific embedding model
-        return LangchainEmbedding(hf_embeddings)
+        return hf_embeddings
     elif settings.embeddings.model_engine == "nv-ai-foundation":
         return NVIDIAEmbeddings(model=settings.embeddings.model_name, model_type="passage")
+    elif settings.embeddings.model_engine == "nemo-embed":
+        nemo_embed = NemoEmbeddings(
+            server_url=f"http://{settings.embeddings.server_url}/v1/embeddings",
+            model_name=settings.embeddings.model_name,
+        )
+        return nemo_embed
     else:
         raise RuntimeError("Unable to find any supported embedding model. Supported engine is huggingface.")
 
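
Note: a usage sketch for the new `get_vectorstore_langchain` helper; the document content and collection name are illustrative:

    from langchain.docstore.document import Document

    from RetrievalAugmentedGeneration.common.utils import (
        get_embedding_model,
        get_vectorstore_langchain,
    )

    docs = [Document(page_content="Milvus supports GPU-accelerated vector search.")]
    # Builds a FAISS, PGVector, or Milvus store depending on the configured
    # vector_store.name, with the same COLLECTION_NAME fallback as get_vector_index.
    vectorstore = get_vectorstore_langchain(docs, get_embedding_model(), collection_name="demo_docs")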

RetrievalAugmentedGeneration/examples/developer_rag/chains.py

Lines changed: 14 additions & 5 deletions
@@ -24,6 +24,8 @@
 from llama_index.query_engine import RetrieverQueryEngine
 from llama_index.response.schema import StreamingResponse
 from llama_index.node_parser import LangchainNodeParser
+from llama_index.llms import LangChainLLM
+from llama_index.embeddings import LangchainEmbedding
 
 from RetrievalAugmentedGeneration.common.utils import (
     LimitRetrievedNodesLength,
@@ -91,7 +93,8 @@ def llm_chain(self, context: str, question: str, num_tokens: int) -> Generator[str, None, None]:
         )
 
         logger.info(f"Prompt used for response generation: {prompt}")
-        response = get_llm().stream_complete(prompt, tokens=num_tokens)
+        llm = LangChainLLM(get_llm())
+        response = llm.stream_complete(prompt, tokens=num_tokens)
         gen_response = (resp.delta for resp in response)
         return gen_response
 
@@ -101,10 +104,16 @@ def rag_chain(self, prompt: str, num_tokens: int) -> Generator[str, None, None]:
         logger.info("Using rag to generate response from document")
 
         set_service_context()
-        if get_config().llm.model_engine == "triton-trt-llm":
-            get_llm().llm.tokens = num_tokens  # type: ignore
-        else:
-            get_llm().llm.max_tokens = num_tokens
+        llm = LangChainLLM(get_llm())
+
+        try:
+            if get_config().llm.model_engine == "triton-trt-llm" or get_config().llm.model_engine == "nemo-infer":
+                llm.llm.tokens = num_tokens  # type: ignore
+            else:
+                llm.llm.max_tokens = num_tokens
+        except Exception as e:
+            logger.error(f"Exception in setting llm tokens: {e}")
+
         retriever = get_doc_retriever(num_nodes=4)
         qa_template = Prompt(get_config().prompts.rag_template)
 
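Note: with `get_llm` now returning a plain LangChain object, every LlamaIndex call site wraps it explicitly. The pattern in isolation (prompt text illustrative):

    from llama_index.llms import LangChainLLM

    from RetrievalAugmentedGeneration.common.utils import get_llm

    # LangChainLLM adapts a LangChain LLM or chat model to the LlamaIndex
    # interface; the wrapped model stays reachable via the .llm attribute.
    llm = LangChainLLM(get_llm())
    response = llm.stream_complete("Summarize GPU-accelerated vector search.", tokens=256)
    for resp in response:
        print(resp.delta, end="")
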
RetrievalAugmentedGeneration/examples/nvidia_ai_foundation/chains.py

Lines changed: 18 additions & 8 deletions

@@ -25,7 +25,7 @@
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
 from RetrievalAugmentedGeneration.common.base import BaseExample
-from RetrievalAugmentedGeneration.common.utils import get_config, get_llm, get_embedding_model
+from RetrievalAugmentedGeneration.common.utils import get_config, get_llm, get_embedding_model, get_vectorstore_langchain
 
 logger = logging.getLogger(__name__)
 DOCS_DIR = os.path.abspath("./uploaded_files")
@@ -38,7 +38,6 @@
 class NvidiaAIFoundation(BaseExample):
     def ingest_docs(self, file_name: str, filename: str):
         """Ingest documents to the VectorDB."""
-
         try:
             # TODO: Load embedding created in older conversation, memory persistence
             # We initialize class in every call therefore it should be global
@@ -54,8 +53,7 @@ def ingest_docs(self, file_name: str, filename: str):
                 if vectorstore:
                     vectorstore.add_documents(documents)
                 else:
-                    vectorstore = FAISS.from_documents(documents, document_embedder)
-                    logger.info("Vector store created and saved.")
+                    vectorstore = get_vectorstore_langchain(documents, document_embedder)
             else:
                 logger.warning("No documents available to process!")
         except Exception as e:
@@ -106,8 +104,14 @@ def rag_chain(self, prompt: str, num_tokens: int) -> Generator[str, None, None]:
 
         try:
             if vectorstore != None:
-                retriever = vectorstore.as_retriever()
-                docs = retriever.get_relevant_documents(prompt)
+                try:
+                    retriever = vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.25})
+                    docs = retriever.get_relevant_documents(prompt)
+                except NotImplementedError:
+                    # Some retrievers, like Milvus, don't implement a similarity score threshold
+                    retriever = vectorstore.as_retriever()
+                    docs = retriever.get_relevant_documents(prompt)
+
 
                 context = ""
                 for doc in docs:
@@ -134,8 +138,14 @@ def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]:
 
         try:
             if vectorstore != None:
-                retriever = vectorstore.as_retriever()
-                docs = retriever.get_relevant_documents(content)
+                try:
+                    retriever = vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.25})
+                    docs = retriever.get_relevant_documents(content)
+                except NotImplementedError:
+                    # Some retrievers, like Milvus, don't implement a similarity score threshold
+                    retriever = vectorstore.as_retriever()
+                    docs = retriever.get_relevant_documents(content)
+
                 result = []
                 for doc in docs:
                     result.append(
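
Note: the threshold-then-fallback retrieval block now appears in both `rag_chain` and `document_search`; a small helper could consolidate it (hypothetical, not part of this commit):

    def get_relevant_docs(vectorstore, query: str, score_threshold: float = 0.25):
        """Hypothetical helper: try score-threshold retrieval, fall back otherwise."""
        try:
            retriever = vectorstore.as_retriever(
                search_type="similarity_score_threshold",
                search_kwargs={"score_threshold": score_threshold},
            )
            return retriever.get_relevant_documents(query)
        except NotImplementedError:
            # Some vector stores (e.g. Milvus) don't implement score thresholds.
            retriever = vectorstore.as_retriever()
            return retriever.get_relevant_documents(query)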
