From dd280ac4de2b1e3428dd11901655fb3e73deb8a4 Mon Sep 17 00:00:00 2001
From: christy <cbergman@gmail.com>
Date: Mon, 20 Nov 2023 16:21:26 -0800
Subject: [PATCH] Update simple OSS RAG demo

Signed-off-by: christy <cbergman@gmail.com>
---
 .../readthedocs_rag_milvus_client.ipynb       | 493 ++++++++++++------
 .../text/imdb_search_milvus_client.ipynb      |  20 +-
 2 files changed, 361 insertions(+), 152 deletions(-)
diff --git a/notebooks/llms/langchain/readthedocs_rag_milvus_client.ipynb b/notebooks/llms/langchain/readthedocs_rag_milvus_client.ipynb
index fff2a5f34..7ad1c183b 100755
--- a/notebooks/llms/langchain/readthedocs_rag_milvus_client.ipynb
+++ b/notebooks/llms/langchain/readthedocs_rag_milvus_client.ipynb
@@ -98,8 +98,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Milvus server startup time: 6.501969814300537 sec\n",
-      "v2.2-testing-20230824-68-ga34a9d606-lite\n"
+      "Milvus server startup time: 7.5924530029296875 sec\n",
+      "v2.3.3-lite\n"
      ]
     }
    ],
@@ -107,9 +107,11 @@
     "from milvus import default_server\n",
     "from pymilvus import (\n",
     "    connections, utility, \n",
-    "    MilvusClient,\n",
+    "    # MilvusClient,\n",
     ")\n",
     "\n",
+    "MILVUS_HOST = \"127.0.0.1\"\n",
+    "\n",
     "# Cleanup previous data and stop server in case it is still running.\n",
     "default_server.stop()\n",
     "default_server.cleanup()\n",
@@ -127,7 +129,7 @@
     "\n",
     "# Now you could connect with localhost and the given port.\n",
     "# Port is defined by default_server.listen_port.\n",
-    "connections.connect(host='127.0.0.1', \n",
+    "connections.connect(host=MILVUS_HOST, \n",
     "                  port=default_server.listen_port,\n",
     "                  show_startup_banner=True)\n",
     "\n",
@@ -143,9 +145,9 @@
     "## Load the Embedding Model checkpoint and use it to create vector embeddings\n",
     "**Embedding model:**  We will use the open-source [sentence transformers](https://www.sbert.net/docs/pretrained_models.html) available on HuggingFace to encode the documentation text.  We will download the model from HuggingFace and run it locally.  We'll save the model's generated embeedings to a pandas dataframe and then into the milvus database.\n",
     "\n",
-    "💡 Note:  To keep your tokens private, best practice is to use an env variable.   <br>\n",
-    "In Jupyter, need .env file (in same dir as notebooks) containing lines like this:\n",
-    "- VARIABLE_NAME=value"
+    "Two model parameters of note below:\n",
+    "1. EMBEDDING_LENGTH refers to the dimensionality or length of the embedding vector. In this case, the embeddings generated for EACH token in the input text will have the SAME length = 768. This size of embedding is often associated with BERT-based models, where the embeddings are used for downstream tasks such as classification, question answering, or text generation. <br><br>\n",
+    "2. MAX_SEQ_LENGTH is the maximum length the encoder model can handle for input sequences. In this case, if sequences longer than 512 tokens are given to the model, everything longer will be (silently!) chopped off.  This is the reason why a chunking strategy is needed to segment input texts into chunks with lengths that will fit in the model's input."
    ]
   },
   {
@@ -178,30 +180,19 @@
     "\n",
     "# Initialize torch settings\n",
     "torch.backends.cudnn.deterministic = True\n",
-    "RANDOM_SEED = 415\n",
-    "torch.manual_seed(RANDOM_SEED)\n",
     "DEVICE = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')\n",
     "print(f\"device: {DEVICE}\")\n",
     "\n",
-    "import os\n",
-    "from dotenv import load_dotenv, find_dotenv\n",
-    "_ = load_dotenv(find_dotenv())\n",
-    "from huggingface_hub import login\n",
-    "\n",
-    "# # Login to huggingface_hub\n",
-    "# hub_token = os.getenv(\"HUGGINGFACEHUB_API_TOKEN\")\n",
-    "# login(token=hub_token)\n",
-    "\n",
     "# Load the model from huggingface model hub.\n",
     "model_name = \"BAAI/bge-base-en-v1.5\"\n",
-    "retriever = SentenceTransformer(model_name, device=DEVICE)\n",
-    "print(type(retriever))\n",
-    "print(retriever)\n",
+    "encoder = SentenceTransformer(model_name, device=DEVICE)\n",
+    "print(type(encoder))\n",
+    "print(encoder)\n",
     "\n",
     "# Get the model parameters and save for later.\n",
-    "MAX_SEQ_LENGTH = retriever.get_max_seq_length() \n",
+    "MAX_SEQ_LENGTH = encoder.get_max_seq_length() \n",
     "HF_EOS_TOKEN_LENGTH = 1\n",
-    "EMBEDDING_LENGTH = retriever.get_sentence_embedding_dimension()\n",
+    "EMBEDDING_LENGTH = encoder.get_sentence_embedding_dimension()\n",
     "\n",
     "# Inspect model parameters.\n",
     "print(f\"model_name: {model_name}\")\n",
@@ -209,40 +200,139 @@
     "print(f\"MAX_SEQ_LENGTH: {MAX_SEQ_LENGTH}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a Milvus collection\n",
+    "\n",
+    "You can think of a collection in Milvus like a \"table\" in SQL databases.  The **collection** will contain the \n",
+    "- **Schema** (or no-schema Milvus Client).  \n",
+    "💡 You'll need the vector `EMBEDDING_LENGTH` parameter from your embedding model.\n",
+    "- **Vector index** for efficient vector search\n",
+    "- **Vector distance metric** for measuring nearest neighbor vectors\n",
+    "- **Consistency level**\n",
+    "In Milvus, transactional consistency is possible; however, according to the [CAP theorem](https://en.wikipedia.org/wiki/CAP_theorem), some latency must be sacrificed. 💡 Searching documentation pages is not mission-critical, so [`eventually`](https://milvus.io/docs/consistency.md) consistency is fine here.\n",
+    "\n",
+    "Some supported [data types](https://milvus.io/docs/schema.md) for Milvus schemas are:\n",
+    "- INT64 - primary key\n",
+    "- VARCHAR - raw texts\n",
+    "- FLOAT_VECTOR - embeddings = list of `numpy.ndarray` of `numpy.float32` numbers"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 5,
-   "id": "dd543a62",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Embedding length: 768\n",
+      "Created collection: MilvusDocs\n",
+      "Schema: {'auto_id': True, 'description': 'The schema for docs pages', 'fields': [{'name': 'pk', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 768}}], 'enable_dynamic_field': True}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pymilvus import (\n",
+    "    FieldSchema, DataType, \n",
+    "    CollectionSchema, Collection)\n",
+    "\n",
+    "# 1. Set the Milvus collection name.\n",
+    "COLLECTION_NAME = \"MilvusDocs\"\n",
+    "\n",
+    "# 2. Use embedding length from the embedding model.\n",
+    "print(f\"Embedding length: {EMBEDDING_LENGTH}\")\n",
+    "\n",
+    "# 3. Define minimum required fields.\n",
+    "fields = [\n",
+    "    FieldSchema(name=\"pk\", dtype=DataType.INT64, is_primary=True, auto_id=True),\n",
+    "    FieldSchema(name=\"vector\", dtype=DataType.FLOAT_VECTOR, dim=EMBEDDING_LENGTH),\n",
+    "]\n",
+    "\n",
+    "# 4. Create schema with dynamic field enabled.\n",
+    "schema = CollectionSchema(\n",
+    "\t\tfields,\n",
+    "\t\tdescription=\"The schema for docs pages\",\n",
+    "\t\tenable_dynamic_field=True\n",
+    ")\n",
+    "mc = Collection(COLLECTION_NAME, schema, consistency_level=\"Eventually\")\n",
+    "\n",
+    "print(f\"Created collection: {COLLECTION_NAME}\")\n",
+    "print(f\"Schema: {mc.schema}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Add a Vector Index\n",
+    "\n",
+    "The vector index determines the vector **search algorithm** used to find the closest vectors in your data to the query a user submits.  Most vector indexes use different sets of parameters depending on whether the database is:\n",
+    "- **inserting vectors** (creation mode) - vs - \n",
+    "- **searching vectors** (search mode) \n",
+    "\n",
+    "Scroll down the [docs page](https://milvus.io/docs/index.md) to see a table listing different vector indexes available on Milvus.  For example:\n",
+    "- FLAT - deterministic exhaustive search\n",
+    "- IVF_FLAT or IVF_SQ8 - Inverted-file (cluster-based) index (stochastic approximate search)\n",
+    "- HNSW - Graph index (stochastic approximate search)\n",
+    "\n",
+    "Besides a search algorithm, we also need to specify a **distance metric**, that is, a definition of what is considered \"close\" in vector space.  In the cell below, the [`HNSW`](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) search index is chosen.  Its possible distance metrics are one of:\n",
+    "- L2 - L2-norm\n",
+    "- IP - Dot-product\n",
+    "- COSINE - Angular distance\n",
+    "\n",
+    "💡 Most use cases work better with normalized embeddings, in which case IP and COSINE are the same and L2 adds nothing new (every vector has length=1, so L2 produces the same ranking).  Only choose L2 if you plan to keep your embeddings unnormalized."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "langchain.embeddings.huggingface.HuggingFaceEmbeddings"
+       "Status(code=0, message=)"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "# Convert the HuggingFace embeddings to a Langchain embeddings.\n",
-    "from langchain.embeddings import HuggingFaceEmbeddings\n",
-    "\n",
-    "model_kwargs = {\"device\": DEVICE}\n",
-    "encode_kwargs = {'normalize_embeddings': True}\n",
-    "lc_encoder = HuggingFaceEmbeddings(\n",
-    "    model_name=model_name,\n",
-    "    model_kwargs=model_kwargs,\n",
-    "    encode_kwargs=encode_kwargs\n",
-    ")\n",
-    "type(lc_encoder)"
+    "# Define the type of search index to add to the collection.\n",
+    "\n",
+    "# Drop the index, in case it already exists.\n",
+    "mc.release()\n",
+    "mc.drop_index()\n",
+    "\n",
+    "# M = max number graph connections per layer. Large M = denser graph.\n",
+    "# Choice of M: 4~64, larger M for larger data and larger embedding lengths.\n",
+    "M = 16\n",
+    "# efConstruction = num_candidate_nearest_neighbors per layer. \n",
+    "# Use Rule of thumb: int. 8~512, efConstruction = M * 2.\n",
+    "efConstruction = M * 2\n",
+    "\n",
+    "# Show how to change the vector index algorithm parameters.\n",
+    "INDEX_PARAMS = dict({\n",
+    "    'M': M,               \n",
+    "    \"efConstruction\": efConstruction })\n",
+    "# Create the search index for local Milvus server.\n",
+    "index_params = {\n",
+    "    \"index_type\": \"HNSW\", \n",
+    "    \"metric_type\": \"COSINE\", \n",
+    "    \"params\": INDEX_PARAMS\n",
+    "    }\n",
+    "mc.create_index(\"vector\", index_params)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "6861beb7",
    "metadata": {},
    "outputs": [
@@ -274,68 +364,106 @@
     "## Chunking\n",
     "\n",
     "Before embedding, it is necessary to decide your chunk strategy, chunk size, and chunk overlap.  In this demo, I will use:\n",
-    "- **Strategy** = Naive for now.  TODO use markdown header hierarchies.\n",
+    "- **Strategy** = Use HTML header hierarchies (h1, h2, h3).  Split sections further if too long.\n",
     "- **Chunk size** = Use the embedding model's parameter `MAX_SEQ_LENGTH`\n",
     "- **Overlap** = Rule-of-thumb 10-15%\n",
-    "- **Function** = Langchain's convenient `RecursiveCharacterTextSplitter` to split up long reviews recursively.\n"
+    "- **Function** = \n",
+    "  - Langchain's `HTMLHeaderTextSplitter` to split each page into header sections.\n",
+    "  - Langchain's `RecursiveCharacterTextSplitter` to split up long sections recursively.\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "a53595fa",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "chunking time: 0.0025839805603027344\n",
-      "type: list of <class 'langchain.schema.document.Document'>, len: 197\n",
+      "chunking time: 0.018436193466186523\n",
+      "docs: 15, split into: 15\n",
+      "split into chunks: 159, type: list of <class 'langchain.schema.document.Document'>\n",
       "\n",
       "Looking at a sample chunk...\n",
-      "{'source': 'rtdocs/pymilvus.readthedocs.io/en/latest/install.html'}\n",
-      "Installation¶\n",
-      "Installing via pip¶\n",
-      "PyMilvus is in the Python Package Index.\n",
-      "PyMilvus only support pyt\n"
+      "{'h1': 'Installation', 'h2': 'Installing via pip', 'source': 'rtdocs/pymilvus.readthedocs.io/en/latest/install.html'}\n",
+      "demonstrate how to install and using PyMilvus in a virtual environment. See virtualenv for more info\n"
      ]
     }
    ],
    "source": [
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
-    "\n",
-    "# Use the embedding model parameters to calculate chunk_size and overlap.\n",
+    "from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter\n",
+    "\n",
+    "# Define the headers to split on for the HTMLHeaderTextSplitter\n",
+    "headers_to_split_on = [\n",
+    "    (\"h1\", \"Header 1\"),\n",
+    "    (\"h2\", \"Header 2\"),\n",
+    "    (\"h3\", \"Header 3\"),\n",
+    "]\n",
+    "# Create an instance of the HTMLHeaderTextSplitter\n",
+    "html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)\n",
+    "\n",
+    "# Use the embedding model parameters.\n",
     "chunk_size = MAX_SEQ_LENGTH - HF_EOS_TOKEN_LENGTH\n",
-    "# Default chunk overlap is 10% chunk_size.\n",
     "chunk_overlap = np.round(chunk_size * 0.10, 0)\n",
     "\n",
-    "# Use recursive splitter to chunk text.\n",
-    "start_time = time.time()\n",
-    "text_splitter = RecursiveCharacterTextSplitter(\n",
+    "# Create an instance of the RecursiveCharacterTextSplitter\n",
+    "child_splitter = RecursiveCharacterTextSplitter(\n",
     "    chunk_size = chunk_size,\n",
     "    chunk_overlap = chunk_overlap,\n",
     "    length_function = len,\n",
     ")\n",
     "\n",
-    "chunks = text_splitter.create_documents(\n",
-    "    [doc.page_content for doc in docs], \n",
-    "    metadatas=[doc.metadata for doc in docs])\n",
+    "# Split the HTML text using the HTMLHeaderTextSplitter.\n",
+    "start_time = time.time()\n",
+    "html_header_splits = []\n",
+    "for doc in docs:\n",
+    "    splits = html_splitter.split_text(doc.page_content)\n",
+    "    for split in splits:\n",
+    "        # Header titles end with a pilcrow (\"¶ \"); peel them off into h1, h2, ... metadata.\n",
+    "        metadata = {}\n",
+    "        new_text = split.page_content\n",
+    "        for header_name, _ in headers_to_split_on:\n",
+    "            header_value = new_text.split(\"¶ \")[0].strip()\n",
+    "            metadata[header_name] = header_value\n",
+    "            try:\n",
+    "                new_text = new_text.split(\"¶ \")[1].strip()\n",
+    "            except IndexError:  # no further \"¶ \" separator in this section\n",
+    "                break\n",
+    "        split.metadata = {\n",
+    "            **metadata,\n",
+    "            \"source\": doc.metadata[\"source\"]\n",
+    "        }\n",
+    "        # page_content is left untouched, so the header text stays at the\n",
+    "        # start of each section and is embedded together with the body.\n",
+    "    html_header_splits.extend(splits)\n",
+    "\n",
+    "# Split the documents further into smaller, recursive chunks.\n",
+    "chunks = child_splitter.split_documents(html_header_splits)\n",
     "\n",
     "end_time = time.time()\n",
     "print(f\"chunking time: {end_time - start_time}\")\n",
-    "# print(f\"type: {type(chunks)}, len: {len(chunks)}, type: {type(chunks[0])}\")\n",
-    "print(f\"type: list of {type(chunks[0])}, len: {len(chunks)}\") \n",
+    "print(f\"docs: {len(docs)}, split into: {len(html_header_splits)}\")\n",
+    "print(f\"split into chunks: {len(chunks)}, type: list of {type(chunks[0])}\") \n",
     "\n",
+    "# Inspect chunks.\n",
     "print()\n",
     "print(\"Looking at a sample chunk...\")\n",
-    "print(chunks[0].metadata)\n",
-    "print(chunks[0].page_content[:100])\n"
+    "print(chunks[1].metadata)\n",
+    "print(chunks[1].page_content[:100])\n",
+    "\n",
+    "# TODO - remove this before saving in github.\n",
+    "# # Print the child splits with their associated header metadata\n",
+    "# print()\n",
+    "# for child in chunks:\n",
+    "#     print(f\"Content: {child.page_content}\")\n",
+    "#     print(f\"Metadata: {child.metadata}\")\n",
+    "#     print()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "id": "512130a3",
    "metadata": {},
    "outputs": [
@@ -343,15 +471,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'source': 'https://pymilvus.readthedocs.io/en/latest/install.html'}\n",
-      "Installation¶\n",
-      "Installing via pip¶\n",
-      "PyMilvus is in the Python Package Index.\n",
-      "PyMilvus only support python3(>= 3.6), usually, it’s ok to install PyMilvus like below.\n",
-      "$ python3 -m pip install pymilvus\n",
-      "Installing in a virtual environment¶\n",
-      "It’s recommended to use PyMilvus in a virtual environment, using virtual environment allows you to avoid\n",
-      "installing Python packages globally which could break system tools or other projects.\n"
+      "{'h1': 'Installation', 'h2': 'Installing via pip', 'source': 'https://pymilvus.readthedocs.io/en/latest/install.html'}\n",
+      "Installation¶ Installing via pip¶ PyMilvus is in the Python Package Index. PyMilvus only support pyt\n"
      ]
     }
    ],
@@ -363,7 +484,7 @@
     "    doc.metadata.update({\"source\": new_url})\n",
     "\n",
     "print(chunks[0].metadata)\n",
-    "print(chunks[0].page_content[:500])"
+    "print(chunks[0].page_content[:100])"
    ]
   },
   {
@@ -373,19 +494,46 @@
    "source": [
     "## Insert data into Milvus\n",
     "\n",
-    "The code below uses the [Langchain Milvus](https://api.python.langchain.com/en/latest/_modules/langchain/vectorstores/milvus.html#Milvus) adapter.  \n",
-    "- Default index is AUTOINDEX. <br>\n",
-    "💡 AUTOINDEX works on both Milvus and Zilliz Cloud (where it is the fastest!)\n",
-    "- collection_name is \"LangChainCollection\".\n",
-    "- Schema is \n",
-    "  - pk (str): Name of the primary key field.\n",
-    "  - text (str): Name of the text field.\n",
-    "  - vector (str): Name of the vector field. \n"
+    "Milvus and Milvus Lite support loading pandas dataframes directly.\n",
+    "\n",
+    "Milvus Client, however, requires converting pandas df into a list of dictionaries first.\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert chunks and embeddings to a list of dictionaries.\n",
+    "chunk_list = []\n",
+    "for chunk in chunks:\n",
+    "    embeddings = torch.tensor(encoder.encode([chunk.page_content]))\n",
+    "    embeddings = F.normalize(embeddings, p=2, dim=1)\n",
+    "    converted_values = list(map(np.float32, embeddings))[0]\n",
+    "    \n",
+    "    # Only use h1, h2. Truncate the metadata in case too long.\n",
+    "    # h2 is optional: a section without a second-level header gets an\n",
+    "    # empty string, so every inserted row carries the same set of\n",
+    "    # dynamic fields.\n",
+    "    h2 = chunk.metadata.get('h2', \"\")[:200]\n",
+    "    chunk_dict = {\n",
+    "        'vector': converted_values,\n",
+    "        'chunk': chunk.page_content,\n",
+    "        'source': chunk.metadata['source'],\n",
+    "        'h1': chunk.metadata['h1'][:200],\n",
+    "        'h2': h2,\n",
+    "    }\n",
+    "    chunk_list.append(chunk_dict)\n",
+    "\n",
+    "# # TODO - remove this before saving in github.\n",
+    "# for chunk in chunk_list[:1]:\n",
+    "#     print(chunk)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
    "id": "b51ff139",
    "metadata": {},
    "outputs": [
@@ -394,30 +542,28 @@
      "output_type": "stream",
      "text": [
       "Start inserting entities\n",
-      "Langchain Milvus insert time for 197 vectors: 11.850640058517456 seconds\n",
-      "type: <class 'langchain.vectorstores.milvus.Milvus'>\n"
+      "Milvus insert time for 159 vectors: 0.022408246994018555 seconds\n",
+      "(insert count: 159, delete count: 0, upsert count: 0, timestamp: 445782462398988290, success count: 159, err count: 0)\n",
+      "[{\"name\":\"_default\",\"collection_name\":\"MilvusDocs\",\"description\":\"\"}]\n"
      ]
     }
    ],
    "source": [
     "# Insert a batch of data into the Milvus collection.\n",
-    "from langchain.vectorstores import Milvus\n",
-    "MILVUS_PORT = 19530\n",
-    "MILVUS_HOST = \"127.0.0.1\"\n",
     "\n",
     "print(\"Start inserting entities\")\n",
     "start_time = time.time()\n",
-    "\n",
-    "vector_store = Milvus.from_documents(\n",
-    "    chunks,\n",
-    "    embedding=lc_encoder,\n",
-    "    connection_args={\"host\": MILVUS_HOST, \n",
-    "                     \"port\": MILVUS_PORT},\n",
-    ")\n",
+    "insert_result = mc.insert(chunk_list)\n",
     "\n",
     "end_time = time.time()\n",
-    "print(f\"Langchain Milvus insert time for {len(chunks)} vectors: {end_time - start_time} seconds\")\n",
-    "print(f\"type: {type(vector_store)}\")\n"
+    "print(f\"Milvus insert time for {len(chunk_list)} vectors: {end_time - start_time} seconds\")\n",
+    "\n",
+    "# After final entity is inserted, call flush to stop growing segments left in memory.\n",
+    "mc.flush() \n",
+    "\n",
+    "# Inspect results.\n",
+    "print(insert_result)\n",
+    "print(mc.partitions) # list[Partition] objects\n"
    ]
   },
   {
@@ -456,7 +602,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 13,
    "id": "5e7f41f4",
    "metadata": {},
    "outputs": [
@@ -464,13 +610,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "query length: 47\n"
+      "query length: 54\n"
      ]
     }
    ],
    "source": [
     "# Define a sample question about your data.\n",
-    "question = 'What is the default AUTOINDEX in Milvus Client?'\n",
+    "question = \"what is the default distance metric used in AUTOINDEX?\"\n",
     "query = [question]\n",
     "\n",
     "# Inspect the length of the query.\n",
@@ -495,83 +641,132 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Question: what is the default distance metric used in AUTOINDEX?\n",
+      "filter expr: ((h1 == 'API reference') && (h2 == 'Client'))\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Ask the question.\n",
+    "METADATA_URL = \"https://pymilvus.readthedocs.io/en/latest/_modules/milvus/client/stub.html\"\n",
+    "\n",
+    "# question = f\"According to the source '{METADATA_URL}', what is the default distance metric used in AUTOINDEX?\"\n",
+    "# question = \"What is default value of the minimum proportion to seal a segment?\"\n",
+    "question = \"what is the default distance metric used in AUTOINDEX?\"\n",
+    "print(f\"Question: {question}\")\n",
+    "\n",
+    "# Alternative filter on the source URL: FILTER_EXPR = f\"source == '{METADATA_URL}'\"\n",
+    "# Filter on the dynamic metadata fields h1 and h2; parentheses must balance.\n",
+    "FILTER_EXPR = \"((h1 == 'API reference') && (h2 == 'Client'))\"\n",
+    "print(f\"filter expr: {FILTER_EXPR}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
    "id": "89642119",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loaded milvus collection into memory.\n",
+      "Milvus search time: 0.005253791809082031 sec\n",
+      "type: <class 'pymilvus.client.abstract.SearchResult'>, count: 5\n"
+     ]
+    }
+   ],
    "source": [
-    "# RETRIEVAL USING MILVUS WITH LANGCHAIN\n",
+    "# RETRIEVAL USING MILVUS.\n",
     "\n",
-    "# Search with metadata.  TODO: Add better filtering query!\n",
-    "METADATA_URL = \"https://pymilvus.readthedocs.io/en/latest/_modules/milvus/client/stub.html\"\n",
+    "# Before conducting a search based on a query, you need to load the data into memory.\n",
+    "mc.load()\n",
+    "print(\"Loaded milvus collection into memory.\")\n",
+    "\n",
+    "# Embed the question using the same embedding model.\n",
+    "embedded_question = torch.tensor(encoder.encode([question]))\n",
+    "# Normalize embeddings to unit length.\n",
+    "embedded_question = F.normalize(embedded_question, p=2, dim=1)\n",
+    "# Convert the embeddings to list of list of np.float32.\n",
+    "embedded_question = list(map(np.float32, embedded_question))\n",
+    "\n",
+    "# Return top k results with HNSW index.\n",
+    "TOP_K = 5\n",
     "SEARCH_PARAMS = dict({\n",
-    "    \"expr\": \"text = METADATA_URL\",\n",
+    "    # Re-use index param for num_candidate_nearest_neighbors; index-specific knobs go under \"params\".\n",
+    "    \"params\": {\"ef\": INDEX_PARAMS['efConstruction']}\n",
     "    })\n",
     "\n",
-    "start_time = time.time()\n",
+    "# Filter using metadata (the expression's parentheses must balance).\n",
+    "# FILTER_EXPR = f\"source == '{METADATA_URL}'\"\n",
+    "FILTER_EXPR = \"((h1 == 'API reference') && (h2 == 'Client'))\"\n",
     "\n",
-    "# Default search.\n",
-    "docs = vector_store.similarity_search(\n",
-    "    question, \n",
-    "    k=100,\n",
+    "# Run semantic vector search using your query and the vector database.\n",
+    "start_time = time.time()\n",
+    "results = mc.search(\n",
+    "    data=embedded_question, \n",
+    "    anns_field=\"vector\", \n",
     "    param=SEARCH_PARAMS,\n",
-    "    verbose=True,\n",
+    "    # Access dynamic fields in the boolean expression.\n",
+    "    # expr=FILTER_EXPR,\n",
+    "    output_fields=[\"h1\", \"h2\", \"chunk\", \"source\"], \n",
+    "    limit=TOP_K,\n",
+    "    consistency_level=\"Eventually\"\n",
     "    )\n",
     "\n",
-    "end_time = time.time()\n",
-    "print(f\"Milvus query time: {end_time - start_time}\")\n",
+    "elapsed_time = time.time() - start_time\n",
+    "print(f\"Milvus search time: {elapsed_time} sec\")\n",
     "\n",
-    "# # View the retrieval result.\n",
-    "# for d in docs:\n",
-    "#     print(d.metadata)\n",
-    "#     # print(d.page_content[:100])\n"
+    "# Inspect search result.\n",
+    "print(f\"type: {type(results)}, count: {len(results[0])}\")\n"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "95f7e011",
    "metadata": {},
    "source": [
     "## Assemble and inspect the search result\n",
     "\n",
-    "The search result is in the list variable `docs` of type `'pymilvus.orm.search.SearchResult'`.  "
+    "The search result is in the variable `results` of type `'pymilvus.client.abstract.SearchResult'`; `results[0]` holds the hits for the first (and only) query.  "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
-   "id": "d3dfa33a",
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Count raw retrievals: 100\n",
-      "Count unique texts: 37\n",
-      "17084\n"
+      "0th query result\n",
+      "id: 445782430324621984, distance: 0.7082178592681885, entity: {'chunk': \"index_file_size (int) – Segment size. See Storage Concepts. metric_type (MetricType) – Distance Metrics type. Valued form MetricType. See Distance Metrics. A demo is as follow: param={'collection_name': 'name', 'dimension': 16, 'index_file_size': 1024 # Optional, default 1024， 'metric_type': MetricType.L2 # Optional, default MetricType.L2 } timeout (float) – An optional duration of time in seconds to allow for the RPC. When timeout is set to None, client waits until server responses or error occurs.\", 'source': 'https://pymilvus.readthedocs.io/en/latest/api.html', 'h1': 'API reference', 'h2': 'Client'}\n",
+      "id: 445782430324622095, distance: 0.690363883972168, entity: {'chunk': 'A metric for binary vectors, only support :attr:`~milvus.IndexType.FLAT` index. #: See `Substructure `_. SUPERSTRUCTURE = 7 def __repr__(self): return \"\".format(self.__class__.__name__, self._name_) def __str__(self): return self._name_', 'source': 'https://pymilvus.readthedocs.io/en/latest/_modules/milvus/client/types.html', 'h1': 'Source code for milvus.client.types from enum import IntEnum class Status: \"\"\" :attribute code: int (optional) default as ok :attribute message: str (optional) current status message \"\"\" SUCCESS = 0 U', 'h2': ''}\n",
+      "id: 445782430324621945, distance: 0.6885938048362732, entity: {'chunk': 'you can refer to Storage Concepts for more information about segments and index_file_size. metric_type:Milvus compute distance between two vectors, you can refer to Distance Metrics for more information. Now we can create a collection: >>> collection_name = \\'demo_film_tutorial\\' >>> collection_param = { ... \"collection_name\": collection_name, ... \"dimension\": 8, ... \"index_file_size\": 2048, ... \"metric_type\": MetricType.L2 ... } >>> client.create_collection(collection_param) Status(code=0, message=\\'Create', 'source': 'https://pymilvus.readthedocs.io/en/latest/tutorial.html', 'h1': 'Tutorial', 'h2': 'This is a basic introduction to Milvus by PyMilvus. For a runnable python script, checkout example.py on PyMilvus Github, or hello milvus on Milvus official website. It’s a good recommended start to g'}\n",
+      "id: 445782430324621947, distance: 0.6716917157173157, entity: {'chunk': \"metric_type=) The attributes of collection can be extracted from info. >>> info.collection_name 'demo_film_tutorial' >>> info.dimension 8 >>> info.index_file_size 2048 >>> info.metric_type This tutorial is a basic intro tutorial, building index won’t be covered by this tutorial. If you want to go further into Milvus with indexes, it’s recommended to check our index examples. If you’re already known about indexes from index examples, and you want a full lists of params supported by PyMilvus, you check out\", 'source': 'https://pymilvus.readthedocs.io/en/latest/tutorial.html', 'h1': 'Tutorial', 'h2': 'This is a basic introduction to Milvus by PyMilvus. For a runnable python script, checkout example.py on PyMilvus Github, or hello milvus on Milvus official website. It’s a good recommended start to g'}\n",
+      "id: 445782430324622037, distance: 0.6690606474876404, entity: {'chunk': \"-- Segment size. See `Storage Concepts `_. * *metric_type* (``MetricType``) -- Distance Metrics type. Valued form :class:`~milvus.MetricType`. See `Distance Metrics `_. A demo is as follow: .. code-block:: python param={'collection_name': 'name', 'dimension': 16, 'index_file_size': 1024 # Optional, default 1024， 'metric_type': MetricType.L2 # Optional, default MetricType.L2 } :param timeout: An optional duration of time in seconds to allow for the RPC. When timeout is set to None, client waits until\", 'source': 'https://pymilvus.readthedocs.io/en/latest/_modules/milvus/client/stub.html', 'h1': 'Source code for milvus.client.stub # -*- coding: UTF-8 -*- import collections import copy import functools import logging from urllib.parse import urlparse from . import __version__ from .types import', 'h2': ''}\n",
+      "2267\n"
      ]
     }
    ],
    "source": [
-    "print(f\"Count raw retrievals: {len(docs)}\")\n",
-    "\n",
-    "unique_sources = []\n",
-    "unique_texts = []\n",
-    "for doc in docs:\n",
-    "    if doc.metadata['source'] == METADATA_URL:\n",
-    "        if doc.page_content not in unique_texts:\n",
-    "            unique_texts.append(doc.page_content)\n",
-    "            unique_sources.append(doc.metadata['source'])\n",
-    "print(f\"Count unique texts: {len(unique_texts)}\")\n",
-    "# [ print(text) for text in unique_texts ]\n",
-    "\n",
-    "# Assemble all the results in a zipped list.\n",
-    "formatted_context = list(zip(unique_sources, unique_texts))\n",
+    "for n, hits in enumerate(results):\n",
+    "    print(f\"{n}th query result\")\n",
+    "    for hit in hits:\n",
+    "        print(hit)\n",
     "\n",
     "# Assemble the context as a stuffed string.\n",
     "context = \"\"\n",
-    "for source, text in formatted_context:\n",
+    "for r in results[0]:\n",
+    "    text = r.entity.chunk\n",
     "    context += f\"{text} \"\n",
     "print(len(context))"
    ]
@@ -588,7 +783,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 17,
    "id": "3e7fa0b6",
    "metadata": {},
    "outputs": [
@@ -596,7 +791,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Question: What is the default AUTOINDEX in Milvus Client?\n",
+      "Question: what is the default distance metric used in AUTOINDEX?\n",
       "Answer: lazy dog\n"
      ]
     }
@@ -629,7 +824,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 18,
    "id": "a68e87b1",
    "metadata": {},
    "outputs": [
@@ -637,7 +832,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Question: What is the default AUTOINDEX in Milvus Client?\n",
+      "Question: what is the default distance metric used in AUTOINDEX?\n",
       "Answer: MetricType.L2\n"
      ]
     }
@@ -662,7 +857,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 19,
    "id": "d0e81e68",
    "metadata": {},
    "outputs": [],
@@ -674,7 +869,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 20,
    "id": "c777937e",
    "metadata": {},
    "outputs": [
@@ -690,9 +885,9 @@
       "\n",
       "torch       : 2.0.1\n",
       "transformers: 4.34.1\n",
-      "milvus      : 2.3.0\n",
-      "pymilvus    : 2.3.0\n",
-      "langchain   : 0.0.301\n",
+      "milvus      : 2.3.3\n",
+      "pymilvus    : 2.3.3\n",
+      "langchain   : 0.0.322\n",
       "\n",
       "conda environment: py310\n",
       "\n"
diff --git a/notebooks/text/imdb_search_milvus_client.ipynb b/notebooks/text/imdb_search_milvus_client.ipynb
index 0d7a482a0..2481eb129 100755
--- a/notebooks/text/imdb_search_milvus_client.ipynb
+++ b/notebooks/text/imdb_search_milvus_client.ipynb
@@ -890,7 +890,9 @@
    "source": [
     "## Insert data into Milvus\n",
     "\n",
-    "We can insert a batch of data directly from a pandas dataframe into Milvus.\n",
+    "Milvus and Milvus Lite support loading pandas dataframes directly.\n",
+    "\n",
+    "Milvus Client, however, requires converting the pandas DataFrame into a list of dictionaries first.\n",
     "\n",
     "🤔 TODO: This would be a good place to demonstrate Milvus' scalability by using Ray together with Milvus to run batches in parallel. I'll do this in a future tutorial."
    ]
@@ -1287,10 +1289,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 1,
    "id": "d0e81e68",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'default_server' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[1;32m/Users/christybergman/Documents/milvus_bootcamp/bootcamp/notebooks/text/imdb_search_milvus_client.ipynb Cell 42\u001b[0m line \u001b[0;36m2\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/christybergman/Documents/milvus_bootcamp/bootcamp/notebooks/text/imdb_search_milvus_client.ipynb#X56sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39m# Shut down and cleanup the milvus server.\u001b[39;00m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/christybergman/Documents/milvus_bootcamp/bootcamp/notebooks/text/imdb_search_milvus_client.ipynb#X56sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m default_server\u001b[39m.\u001b[39mstop()\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/christybergman/Documents/milvus_bootcamp/bootcamp/notebooks/text/imdb_search_milvus_client.ipynb#X56sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m default_server\u001b[39m.\u001b[39mcleanup()\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'default_server' is not defined"
+     ]
+    }
+   ],
    "source": [
     "# Shut down and cleanup the milvus server.\n",
     "default_server.stop()\n",