Skip to content

Commit

Permalink
Merge branch 'branch-25.06' into use_cb
Browse files Browse the repository at this point in the history
  • Loading branch information
jakirkham authored Mar 4, 2025
2 parents ab944d3 + 5fe1e7d commit 6884fe0
Show file tree
Hide file tree
Showing 16 changed files with 1,551 additions and 0 deletions.
1 change: 1 addition & 0 deletions conda/environments/all_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ dependencies:
- dglgo
- faiss-cpu
- google-search-results==2.4
- gpudb>=7.2.2.3
- langchain-nvidia-ai-endpoints==0.0.11
- langchain-openai==0.1.3
- langchain==0.1.16
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ dependencies:
- dglgo
- faiss-cpu
- google-search-results==2.4
- gpudb>=7.2.2.3
- langchain-nvidia-ai-endpoints==0.0.11
- langchain-openai==0.1.3
- langchain==0.1.16
Expand Down
1 change: 1 addition & 0 deletions conda/environments/dev_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ dependencies:
- --extra-index-url https://download.pytorch.org/whl/cu124
- databricks-cli < 0.100
- databricks-connect
- gpudb>=7.2.2.3
- pypdfium2==4.30
- pytest-kafka==0.6.0
- torch==2.4.0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/dev_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ dependencies:
- --extra-index-url https://download.pytorch.org/whl/cu124
- databricks-cli < 0.100
- databricks-connect
- gpudb>=7.2.2.3
- milvus==2.3.5
- pymilvus==2.3.6
- pytest-kafka==0.6.0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/examples_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ dependencies:
- dglgo
- faiss-cpu
- google-search-results==2.4
- gpudb>=7.2.2.3
- langchain-nvidia-ai-endpoints==0.0.11
- langchain-openai==0.1.3
- langchain==0.1.16
Expand Down
1 change: 1 addition & 0 deletions conda/environments/examples_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ dependencies:
- dglgo
- faiss-cpu
- google-search-results==2.4
- gpudb>=7.2.2.3
- langchain-nvidia-ai-endpoints==0.0.11
- langchain-openai==0.1.3
- langchain==0.1.16
Expand Down
1 change: 1 addition & 0 deletions conda/environments/runtime_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,6 @@ dependencies:
- --extra-index-url https://download.pytorch.org/whl/cu124
- databricks-cli < 0.100
- databricks-connect
- gpudb>=7.2.2.3
- torch==2.4.0
name: runtime_cuda-128_arch-aarch64
1 change: 1 addition & 0 deletions conda/environments/runtime_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ dependencies:
- --extra-index-url https://download.pytorch.org/whl/cu124
- databricks-cli < 0.100
- databricks-connect
- gpudb>=7.2.2.3
- milvus==2.3.5
- pymilvus==2.3.6
- torch==2.4.0+cu124
Expand Down
2 changes: 2 additions & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ dependencies:
- &torch-extra-index --extra-index-url https://download.pytorch.org/whl/cu124
- databricks-cli < 0.100
- databricks-connect
- &gpudb gpudb>=7.2.2.3
specific:
- output_types: [conda]
matrices:
Expand Down Expand Up @@ -485,6 +486,7 @@ dependencies:
packages:
- &faiss-cpu faiss-cpu
- &google-search-results google-search-results==2.4
- *gpudb
- &langchain langchain==0.1.16
- &langchain-nvidia-ai-endpoints langchain-nvidia-ai-endpoints==0.0.11
- &langchain-openai langchain-openai==0.1.3
Expand Down
95 changes: 95 additions & 0 deletions examples/llm/kinetica_pipeline/kinetica_pipeline_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import os
import random

import cudf

from morpheus.config import Config
from morpheus.config import ExecutionMode
from morpheus.messages import ControlMessage
from morpheus.modules import to_control_message # noqa: F401 # pylint: disable=unused-import
from morpheus.pipeline.linear_pipeline import LinearPipeline
from morpheus.stages.general.linear_modules_stage import LinearModulesStage
from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage
from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE
from morpheus.utils.module_ids import TO_CONTROL_MESSAGE
from morpheus_llm.service.vdb.kinetica_vector_db_service import KineticaVectorDBService
from morpheus_llm.stages.output.write_to_vector_db_stage import WriteToVectorDBStage

logger = logging.getLogger(__name__)


def get_test_df(num_input_rows):
    """Return a cuDF DataFrame of synthetic rows for the example pipeline.

    Each row carries an integer ``id``, a 3-element random-float ``embeddings``
    vector, and a JSON-encoded ``metadata`` string.

    Parameters
    ----------
    num_input_rows : int
        Number of rows to generate.

    Returns
    -------
    cudf.DataFrame
        DataFrame with columns ``id``, ``embeddings`` and ``metadata``.
    """
    ids = list(range(num_input_rows))
    embeddings = [[random.random() for _ in range(3)] for _ in ids]
    metadata = [json.dumps({"metadata": f"Sample metadata for row {row_id}"}) for row_id in ids]

    return cudf.DataFrame({"id": ids, "embeddings": embeddings, "metadata": metadata})


def main():
    """Run a small demo pipeline that writes random embeddings to Kinetica.

    Connection settings are read from the environment variables
    ``kinetica_host``, ``username``, ``password`` and ``schema`` (all optional;
    the host defaults to ``http://localhost:9191``). The pipeline generates 10
    synthetic rows, wraps them in a ControlMessage, and writes them to a
    Kinetica collection via WriteToVectorDBStage.
    """
    host = os.getenv("kinetica_host", "http://localhost:9191")
    username = os.getenv("username", "")
    password = os.getenv("password", "")
    schema = os.getenv("schema", "")

    config = Config()
    config.execution_mode = ExecutionMode.GPU

    kinetica_db_service = KineticaVectorDBService(host, user=username, password=password, kinetica_schema=schema)

    # Qualify the collection with the configured schema, falling back to
    # Kinetica's default "ki_home" schema when none is provided.
    # (os.getenv with a string default never returns None, so a simple
    # truthiness check on the empty string is sufficient.)
    collection_name = "test_collection"
    collection_name = f"{schema}.{collection_name}" if schema else f"ki_home.{collection_name}"

    # Column layout: [name, base type, property] triples expected by the
    # Kinetica table-creation API.
    columns = [
        ["id", "long", "primary_key"],
        ["embeddings", "bytes", "vector(3)"],
        ["metadata", "string", "json"],
    ]
    kinetica_db_service.create(collection_name, type=columns)

    df = get_test_df(10)
    to_cm_module_config = {
        "module_id": TO_CONTROL_MESSAGE, "module_name": "to_control_message", "namespace": MORPHEUS_MODULE_NAMESPACE
    }

    # Step 1: Create a pipeline that feeds the in-memory DataFrame through a
    # DataFrame -> ControlMessage conversion module.
    pipeline = LinearPipeline(config)
    pipeline.set_source(InMemorySourceStage(config, [df]))
    pipeline.add_stage(
        LinearModulesStage(config,
                           to_cm_module_config,
                           input_port_name="input",
                           output_port_name="output",
                           output_type=ControlMessage))

    # BUG FIX: previously the bare literal "test_collection" was passed here,
    # which does not match the schema-qualified table created above (e.g.
    # "ki_home.test_collection"), so the write stage targeted a non-existent
    # collection. Use the qualified name instead.
    pipeline.add_stage(WriteToVectorDBStage(config, kinetica_db_service, collection_name))

    pipeline.run()


# Standard script entry point: run the example only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ markers = [
"benchmark: Benchmarks",
"slow: Slow tests",
"kafka: Tests that require a running instance of kafka",
"kinetica: Tests that require a running instance of Kinetica",
"milvus: Tests that require a running instance of milvus",
"gpu_mode: Test support GPU nodes and objects",
"cpu_mode: Test only supports CPU nodes and objects",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
--extra-index-url https://download.pytorch.org/whl/cu124
faiss-cpu
google-search-results==2.4
gpudb>=7.2.2.3
langchain-nvidia-ai-endpoints==0.0.11
langchain-openai==0.1.3
langchain==0.1.16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
--extra-index-url https://download.pytorch.org/whl/cu124
faiss-cpu
google-search-results==2.4
gpudb>=7.2.2.3
langchain-nvidia-ai-endpoints==0.0.11
langchain-openai==0.1.3
langchain==0.1.16
Expand Down
Loading

0 comments on commit 6884fe0

Please sign in to comment.