diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py index fb127f4fac..74b24e52c7 100644 --- a/examples/llm/vdb_upload/run.py +++ b/examples/llm/vdb_upload/run.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ def run(): ) @click.option( "--model_max_batch_size", - default=64, + default=256, type=click.IntRange(min=1), help="Max batch size to use for the model", ) diff --git a/tests/llm/test_vdb_upload_pipe.py b/tests/llm/test_vdb_upload_pipe.py index fb0599f938..c1213a70c3 100644 --- a/tests/llm/test_vdb_upload_pipe.py +++ b/tests/llm/test_vdb_upload_pipe.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,7 +37,7 @@ from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage EMBEDDING_SIZE = 384 -MODEL_MAX_BATCH_SIZE = 64 +MODEL_MAX_BATCH_SIZE = 256 MODEL_FEA_LENGTH = 512 @@ -116,7 +116,7 @@ def test_vdb_upload_pipe(mock_triton_client: mock.MagicMock, "name": "output", "datatype": "FP32", "shape": [-1, EMBEDDING_SIZE] }] } - mock_model_config = {"config": {"max_batch_size": 256}} + mock_model_config = {"config": {"max_batch_size": MODEL_MAX_BATCH_SIZE}} mock_triton_client.return_value = mock_triton_client mock_triton_client.is_server_live.return_value = True