Skip to content

Commit

Permalink
Add additional outputs test
Browse files Browse the repository at this point in the history
  • Loading branch information
kthui committed Nov 2, 2024
1 parent 58ee481 commit 264d387
Show file tree
Hide file tree
Showing 3 changed files with 248 additions and 1 deletion.
180 changes: 180 additions & 0 deletions ci/L0_vllm_additional_outputs/additional_outputs_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import unittest

import numpy as np
import tritonclient.grpc as grpcclient


class InferTest(unittest.TestCase):
    """End-to-end checks for the optional additional outputs of the vLLM model.

    Every check sends ``_prompt`` to the model ``_model_name`` through a gRPC
    stream on ``_grpc_url`` and verifies that the ``finish_reason``,
    ``cumulative_logprob`` and ``num_token_ids`` outputs are present exactly
    when the matching ``output_*`` request flag is set to ``True``.
    """

    _grpc_url = "localhost:8001"
    _model_name = "vllm_opt"
    _sampling_parameters = {"temperature": "0", "top_p": "1"}
    _prompt = "In this example,"

    def _get_inputs(
        self,
        prompt,
        stream=True,
        sampling_parameters=None,
        output_finish_reason=None,
        output_cumulative_logprob=None,
        output_num_token_ids=None,
    ):
        """Build the list of ``InferInput`` tensors for one request.

        Args:
            prompt: Text sent as the ``text_input`` tensor (UTF-8 encoded).
            stream: Value of the ``stream`` BOOL tensor.
            sampling_parameters: Optional dict, JSON-encoded into the
                ``sampling_parameters`` tensor; omitted when ``None``.
            output_finish_reason: Optional bool for the
                ``output_finish_reason`` tensor; omitted when ``None``.
            output_cumulative_logprob: Optional bool for the
                ``output_cumulative_logprob`` tensor; omitted when ``None``.
            output_num_token_ids: Optional bool for the
                ``output_num_token_ids`` tensor; omitted when ``None``.

        Returns:
            The inputs in the order: text_input, stream, sampling_parameters,
            then the three optional flags.
        """
        inputs = []

        text_input = grpcclient.InferInput("text_input", [1], "BYTES")
        text_input.set_data_from_numpy(
            np.array([prompt.encode("utf-8")], dtype=np.object_)
        )
        inputs.append(text_input)

        stream_input = grpcclient.InferInput("stream", [1], "BOOL")
        stream_input.set_data_from_numpy(np.array([stream], dtype=bool))
        inputs.append(stream_input)

        if sampling_parameters is not None:
            params_input = grpcclient.InferInput("sampling_parameters", [1], "BYTES")
            params_input.set_data_from_numpy(
                np.array(
                    [json.dumps(sampling_parameters).encode("utf-8")],
                    dtype=np.object_,
                )
            )
            inputs.append(params_input)

        # The three optional flags share the same shape and datatype, so they
        # are built in one loop; a flag left as None is not sent at all.
        optional_flags = (
            ("output_finish_reason", output_finish_reason),
            ("output_cumulative_logprob", output_cumulative_logprob),
            ("output_num_token_ids", output_num_token_ids),
        )
        for name, flag in optional_flags:
            if flag is None:
                continue
            flag_input = grpcclient.InferInput(name, [1], "BOOL")
            flag_input.set_data_from_numpy(np.array([flag], dtype=bool))
            inputs.append(flag_input)

        return inputs

    def _callback(self, result, error):
        """Stream callback: record each (result, error) pair in arrival order."""
        self._responses.append({"result": result, "error": error})

    def _llm_infer(self, inputs):
        """Send one streaming request and collect its responses.

        Resets ``self._responses``, issues the request with the class-level
        sampling parameters, and fails the test if nothing came back.
        """
        self._responses = []
        with grpcclient.InferenceServerClient(self._grpc_url) as client:
            client.start_stream(self._callback)
            client.async_stream_infer(
                self._model_name, inputs=inputs, parameters=self._sampling_parameters
            )
            client.stop_stream()
        # Without this, the per-output checks below would loop over nothing
        # and pass without having verified anything.
        self.assertGreater(len(self._responses), 0, "no response received")

    def _assert_text_output_valid(self):
        """Check that the concatenated ``text_output`` looks like a sentence."""
        pieces = []
        for response in self._responses:
            result, error = response["result"], response["error"]
            self.assertIsNone(error)
            pieces.append(result.as_numpy(name="text_output")[0].decode("utf-8"))
        text_output = "".join(pieces)
        self.assertGreater(len(text_output), 0, "output is empty")
        self.assertGreater(text_output.count(" "), 4, "output is not a sentence")

    def _assert_finish_reason(self, output_finish_reason):
        """Check ``finish_reason`` on every response.

        When the flag is ``None`` or ``False`` the output must be absent.
        Otherwise every response but the last must carry the string "None"
        and the last one must carry "length".
        """
        last_index = len(self._responses) - 1
        for index, response in enumerate(self._responses):
            result, error = response["result"], response["error"]
            self.assertIsNone(error)
            finish_reason_np = result.as_numpy(name="finish_reason")
            if not output_finish_reason:
                self.assertIsNone(finish_reason_np)
                continue
            finish_reason = finish_reason_np[0].decode("utf-8")
            # NOTE(review): "length" presumably reflects generation stopping at
            # the token limit -- confirm against the backend.
            expected = "length" if index == last_index else "None"
            self.assertEqual(finish_reason, expected)

    def _assert_cumulative_logprob(self, output_cumulative_logprob):
        """Check ``cumulative_logprob`` on every response.

        When the flag is ``None`` or ``False`` the output must be absent.
        Otherwise each value must differ from the previous one, with 0.0 used
        as the value before the first response.
        """
        prev_cumulative_logprob = 0.0
        for response in self._responses:
            result, error = response["result"], response["error"]
            self.assertIsNone(error)
            cumulative_logprob_np = result.as_numpy(name="cumulative_logprob")
            if not output_cumulative_logprob:
                self.assertIsNone(cumulative_logprob_np)
                continue
            cumulative_logprob = cumulative_logprob_np[0].astype(float)
            self.assertNotEqual(cumulative_logprob, prev_cumulative_logprob)
            prev_cumulative_logprob = cumulative_logprob

    def _assert_num_token_ids(self, output_num_token_ids):
        """Check ``num_token_ids`` on every response.

        When the flag is ``None`` or ``False`` the output must be absent.
        Otherwise each response must report a positive count.
        """
        for response in self._responses:
            result, error = response["result"], response["error"]
            self.assertIsNone(error)
            num_token_ids_np = result.as_numpy(name="num_token_ids")
            if not output_num_token_ids:
                self.assertIsNone(num_token_ids_np)
                continue
            num_token_ids = num_token_ids_np[0].astype(int)
            self.assertGreater(num_token_ids, 0)

    def _assert_additional_outputs_valid(
        self,
        stream,
        output_finish_reason,
        output_cumulative_logprob,
        output_num_token_ids,
    ):
        """Run one inference with the given flags and validate every output."""
        inputs = self._get_inputs(
            self._prompt,
            stream=stream,
            sampling_parameters=self._sampling_parameters,
            output_finish_reason=output_finish_reason,
            output_cumulative_logprob=output_cumulative_logprob,
            output_num_token_ids=output_num_token_ids,
        )
        self._llm_infer(inputs)
        self._assert_text_output_valid()
        self._assert_finish_reason(output_finish_reason)
        self._assert_cumulative_logprob(output_cumulative_logprob)
        self._assert_num_token_ids(output_num_token_ids)

    def test_additional_outputs(self):
        """Exercise every combination of stream mode and output flags.

        Each combination runs in its own ``subTest`` so a failure names the
        exact flag values involved and the remaining combinations still run.
        """
        choices = [None, False, True]
        for stream in [True, False]:
            for output_finish_reason in choices:
                for output_cumulative_logprob in choices:
                    for output_num_token_ids in choices:
                        with self.subTest(
                            stream=stream,
                            output_finish_reason=output_finish_reason,
                            output_cumulative_logprob=output_cumulative_logprob,
                            output_num_token_ids=output_num_token_ids,
                        ):
                            self._assert_additional_outputs_valid(
                                stream,
                                output_finish_reason,
                                output_cumulative_logprob,
                                output_num_token_ids,
                            )


if __name__ == "__main__":
unittest.main()
67 changes: 67 additions & 0 deletions ci/L0_vllm_additional_outputs/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Restrict the test to the first GPU and load the shared CI helpers.
# NOTE(review): run_server and SERVER_PID are presumably provided by util.sh.
export CUDA_VISIBLE_DEVICES=0
source ../common/util.sh

# Quoted so the shell cannot treat "[grpc]" as a glob character class.
pip3 install "tritonclient[grpc]"

# Prepare Model: start from a clean model repository and copy the sample model
# in under the name the Python test expects.
rm -rf models vllm_baseline_output.pkl && mkdir -p models
SAMPLE_MODELS_REPO="../../samples/model_repository"
cp -r "$SAMPLE_MODELS_REPO/vllm_model" models/vllm_opt
# Lower the GPU memory fraction requested by the sample configuration.
sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.3/' models/vllm_opt/1/model.json

RET=0

# Infer Test
CLIENT_LOG="vllm_opt.log"
SERVER_LOG="vllm_opt.server.log"
SERVER_ARGS="--model-repository=models"
run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# Errors are tolerated while the client runs so that a failing test is
# recorded in RET and the server is still shut down afterwards.
set +e
python3 additional_outputs_test.py > "$CLIENT_LOG" 2>&1
if [ $? -ne 0 ]; then
    cat "$CLIENT_LOG"
    echo -e "\n***\n*** additional_outputs_test FAILED. \n***"
    RET=1
fi
set -e

# Stop the server and wait for it to exit before reporting the result.
kill "$SERVER_PID"
wait "$SERVER_PID"

if [ "$RET" -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi
exit "$RET"
2 changes: 1 addition & 1 deletion ci/common/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


# Defaults for the server settings; a value already set in the environment
# takes precedence over the default given after ':='.
# Path of the server executable.
SERVER=${SERVER:=/opt/tritonserver/bin/tritonserver}
# Server address, read from TRITONSERVER_IPADDR; ':=' also assigns "localhost"
# back to TRITONSERVER_IPADDR when that variable is unset or empty.
SERVER_IPADDR=${TRITONSERVER_IPADDR:=localhost}
# File the server log is written to.
SERVER_LOG=${SERVER_LOG:=./server.log}
# NOTE(review): presumably the startup wait limit in seconds -- confirm against its use.
SERVER_TIMEOUT=${SERVER_TIMEOUT:=120}
Expand Down

0 comments on commit 264d387

Please sign in to comment.