Parametrizes tests instead of running loops
pranavm-nvidia committed Nov 23, 2024
1 parent 488fca7 commit 8cced02
Showing 2 changed files with 231 additions and 99 deletions.
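The title refers to pytest's parametrization mechanism: rather than looping over several cases inside one test body, each case is declared with @pytest.mark.parametrize so pytest generates a separate test per case, reports each failure individually, and lets cases be selected by id. Below is a minimal sketch of that refactor, assuming a plain pytest/numpy setup; the test names and dtypes are illustrative and not taken from this commit.

import numpy
import pytest


# Loop style: one test body walks every case, so the first failing dtype
# stops the loop and hides the results of the remaining cases.
def test_roundtrip_loop():
    for dtype in ["float16", "float32", "int8"]:
        values = numpy.zeros([2, 2], dtype=dtype)
        assert values.dtype == numpy.dtype(dtype)


# Parametrized style: pytest expands this into one test per dtype
# (e.g. test_roundtrip_parametrized[float16]), so each case passes or
# fails on its own and can be run individually with -k.
@pytest.mark.parametrize("dtype", ["float16", "float32", "int8"])
def test_roundtrip_parametrized(dtype):
    values = numpy.zeros([2, 2], dtype=dtype)
    assert values.dtype == numpy.dtype(dtype)

Parametrized cases also compose cleanly with markers such as the skipif guards for torch and cupy used throughout this file.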
63 changes: 48 additions & 15 deletions python/test/test_api.py
@@ -120,7 +120,10 @@ def test_memory_fallback_to_cpu(self, server_options):
for response in server.model("test").infer(
inputs={"fp16_input": fp16_input},
):
assert response.outputs["fp16_output"].memory_type == tritonserver.MemoryType.CPU
assert (
response.outputs["fp16_output"].memory_type
== tritonserver.MemoryType.CPU
)
fp16_output = numpy.from_dlpack(response.outputs["fp16_output"])
assert fp16_input[0][0] == fp16_output[0][0]

@@ -145,7 +148,9 @@ def test_memory_allocator_exception(self, server_options):

with pytest.raises(tritonserver.InternalError):
for response in server.model("test").infer(
inputs={"string_input": tritonserver.Tensor.from_string_array([["hello"]])},
inputs={
"string_input": tritonserver.Tensor.from_string_array([["hello"]])
},
output_memory_type="gpu",
output_memory_allocator=TestAllocators.MockMemoryAllocator(),
):
@@ -169,35 +174,45 @@ def test_unsupported_memory_type(self, server_options):
)

if tritonserver.MemoryType.GPU in tritonserver.default_memory_allocators:
allocator = tritonserver.default_memory_allocators[tritonserver.MemoryType.GPU]
allocator = tritonserver.default_memory_allocators[
tritonserver.MemoryType.GPU
]

del tritonserver.default_memory_allocators[tritonserver.MemoryType.GPU]
else:
allocator = None

with pytest.raises(tritonserver.InvalidArgumentError):
for response in server.model("test").infer(
inputs={"string_input": tritonserver.Tensor.from_string_array([["hello"]])},
inputs={
"string_input": tritonserver.Tensor.from_string_array([["hello"]])
},
output_memory_type="gpu",
):
pass

if allocator is not None:
tritonserver.default_memory_allocators[tritonserver.MemoryType.GPU] = allocator
tritonserver.default_memory_allocators[
tritonserver.MemoryType.GPU
] = allocator

@pytest.mark.skipif(torch is None, reason="Skipping test, torch not installed")
def test_allocate_on_cpu_and_reshape(self):
allocator = tritonserver.default_memory_allocators[tritonserver.MemoryType.CPU]

memory_buffer = allocator.allocate(memory_type=tritonserver.MemoryType.CPU, memory_type_id=0, size=200)
memory_buffer = allocator.allocate(
memory_type=tritonserver.MemoryType.CPU, memory_type_id=0, size=200
)

cpu_array = memory_buffer.owner

assert memory_buffer.size == 200

fp32_size = int(memory_buffer.size / 4)

tensor = tritonserver.Tensor(tritonserver.DataType.FP32, shape=[fp32_size], memory_buffer=memory_buffer)
tensor = tritonserver.Tensor(
tritonserver.DataType.FP32, shape=[fp32_size], memory_buffer=memory_buffer
)

cpu_fp32_array = numpy.from_dlpack(tensor)
assert cpu_array.ctypes.data == cpu_fp32_array.ctypes.data
@@ -209,7 +224,9 @@ def test_allocate_on_cpu_and_reshape(self):
def test_allocate_on_gpu_and_reshape(self):
allocator = tritonserver.default_memory_allocators[tritonserver.MemoryType.GPU]

memory_buffer = allocator.allocate(memory_type=tritonserver.MemoryType.GPU, memory_type_id=0, size=200)
memory_buffer = allocator.allocate(
memory_type=tritonserver.MemoryType.GPU, memory_type_id=0, size=200
)

gpu_array = memory_buffer.owner

@@ -220,17 +237,25 @@ def test_allocate_on_gpu_and_reshape(self):

fp32_size = int(memory_buffer.size / 4)

tensor = tritonserver.Tensor(tritonserver.DataType.FP32, shape=[fp32_size], memory_buffer=memory_buffer)
tensor = tritonserver.Tensor(
tritonserver.DataType.FP32, shape=[fp32_size], memory_buffer=memory_buffer
)

gpu_fp32_array = cupy.from_dlpack(tensor)
assert gpu_array.__cuda_array_interface__["data"][0] == gpu_fp32_array.__cuda_array_interface__["data"][0]
assert (
gpu_array.__cuda_array_interface__["data"][0]
== gpu_fp32_array.__cuda_array_interface__["data"][0]
)

assert gpu_fp32_array.dtype == cupy.float32
assert gpu_fp32_array.nbytes == 200

torch_fp32_tensor = torch.from_dlpack(tensor)
assert torch_fp32_tensor.dtype == torch.float32
assert torch_fp32_tensor.data_ptr() == gpu_array.__cuda_array_interface__["data"][0]
assert (
torch_fp32_tensor.data_ptr()
== gpu_array.__cuda_array_interface__["data"][0]
)
assert torch_fp32_tensor.nbytes == 200


@@ -250,7 +275,9 @@ def test_cpu_to_gpu(self):

assert gpu_array.__cuda_array_interface__["data"][0] == memory_buffer.data_ptr

@pytest.mark.skipif(torch is None, reason="Skipping gpu memory, torch not installed")
@pytest.mark.skipif(
torch is None, reason="Skipping gpu memory, torch not installed"
)
@pytest.mark.skipif(cupy is None, reason="Skipping gpu memory, cupy not installed")
def test_gpu_tensor_from_dl_pack(self):
cupy_array = cupy.ones([100]).astype(cupy.float64)
@@ -307,7 +334,9 @@ def test_stop(self, server_options):
"parameters": {"decoupled": {"string_value": "False"}},
# Keep instance count low for fast startup/cleanup.
# Alternatively can use KIND_CPU here, but keeping gpus/count explicit.
"instance_group": [{"kind": "KIND_GPU", "gpus": [0], "count": 1}],
"instance_group": [
{"kind": "KIND_GPU", "gpus": [0], "count": 1}
],
}
)
},
@@ -414,7 +443,9 @@ def test_basic_inference(self, server_options):
raise_on_error=True,
):
for input_name, input_value in inputs.items():
output_value = numpy.from_dlpack(response.outputs[input_name.replace("input", "output")])
output_value = numpy.from_dlpack(
response.outputs[input_name.replace("input", "output")]
)
numpy.testing.assert_array_equal(input_value, output_value)

def test_parameters(self, server_options):
@@ -450,7 +481,9 @@ def test_parameters(self, server_options):
):
fp16_output = numpy.from_dlpack(response.outputs["fp16_output"])
numpy.testing.assert_array_equal(fp16_input, fp16_output)
output_parameters = json.loads(response.outputs["output_parameters"].to_string_array()[0])
output_parameters = json.loads(
response.outputs["output_parameters"].to_string_array()[0]
)
assert input_parameters == output_parameters

with pytest.raises(tritonserver.InvalidArgumentError):
