Parametrizes tests instead of running loops
pranavm-nvidia committed Nov 23, 2024
1 parent 488fca7 commit 8cced02
Showing 2 changed files with 231 additions and 99 deletions.
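The title refers to pytest's parametrization mechanism: rather than looping over several cases inside one test body, each case is declared with @pytest.mark.parametrize so pytest generates a separate test per case, reports each failure individually, and lets cases be selected by id. Below is a minimal sketch of that refactor, assuming a plain pytest/numpy setup; the test names and dtypes are illustrative and not taken from this commit.

import numpy
import pytest


# Loop style: one test body walks every case, so the first failing dtype
# stops the loop and hides the results of the remaining cases.
def test_roundtrip_loop():
    for dtype in ["float16", "float32", "int8"]:
        values = numpy.zeros([2, 2], dtype=dtype)
        assert values.dtype == numpy.dtype(dtype)


# Parametrized style: pytest expands this into one test per dtype
# (e.g. test_roundtrip_parametrized[float16]), so each case passes or
# fails on its own and can be run individually with -k.
@pytest.mark.parametrize("dtype", ["float16", "float32", "int8"])
def test_roundtrip_parametrized(dtype):
    values = numpy.zeros([2, 2], dtype=dtype)
    assert values.dtype == numpy.dtype(dtype)

Parametrized cases also compose cleanly with markers such as the skipif guards for torch and cupy used throughout this file.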
63 changes: 48 additions & 15 deletions python/test/test_api.py
@@ -120,7 +120,10 @@ def test_memory_fallback_to_cpu(self, server_options):
for response in server.model("test").infer(
inputs={"fp16_input": fp16_input},
):
assert response.outputs["fp16_output"].memory_type == tritonserver.MemoryType.CPU
assert (
response.outputs["fp16_output"].memory_type
== tritonserver.MemoryType.CPU
)
fp16_output = numpy.from_dlpack(response.outputs["fp16_output"])
assert fp16_input[0][0] == fp16_output[0][0]

@@ -145,7 +148,9 @@ def test_memory_allocator_exception(self, server_options):

with pytest.raises(tritonserver.InternalError):
for response in server.model("test").infer(
inputs={"string_input": tritonserver.Tensor.from_string_array([["hello"]])},
inputs={
"string_input": tritonserver.Tensor.from_string_array([["hello"]])
},
output_memory_type="gpu",
output_memory_allocator=TestAllocators.MockMemoryAllocator(),
):
@@ -169,35 +174,45 @@ def test_unsupported_memory_type(self, server_options):
)

if tritonserver.MemoryType.GPU in tritonserver.default_memory_allocators:
allocator = tritonserver.default_memory_allocators[tritonserver.MemoryType.GPU]
allocator = tritonserver.default_memory_allocators[
tritonserver.MemoryType.GPU
]

del tritonserver.default_memory_allocators[tritonserver.MemoryType.GPU]
else:
allocator = None

with pytest.raises(tritonserver.InvalidArgumentError):
for response in server.model("test").infer(
inputs={"string_input": tritonserver.Tensor.from_string_array([["hello"]])},
inputs={
"string_input": tritonserver.Tensor.from_string_array([["hello"]])
},
output_memory_type="gpu",
):
pass

if allocator is not None:
tritonserver.default_memory_allocators[tritonserver.MemoryType.GPU] = allocator
tritonserver.default_memory_allocators[
tritonserver.MemoryType.GPU
] = allocator

@pytest.mark.skipif(torch is None, reason="Skipping test, torch not installed")
def test_allocate_on_cpu_and_reshape(self):
allocator = tritonserver.default_memory_allocators[tritonserver.MemoryType.CPU]

memory_buffer = allocator.allocate(memory_type=tritonserver.MemoryType.CPU, memory_type_id=0, size=200)
memory_buffer = allocator.allocate(
memory_type=tritonserver.MemoryType.CPU, memory_type_id=0, size=200
)

cpu_array = memory_buffer.owner

assert memory_buffer.size == 200

fp32_size = int(memory_buffer.size / 4)

tensor = tritonserver.Tensor(tritonserver.DataType.FP32, shape=[fp32_size], memory_buffer=memory_buffer)
tensor = tritonserver.Tensor(
tritonserver.DataType.FP32, shape=[fp32_size], memory_buffer=memory_buffer
)

cpu_fp32_array = numpy.from_dlpack(tensor)
assert cpu_array.ctypes.data == cpu_fp32_array.ctypes.data
@@ -209,7 +224,9 @@ def test_allocate_on_cpu_and_reshape(self):
def test_allocate_on_gpu_and_reshape(self):
allocator = tritonserver.default_memory_allocators[tritonserver.MemoryType.GPU]

memory_buffer = allocator.allocate(memory_type=tritonserver.MemoryType.GPU, memory_type_id=0, size=200)
memory_buffer = allocator.allocate(
memory_type=tritonserver.MemoryType.GPU, memory_type_id=0, size=200
)

gpu_array = memory_buffer.owner

@@ -220,17 +237,25 @@ def test_allocate_on_gpu_and_reshape(self):

fp32_size = int(memory_buffer.size / 4)

tensor = tritonserver.Tensor(tritonserver.DataType.FP32, shape=[fp32_size], memory_buffer=memory_buffer)
tensor = tritonserver.Tensor(
tritonserver.DataType.FP32, shape=[fp32_size], memory_buffer=memory_buffer
)

gpu_fp32_array = cupy.from_dlpack(tensor)
assert gpu_array.__cuda_array_interface__["data"][0] == gpu_fp32_array.__cuda_array_interface__["data"][0]
assert (
gpu_array.__cuda_array_interface__["data"][0]
== gpu_fp32_array.__cuda_array_interface__["data"][0]
)

assert gpu_fp32_array.dtype == cupy.float32
assert gpu_fp32_array.nbytes == 200

torch_fp32_tensor = torch.from_dlpack(tensor)
assert torch_fp32_tensor.dtype == torch.float32
assert torch_fp32_tensor.data_ptr() == gpu_array.__cuda_array_interface__["data"][0]
assert (
torch_fp32_tensor.data_ptr()
== gpu_array.__cuda_array_interface__["data"][0]
)
assert torch_fp32_tensor.nbytes == 200


@@ -250,7 +275,9 @@ def test_cpu_to_gpu(self):

assert gpu_array.__cuda_array_interface__["data"][0] == memory_buffer.data_ptr

@pytest.mark.skipif(torch is None, reason="Skipping gpu memory, torch not installed")
@pytest.mark.skipif(
torch is None, reason="Skipping gpu memory, torch not installed"
)
@pytest.mark.skipif(cupy is None, reason="Skipping gpu memory, cupy not installed")
def test_gpu_tensor_from_dl_pack(self):
cupy_array = cupy.ones([100]).astype(cupy.float64)
@@ -307,7 +334,9 @@ def test_stop(self, server_options):
"parameters": {"decoupled": {"string_value": "False"}},
# Keep instance count low for fast startup/cleanup.
# Alternatively can use KIND_CPU here, but keeping gpus/count explicit.
"instance_group": [{"kind": "KIND_GPU", "gpus": [0], "count": 1}],
"instance_group": [
{"kind": "KIND_GPU", "gpus": [0], "count": 1}
],
}
)
},
@@ -414,7 +443,9 @@ def test_basic_inference(self, server_options):
raise_on_error=True,
):
for input_name, input_value in inputs.items():
output_value = numpy.from_dlpack(response.outputs[input_name.replace("input", "output")])
output_value = numpy.from_dlpack(
response.outputs[input_name.replace("input", "output")]
)
numpy.testing.assert_array_equal(input_value, output_value)

def test_parameters(self, server_options):
@@ -450,7 +481,9 @@ def test_parameters(self, server_options):
):
fp16_output = numpy.from_dlpack(response.outputs["fp16_output"])
numpy.testing.assert_array_equal(fp16_input, fp16_output)
output_parameters = json.loads(response.outputs["output_parameters"].to_string_array()[0])
output_parameters = json.loads(
response.outputs["output_parameters"].to_string_array()[0]
)
assert input_parameters == output_parameters

with pytest.raises(tritonserver.InvalidArgumentError):
