From c52bbc5a8c82a340e1fd941817020c8dbfa1d734 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Thu, 24 Aug 2023 14:02:25 -0700 Subject: [PATCH 1/9] Initial commit --- include/triton/core/tritonbackend.h | 14 ++++++++++++++ src/backend_model.cc | 15 +++++++++++++++ src/infer_response.cc | 22 ++++++++++++++++++++++ src/infer_response.h | 6 ++++++ src/tritonserver_stub.cc | 5 +++++ 5 files changed, 62 insertions(+) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index de0fffb8d..a463c2cb1 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -1569,6 +1569,20 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatchInitialize( TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatchFinalize( void* userp); +/// Accessor function to obtain the shape and data type of a particular output. +/// +/// \param response The response. +/// \param name The name of the output tensor to extract shape and data +/// type information. +/// \param datatype Returns the tensor datatype. +/// \param shape Returns the tensor shape. +/// \param dim_count Returns the number of dimensions in the tensor shape. +/// \return a TRITONSERVER_Error indicating success or failure. 
+TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_OutputShapeAndDType( + TRITONBACKEND_Response* response, const char* name, + TRITONSERVER_DataType* datatype, const int64_t** shape, + uint32_t* dim_count); + #ifdef __cplusplus } #endif diff --git a/src/backend_model.cc b/src/backend_model.cc index 996ce6fd6..6dbf337cd 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -1753,6 +1753,21 @@ TRITONBACKEND_BackendAttributeSetParallelModelInstanceLoading( return nullptr; } +TRITONAPI_DECLSPEC TRITONSERVER_Error* +TRITONBACKEND_OutputShapeAndDType( + TRITONBACKEND_Response* response, const char* name, + TRITONSERVER_DataType* datatype, const int64_t** shape, uint32_t* dim_count) +{ + InferenceResponse* tr = reinterpret_cast(response); + Status status = tr->OutputShapeAndDType(name, datatype, shape, dim_count); + if (!status.IsOk()) { + return TRITONSERVER_ErrorNew( + StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); + } + + return nullptr; // success +} + } // extern C }} // namespace triton::core diff --git a/src/infer_response.cc b/src/infer_response.cc index 2a8f2af2e..252dfbaa4 100644 --- a/src/infer_response.cc +++ b/src/infer_response.cc @@ -275,6 +275,28 @@ InferenceResponse::TraceOutputTensors( } #endif // TRITON_ENABLE_TRACING +Status +InferenceResponse::OutputShapeAndDType( + const char* tensor_name, TRITONSERVER_DataType* datatype, + const int64_t** shape, uint64_t* dim_count) +{ + const auto& outputs = this->Outputs(); + uint32_t output_count = outputs.size(); + std::string output_name = std::string(name); + + for (uint32_t idx = 0; idx < output_count; ++idx) { + if (outputs[idx].Name() == output_name) { + const std::vector& output_shape = outputs[idx].Shape(); + *shape = &output_shape[0]; + *dim_count = output_shape.size(); + *datatype = DataTypeToTriton(outputs[idx].DType()); + return Status::Success; + } + } + return Status( + Status::Code::NOT_FOUND, "Output name '" + output_name + "' not found"); +} + // // 
InferenceResponse::Output // diff --git a/src/infer_response.h b/src/infer_response.h index 5632c0f84..a9c54e813 100644 --- a/src/infer_response.h +++ b/src/infer_response.h @@ -284,6 +284,12 @@ class InferenceResponse { std::unique_ptr&& response, const uint32_t flags, const Status& status); + // Get the data type, shape, and dimensionality associated with + // an output. Return error status if tensor is not found. + Status OutputShapeAndDType( + const char* name, TRITONSERVER_DataType* datatype, const int64_t** shape, + uint64_t* dim_count); + #ifdef TRITON_ENABLE_TRACING const std::shared_ptr& Trace() const { return trace_; } void SetTrace(const std::shared_ptr& trace) diff --git a/src/tritonserver_stub.cc b/src/tritonserver_stub.cc index b0081a0a2..73e1066b6 100644 --- a/src/tritonserver_stub.cc +++ b/src/tritonserver_stub.cc @@ -1078,4 +1078,9 @@ TRITONCACHE_Copy() { } +TRITONAPI_DECLSPEC void +TRITONBACKEND_OutputShapeAndDType() +{ +} + } /* extern "C" */ From 6604d848fd88d94a67800e795677865230aa95bf Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Fri, 25 Aug 2023 16:02:06 -0700 Subject: [PATCH 2/9] Expanding the scope of the APIs to return more output details --- include/triton/core/tritonbackend.h | 68 +++++++++++++++++++++++------ src/backend_model.cc | 66 ++++++++++++++++++++++++++-- src/infer_response.cc | 22 ---------- src/infer_response.h | 6 --- 4 files changed, 117 insertions(+), 45 deletions(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index a463c2cb1..fe9ae2bb3 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -1569,19 +1569,61 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatchInitialize( TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatchFinalize( void* userp); -/// Accessor function to obtain the shape and data type of a particular output. -/// -/// \param response The response. 
-/// \param name The name of the output tensor to extract shape and data -/// type information. -/// \param datatype Returns the tensor datatype. -/// \param shape Returns the tensor shape. -/// \param dim_count Returns the number of dimensions in the tensor shape. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_OutputShapeAndDType( - TRITONBACKEND_Response* response, const char* name, - TRITONSERVER_DataType* datatype, const int64_t** shape, - uint32_t* dim_count); +/// Get all information about an output tensor by name. The tensor data is +/// returned as the base pointer to the data and the size, in bytes, of the +/// data. The caller does not own any of the returned values and must not +/// modify or delete them. The lifetime of all returned values extends until +/// 'response' is deleted. +/// +/// \param response The response object. +/// \param name The name of the output. +/// \param datatype Returns the type of the output. +/// \param shape Returns the shape of the output. +/// \param dim_count Returns the number of dimensions of the returned +/// shape. +/// \param base Returns the tensor data for the output. +/// \param byte_size Returns the size, in bytes, of the data. +/// \param memory_type Returns the memory type of the data. +/// \param memory_type_id Returns the memory type id of the data. +/// \param userp The user-specified value associated with the buffer +/// in TRITONSERVER_ResponseAllocatorAllocFn_t. +/// \return a TRITONSERVER_Error indicating success or failure. +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutputByName( + TRITONBACKEND_Response* response, + const char* name, TRITONSERVER_DataType* datatype, + const int64_t** shape, uint64_t* dim_count, const void** base, + size_t* byte_size, TRITONSERVER_MemoryType* memory_type, + int64_t* memory_type_id, void** userp); + +/// Get all information about an output tensor by index. 
The tensor data is +/// returned as the base pointer to the data and the size, in bytes, of the +/// data. The caller does not own any of the returned values and must not +/// modify or delete them. The lifetime of all returned values extends +/// until 'response' is deleted. +/// +/// \param response The response object. +/// \param index The index of the output tensor, must be 0 <= index < +/// count, where 'count' is the value returned by +/// TRITONSERVER_InferenceResponseOutputCount. +/// \param name Returns the name of the output. +/// \param datatype Returns the type of the output. +/// \param shape Returns the shape of the output. +/// \param dim_count Returns the number of dimensions of the returned +/// shape. +/// \param base Returns the tensor data for the output. +/// \param byte_size Returns the size, in bytes, of the data. +/// \param memory_type Returns the memory type of the data. +/// \param memory_type_id Returns the memory type id of the data. +/// \param userp The user-specified value associated with the buffer +/// in TRITONSERVER_ResponseAllocatorAllocFn_t. +/// \return a TRITONSERVER_Error indicating success or failure. 
+TRITONSERVER_DECLSPEC TRITONSERVER_Error* +TRITONBACKEND_InferenceResponseOutput( + TRITONBACKEND_Response* response, + const uint32_t index, const char** name, TRITONSERVER_DataType* datatype, + const int64_t** shape, uint64_t* dim_count, const void** base, + size_t* byte_size, TRITONSERVER_MemoryType* memory_type, + int64_t* memory_type_id, void** userp); #ifdef __cplusplus } diff --git a/src/backend_model.cc b/src/backend_model.cc index 6dbf337cd..05a04f6ab 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -1753,14 +1753,72 @@ TRITONBACKEND_BackendAttributeSetParallelModelInstanceLoading( return nullptr; } +TRITONAPI_DECLSPEC TRITONSERVER_Error* +TRITONBACKEND_InferenceResponseOutputByName( + TRITONBACKEND_Response* response, + const char* name, TRITONSERVER_DataType* datatype, + const int64_t** shape, uint64_t* dim_count, const void** base, + size_t* byte_size, TRITONSERVER_MemoryType* memory_type, + int64_t* memory_type_id, void** userp) +{ + InferenceResponse* tr = reinterpret_cast(response); + + const auto& outputs = tr->Outputs(); + uint32_t output_count = outputs.size(); + std::string output_name = std::string(name); + + for (uint32_t idx = 0; idx < output_count; ++idx) { + if (outputs[idx].Name() == output_name) { + *datatype = DataTypeToTriton(outputs[idx].DType()); + const std::vector& oshape = outputs[idx].Shape(); + *shape = &oshape[0]; + *dim_count = oshape.size(); + Status status = outputs[idx].DataBuffer(base, byte_size, memory_type, memory_type_id, userp); + if (!status.IsOk()) { + *base = nullptr; + *byte_size = 0; + return TRITONSERVER_ErrorNew( + StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); + } + return nullptr; // success + } + } + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_NOT_FOUND, + ("Output name " + output_name + "not found.").c_str()); + +} + TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_OutputShapeAndDType( - TRITONBACKEND_Response* response, const char* name, - 
TRITONSERVER_DataType* datatype, const int64_t** shape, uint32_t* dim_count) +TRITONBACKEND_InferenceResponseOutput( + TRITONBACKEND_Response* response, const uint32_t index, + const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape, + uint64_t* dim_count, const void** base, size_t* byte_size, + TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, void** userp) { InferenceResponse* tr = reinterpret_cast(response); - Status status = tr->OutputShapeAndDType(name, datatype, shape, dim_count); + + const auto& outputs = tr->Outputs(); + if (index >= outputs.size()) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + ("out of bounds index " + std::to_string(index) + + std::string(": response has ") + std::to_string(outputs.size()) + + " outputs").c_str()); + } + + const InferenceResponse::Output& output = outputs[index]; + + *name = output.Name().c_str() + *datatype = DataTypeToTriton(output.DType()); + + const std::vector& oshape = output.Shape(); + *shape = &oshape[0]; + *dim_count = oshape.size(); + Status status = output.DataBuffer(base, byte_size, memory_type, memory_type_id, userp); if (!status.IsOk()) { + *base = nullptr; + *byte_size = 0; return TRITONSERVER_ErrorNew( StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); } diff --git a/src/infer_response.cc b/src/infer_response.cc index 252dfbaa4..2a8f2af2e 100644 --- a/src/infer_response.cc +++ b/src/infer_response.cc @@ -275,28 +275,6 @@ InferenceResponse::TraceOutputTensors( } #endif // TRITON_ENABLE_TRACING -Status -InferenceResponse::OutputShapeAndDType( - const char* tensor_name, TRITONSERVER_DataType* datatype, - const int64_t** shape, uint64_t* dim_count) -{ - const auto& outputs = this->Outputs(); - uint32_t output_count = outputs.size(); - std::string output_name = std::string(name); - - for (uint32_t idx = 0; idx < output_count; ++idx) { - if (outputs[idx].Name() == output_name) { - const std::vector& output_shape = outputs[idx].Shape(); - 
*shape = &output_shape[0]; - *dim_count = output_shape.size(); - *datatype = DataTypeToTriton(outputs[idx].DType()); - return Status::Success; - } - } - return Status( - Status::Code::NOT_FOUND, "Output name '" + output_name + "' not found"); -} - // // InferenceResponse::Output // diff --git a/src/infer_response.h b/src/infer_response.h index a9c54e813..5632c0f84 100644 --- a/src/infer_response.h +++ b/src/infer_response.h @@ -284,12 +284,6 @@ class InferenceResponse { std::unique_ptr&& response, const uint32_t flags, const Status& status); - // Get the data type, shape, and dimensionality associated with - // an output. Return error status if tensor is not found. - Status OutputShapeAndDType( - const char* name, TRITONSERVER_DataType* datatype, const int64_t** shape, - uint64_t* dim_count); - #ifdef TRITON_ENABLE_TRACING const std::shared_ptr& Trace() const { return trace_; } void SetTrace(const std::shared_ptr& trace) From f3b7b4b9e4c9edf1a9b1d4c35b2a8aa3ea83eedf Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Fri, 25 Aug 2023 16:04:34 -0700 Subject: [PATCH 3/9] Add back test semicolon and format --- include/triton/core/tritonbackend.h | 20 +++++++-------- src/backend_model.cc | 40 +++++++++++++++-------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index fe9ae2bb3..5e098b5fc 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -1588,11 +1588,11 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatchFinalize( /// \param userp The user-specified value associated with the buffer /// in TRITONSERVER_ResponseAllocatorAllocFn_t. /// \return a TRITONSERVER_Error indicating success or failure. 
-TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutputByName( - TRITONBACKEND_Response* response, - const char* name, TRITONSERVER_DataType* datatype, - const int64_t** shape, uint64_t* dim_count, const void** base, - size_t* byte_size, TRITONSERVER_MemoryType* memory_type, +TRITONBACKEND_ISPEC TRITONSERVER_Error* +TRITONBACKEND_InferenceResponseOutputByName( + TRITONBACKEND_Response* response, const char* name, + TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, + const void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, void** userp); /// Get all information about an output tensor by index. The tensor data is @@ -1617,12 +1617,10 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutputByN /// \param userp The user-specified value associated with the buffer /// in TRITONSERVER_ResponseAllocatorAllocFn_t. /// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_InferenceResponseOutput( - TRITONBACKEND_Response* response, - const uint32_t index, const char** name, TRITONSERVER_DataType* datatype, - const int64_t** shape, uint64_t* dim_count, const void** base, - size_t* byte_size, TRITONSERVER_MemoryType* memory_type, +TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutput( + TRITONBACKEND_Response* response, const uint32_t index, const char** name, + TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, + const void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, void** userp); #ifdef __cplusplus diff --git a/src/backend_model.cc b/src/backend_model.cc index 05a04f6ab..55ea2958a 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -1753,12 +1753,11 @@ TRITONBACKEND_BackendAttributeSetParallelModelInstanceLoading( return nullptr; } -TRITONAPI_DECLSPEC TRITONSERVER_Error* 
+TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutputByName( - TRITONBACKEND_Response* response, - const char* name, TRITONSERVER_DataType* datatype, - const int64_t** shape, uint64_t* dim_count, const void** base, - size_t* byte_size, TRITONSERVER_MemoryType* memory_type, + TRITONBACKEND_Response* response, const char* name, + TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, + const void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, void** userp) { InferenceResponse* tr = reinterpret_cast(response); @@ -1773,28 +1772,29 @@ TRITONBACKEND_InferenceResponseOutputByName( const std::vector& oshape = outputs[idx].Shape(); *shape = &oshape[0]; *dim_count = oshape.size(); - Status status = outputs[idx].DataBuffer(base, byte_size, memory_type, memory_type_id, userp); + Status status = outputs[idx].DataBuffer( + base, byte_size, memory_type, memory_type_id, userp); if (!status.IsOk()) { *base = nullptr; *byte_size = 0; return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); + StatusCodeToTritonCode(status.StatusCode()), + status.Message().c_str()); } - return nullptr; // success + return nullptr; // success } } return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_NOT_FOUND, - ("Output name " + output_name + "not found.").c_str()); - + TRITONSERVER_ERROR_NOT_FOUND, + ("Output name " + output_name + "not found.").c_str()); } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutput( - TRITONBACKEND_Response* response, const uint32_t index, - const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape, - uint64_t* dim_count, const void** base, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, void** userp) + TRITONBACKEND_Response* response, const uint32_t index, const char** name, + TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, + const 
void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, + int64_t* memory_type_id, void** userp) { InferenceResponse* tr = reinterpret_cast(response); @@ -1803,19 +1803,21 @@ TRITONBACKEND_InferenceResponseOutput( return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, ("out of bounds index " + std::to_string(index) + - std::string(": response has ") + std::to_string(outputs.size()) + - " outputs").c_str()); + std::string(": response has ") + std::to_string(outputs.size()) + + " outputs") + .c_str()); } const InferenceResponse::Output& output = outputs[index]; - *name = output.Name().c_str() + *name = output.Name().c_str(); *datatype = DataTypeToTriton(output.DType()); const std::vector& oshape = output.Shape(); *shape = &oshape[0]; *dim_count = oshape.size(); - Status status = output.DataBuffer(base, byte_size, memory_type, memory_type_id, userp); + Status status = + output.DataBuffer(base, byte_size, memory_type, memory_type_id, userp); if (!status.IsOk()) { *base = nullptr; *byte_size = 0; From 294240ba18eb49ea74ac6f6b187c844c083119e5 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Mon, 28 Aug 2023 08:49:27 -0700 Subject: [PATCH 4/9] Remove stub function --- src/tritonserver_stub.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/tritonserver_stub.cc b/src/tritonserver_stub.cc index 73e1066b6..b0081a0a2 100644 --- a/src/tritonserver_stub.cc +++ b/src/tritonserver_stub.cc @@ -1078,9 +1078,4 @@ TRITONCACHE_Copy() { } -TRITONAPI_DECLSPEC void -TRITONBACKEND_OutputShapeAndDType() -{ -} - } /* extern "C" */ From 07c8764ea41adc2445d042606df86fa1221e2e9f Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Tue, 29 Aug 2023 09:24:43 -0700 Subject: [PATCH 5/9] Increment API version --- include/triton/core/tritonbackend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 5e098b5fc..32c0c4bc6 100644 --- a/include/triton/core/tritonbackend.h +++ 
b/include/triton/core/tritonbackend.h @@ -94,7 +94,7 @@ struct TRITONBACKEND_Batcher; /// } /// #define TRITONBACKEND_API_VERSION_MAJOR 1 -#define TRITONBACKEND_API_VERSION_MINOR 15 +#define TRITONBACKEND_API_VERSION_MINOR 16 /// Get the TRITONBACKEND API version supported by Triton. This value /// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and From 51a6682938e64af2448efdfaf2faf37d000bca43 Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Wed, 30 Aug 2023 14:33:57 -0700 Subject: [PATCH 6/9] Removing buffer attribute details --- include/triton/core/tritonbackend.h | 40 +++++++++-------------------- src/backend_model.cc | 25 ++---------------- 2 files changed, 14 insertions(+), 51 deletions(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 32c0c4bc6..5d588648b 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -1569,11 +1569,10 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatchInitialize( TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatchFinalize( void* userp); -/// Get all information about an output tensor by name. The tensor data is -/// returned as the base pointer to the data and the size, in bytes, of the -/// data. The caller does not own any of the returned values and must not -/// modify or delete them. The lifetime of all returned values extends until -/// 'response' is deleted. +/// Get all information about an output tensor by its name. The caller does +/// not own any of the referenced return values and must not modify or delete +/// them. The lifetime of all returned values extends until 'response' is +/// deleted. /// /// \param response The response object. /// \param name The name of the output. @@ -1581,25 +1580,17 @@ TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatchFinalize( /// \param shape Returns the shape of the output. 
/// \param dim_count Returns the number of dimensions of the returned /// shape. -/// \param base Returns the tensor data for the output. -/// \param byte_size Returns the size, in bytes, of the data. -/// \param memory_type Returns the memory type of the data. -/// \param memory_type_id Returns the memory type id of the data. -/// \param userp The user-specified value associated with the buffer -/// in TRITONSERVER_ResponseAllocatorAllocFn_t. /// \return a TRITONSERVER_Error indicating success or failure. TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutputByName( TRITONBACKEND_Response* response, const char* name, - TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, - const void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id, void** userp); + TRITONSERVER_DataType* datatype, const int64_t** shape, + uint64_t* dim_count); -/// Get all information about an output tensor by index. The tensor data is -/// returned as the base pointer to the data and the size, in bytes, of the -/// data. The caller does not own any of the returned values and must not -/// modify or delete them. The lifetime of all returned values extends -/// until 'response' is deleted. +/// Get all information about an output tensor by its index. The caller does +/// not own any of the referenced return values and must not modify or delete +/// them. The lifetime of all returned values extends until 'response' is +/// deleted. /// /// \param response The response object. /// \param index The index of the output tensor, must be 0 <= index < @@ -1610,18 +1601,11 @@ TRITONBACKEND_InferenceResponseOutputByName( /// \param shape Returns the shape of the output. /// \param dim_count Returns the number of dimensions of the returned /// shape. -/// \param base Returns the tensor data for the output. -/// \param byte_size Returns the size, in bytes, of the data. 
-/// \param memory_type Returns the memory type of the data. -/// \param memory_type_id Returns the memory type id of the data. -/// \param userp The user-specified value associated with the buffer -/// in TRITONSERVER_ResponseAllocatorAllocFn_t. /// \return a TRITONSERVER_Error indicating success or failure. TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutput( TRITONBACKEND_Response* response, const uint32_t index, const char** name, - TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, - const void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id, void** userp); + TRITONSERVER_DataType* datatype, const int64_t** shape, + uint64_t* dim_count); #ifdef __cplusplus } diff --git a/src/backend_model.cc b/src/backend_model.cc index 55ea2958a..5074b42e4 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -1756,9 +1756,7 @@ TRITONBACKEND_BackendAttributeSetParallelModelInstanceLoading( TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutputByName( TRITONBACKEND_Response* response, const char* name, - TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, - const void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id, void** userp) + TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count) { InferenceResponse* tr = reinterpret_cast(response); @@ -1772,15 +1770,6 @@ TRITONBACKEND_InferenceResponseOutputByName( const std::vector& oshape = outputs[idx].Shape(); *shape = &oshape[0]; *dim_count = oshape.size(); - Status status = outputs[idx].DataBuffer( - base, byte_size, memory_type, memory_type_id, userp); - if (!status.IsOk()) { - *base = nullptr; - *byte_size = 0; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), - status.Message().c_str()); - } return nullptr; // success } } @@ -1792,9 +1781,7 @@ 
TRITONBACKEND_InferenceResponseOutputByName( TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InferenceResponseOutput( TRITONBACKEND_Response* response, const uint32_t index, const char** name, - TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, - const void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id, void** userp) + TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count) { InferenceResponse* tr = reinterpret_cast(response); @@ -1816,14 +1803,6 @@ TRITONBACKEND_InferenceResponseOutput( const std::vector& oshape = output.Shape(); *shape = &oshape[0]; *dim_count = oshape.size(); - Status status = - output.DataBuffer(base, byte_size, memory_type, memory_type_id, userp); - if (!status.IsOk()) { - *base = nullptr; - *byte_size = 0; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } return nullptr; // success } From 132c855165b20dccd564ffea8cfdd5742375d73a Mon Sep 17 00:00:00 2001 From: fpetrini15 Date: Fri, 1 Sep 2023 10:36:02 -0700 Subject: [PATCH 7/9] Unit tests for new backend API methods --- src/test/CMakeLists.txt | 40 ++++ src/test/backend_output_detail_test.cc | 302 +++++++++++++++++++++++++ 2 files changed, 342 insertions(+) create mode 100644 src/test/backend_output_detail_test.cc diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 7fe27f5c0..ccbe752b6 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -534,3 +534,43 @@ install( TARGETS register_api_test RUNTIME DESTINATION bin ) + +# +# Backend Output Detail Unittest +# +add_executable( + backend_output_detail_test + backend_output_detail_test.cc +) + +set_target_properties( + backend_output_detail_test + PROPERTIES + SKIP_BUILD_RPATH TRUE + BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH_USE_LINK_PATH FALSE + INSTALL_RPATH "" +) + +target_include_directories( + backend_output_detail_test + PRIVATE + 
${CMAKE_CURRENT_SOURCE_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/../../include + ${GTEST_INCLUDE_DIRS} +) + +target_link_libraries( + backend_output_detail_test + PRIVATE + triton-common-error # from repo-common + triton-common-logging # from repo-common + triton-core + GTest::gtest + GTest::gtest_main +) + +install( + TARGETS backend_output_detail_test + RUNTIME DESTINATION bin +) diff --git a/src/test/backend_output_detail_test.cc b/src/test/backend_output_detail_test.cc new file mode 100644 index 000000000..d03f5a575 --- /dev/null +++ b/src/test/backend_output_detail_test.cc @@ -0,0 +1,302 @@ +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "triton/core/tritonbackend.h" +#include "triton/core/tritonserver.h" + +namespace { + +#define FAIL_TEST_IF_ERR(X, MSG) \ + do { \ + std::shared_ptr err__((X), TRITONSERVER_ErrorDelete); \ + ASSERT_TRUE((err__ == nullptr)) \ + << "error: " << (MSG) << ": " \ + << TRITONSERVER_ErrorCodeString(err__.get()) << " - " \ + << TRITONSERVER_ErrorMessage(err__.get()); \ + } while (false) + +TRITONSERVER_Error* +ResponseAlloc( + TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, + size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, + int64_t preferred_memory_type_id, void* userp, void** buffer, + void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, + int64_t* actual_memory_type_id) +{ + *actual_memory_type = TRITONSERVER_MEMORY_CPU; + *actual_memory_type_id = preferred_memory_type_id; + + if (byte_size == 0) { + *buffer = nullptr; + *buffer_userp = nullptr; + } else { + void* allocated_ptr = nullptr; + allocated_ptr = malloc(byte_size); + + if (allocated_ptr != nullptr) { + *buffer = allocated_ptr; + *buffer_userp = new std::string(tensor_name); + } + return nullptr; // Success + } +} + +TRITONSERVER_Error* +ResponseRelease( + TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, + size_t byte_size, TRITONSERVER_MemoryType memory_type, + 
int64_t memory_type_id) +{ + return nullptr; // Success +} + +void +InferRequestComplete( + TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) +{ + TRITONSERVER_InferenceRequestDelete(request); +} + +void +InferResponseComplete( + TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp) +{ + if (response != nullptr) { + // Notify that the completion. + std::promise* p = + reinterpret_cast*>(userp); + p->set_value(response); + delete p; + } +} + +class BackendOutputDetailTest : public ::testing::Test { + protected: + static void SetUpTestSuite() + { + // Create the server... + TRITONSERVER_ServerOptions* server_options = nullptr; + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerOptionsNew(&server_options), + "creating server options"); + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerOptionsSetModelRepositoryPath( + server_options, "./models"), + "setting model repository path"); + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerOptionsSetBackendDirectory( + server_options, "/opt/tritonserver/backends"), + "setting backend directory"); + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + server_options, "/opt/tritonserver/repoagents"), + "setting repository agent directory"); + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true), + "setting strict model configuration"); + + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerNew(&server_, server_options), "creating server"); + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerOptionsDelete(server_options), + "deleting server options"); + } + + static void TearDownTestSuite() + { + FAIL_TEST_IF_ERR(TRITONSERVER_ServerDelete(server_), "deleting server"); + } + + void SetUp() override + { + ASSERT_TRUE(server_ != nullptr) << "Server has not created"; + // Wait until the server is both live and ready. 
+    size_t health_iters = 0;
+    while (true) {
+      bool live, ready;
+      FAIL_TEST_IF_ERR(
+          TRITONSERVER_ServerIsLive(server_, &live),
+          "unable to get server liveness");
+      FAIL_TEST_IF_ERR(
+          TRITONSERVER_ServerIsReady(server_, &ready),
+          "unable to get server readiness");
+      if (live && ready) {
+        break;
+      }
+
+      if (++health_iters >= 10) {
+        FAIL() << "failed to find healthy inference server";
+      }
+
+      std::this_thread::sleep_for(std::chrono::milliseconds(500));
+    }
+
+    // Create allocator with common callback
+    FAIL_TEST_IF_ERR(
+        TRITONSERVER_ResponseAllocatorNew(
+            &allocator_, ResponseAlloc, ResponseRelease,
+            nullptr /* start_fn */),
+        "creating response allocator");
+
+    FAIL_TEST_IF_ERR(
+        TRITONSERVER_InferenceRequestNew(
+            &irequest_, server_, "add_sub", -1 /* model_version */),
+        "creating inference request");
+
+    FAIL_TEST_IF_ERR(
+        TRITONSERVER_InferenceRequestSetReleaseCallback(
+            irequest_, InferRequestComplete,
+            nullptr /* request_release_userp */),
+        "setting request release callback");
+
+    std::vector<int64_t> input0_shape({16});
+    std::vector<int64_t> input1_shape({16});
+    FAIL_TEST_IF_ERR(
+        TRITONSERVER_InferenceRequestAddInput(
+            irequest_, "INPUT0", TRITONSERVER_TYPE_FP32, &input0_shape[0],
+            input0_shape.size()),
+        "setting input0 for the request");
+    FAIL_TEST_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest_, "INPUT0", &input0_data_[0], input0_data_.size(),
+            TRITONSERVER_MEMORY_CPU, 0),
+        "assigning INPUT0 data");
+    FAIL_TEST_IF_ERR(
+        TRITONSERVER_InferenceRequestAddInput(
+            irequest_, "INPUT1", TRITONSERVER_TYPE_FP32, &input1_shape[0],
+            input1_shape.size()),
+        "setting input1 for the request");
+    FAIL_TEST_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest_, "INPUT1", &input1_data_[0], input1_data_.size(),
+            TRITONSERVER_MEMORY_CPU, 0),
+        "assigning INPUT1 data");
+  }
+
+  void TearDown() override
+  {
+    unsetenv("TEST_ANONYMOUS");
+    unsetenv("TEST_BYTE_SIZE");
+    FAIL_TEST_IF_ERR(
+        TRITONSERVER_ResponseAllocatorDelete(allocator_),
+        "deleting response allocator");
+  }
+
+  static TRITONSERVER_Server* server_;
+  TRITONSERVER_ResponseAllocator* allocator_ = nullptr;
+  static std::vector<char> input0_data_;
+  static std::vector<char> input1_data_;
+  TRITONSERVER_InferenceRequest* irequest_ = nullptr;
+};
+
+TRITONSERVER_Server* BackendOutputDetailTest::server_ = nullptr;
+std::vector<char> BackendOutputDetailTest::input0_data_(16, 1);  // NOTE(review): 16 bytes vs FP32[16]=64B — confirm element type/size
+std::vector<char> BackendOutputDetailTest::input1_data_(16, 1);  // NOTE(review): 16 bytes vs FP32[16]=64B — confirm element type/size
+
+TEST_F(BackendOutputDetailTest, DefaultInference)
+{
+  auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
+  std::future<TRITONSERVER_InferenceResponse*> future = p->get_future();
+
+  FAIL_TEST_IF_ERR(
+      TRITONSERVER_InferenceRequestSetResponseCallback(
+          irequest_, allocator_, nullptr /* response_allocator_userp */,
+          InferResponseComplete, reinterpret_cast<void*>(p)),
+      "setting response callback");
+
+  FAIL_TEST_IF_ERR(
+      TRITONSERVER_ServerInferAsync(server_, irequest_, nullptr /* trace */),
+      "running inference");
+
+  uint32_t output_count;
+  const char* output_name;
+  TRITONSERVER_DataType output_datatype;
+  const int64_t* output_shape;
+  uint64_t dims_count;
+  std::vector<const char*> names = {"OUTPUT0", "OUTPUT1"};
+
+  TRITONSERVER_InferenceResponse* response = future.get();
+  FAIL_TEST_IF_ERR(
+      TRITONSERVER_InferenceResponseError(response),
+      "error with inference response");
+  ASSERT_TRUE(response != nullptr) << "Expect successful inference";
+  FAIL_TEST_IF_ERR(
+      TRITONSERVER_InferenceResponseOutputCount(response, &output_count),
+      "getting output count");
+  ASSERT_EQ(output_count, size_t(2));
+
+
+  for (size_t idx = 0; idx < output_count; idx++) {
+    // Cast the response from a TRITONSERVER_Response to a
+    // TRITONBACKEND_Response. This is not recommended and not allowed for
+    // backend developers as this cast is unsupported. However, for the purposes
+    // of our own internal testing we do so here in order to validate the
+    // functionality of our backend APIs.
+    TRITONBACKEND_Response* backend_response =
+        reinterpret_cast<TRITONBACKEND_Response*>(response);
+
+    FAIL_TEST_IF_ERR(
+        TRITONBACKEND_InferenceResponseOutput(
+            backend_response, idx, &output_name, &output_datatype,
+            &output_shape, &dims_count),
+        "getting ouput details by index");
+    EXPECT_STREQ(output_name, names[idx]);
+    EXPECT_EQ(output_datatype, TRITONSERVER_TYPE_FP32);
+    EXPECT_EQ(*output_shape, int64_t(16));
+    EXPECT_EQ(dims_count, int64_t(1));
+
+    FAIL_TEST_IF_ERR(
+        TRITONBACKEND_InferenceResponseOutputByName(
+            backend_response, names[idx], &output_datatype, &output_shape,
+            &dims_count),
+        "getting output details by name");
+    EXPECT_EQ(output_datatype, TRITONSERVER_TYPE_FP32);
+    EXPECT_EQ(*output_shape, int64_t(16));
+    EXPECT_EQ(dims_count, int64_t(1));
+  }
+  TRITONSERVER_InferenceResponseDelete(response);
+}
+
+}  // namespace
+
+int
+main(int argc, char** argv)
+{
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}

From d13bc8b42b5db66871c1cc4102448356fb265bc3 Mon Sep 17 00:00:00 2001
From: fpetrini15
Date: Fri, 1 Sep 2023 10:43:53 -0700
Subject: [PATCH 8/9] Run pre-commit hooks

---
 src/test/backend_output_detail_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/backend_output_detail_test.cc b/src/test/backend_output_detail_test.cc
index d03f5a575..173508762 100644
--- a/src/test/backend_output_detail_test.cc
+++ b/src/test/backend_output_detail_test.cc
@@ -274,7 +274,7 @@ TEST_F(BackendOutputDetailTest, DefaultInference)
         TRITONBACKEND_InferenceResponseOutput(
             backend_response, idx, &output_name, &output_datatype,
             &output_shape, &dims_count),
-        "getting ouput details by index");
+        "getting output details by index");
     EXPECT_STREQ(output_name, names[idx]);
     EXPECT_EQ(output_datatype, TRITONSERVER_TYPE_FP32);
     EXPECT_EQ(*output_shape, int64_t(16));

From 8c07b2ccfbe1c002d5a7272546f588891343df09 Mon Sep 17 00:00:00 2001
From: fpetrini15
Date: Tue, 5 Sep 2023 10:18:24 -0700
Subject: [PATCH 9/9] Removing unused headers

---
 src/test/backend_output_detail_test.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/test/backend_output_detail_test.cc b/src/test/backend_output_detail_test.cc
index 173508762..e10a9aefe 100644
--- a/src/test/backend_output_detail_test.cc
+++ b/src/test/backend_output_detail_test.cc
@@ -26,11 +26,8 @@
 #include <stdlib.h>
 
 #include <chrono>
-#include <cstring>
 #include <future>
 #include <memory>
-#include <mutex>
-#include <sstream>
 #include <string>
 #include <thread>
 #include <vector>