Calculate response throughput metric #356

Merged
src/c++/library/common.h (5 additions, 1 deletion)

@@ -513,10 +513,14 @@ class InferResult {
       const std::string& output_name, const uint8_t** buf,
       size_t* byte_size) const = 0;
 
-  /// Get final response bool of the request which generated this response.
+  /// Get final response bool for this response.
   /// \return Error object indicating the success or failure.
   virtual Error IsFinalResponse(bool* is_final_response) const = 0;
 
+  /// Get null response bool for this response.
+  /// \return Error object indicating the success or failure.
+  virtual Error IsNullResponse(bool* is_null_response) const = 0;
+
   /// Get the result data as a vector of strings. The vector will
   /// receive a copy of result data. An error will be generated if
   /// the datatype of output is not 'BYTES'.
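For orientation, a streaming client consumes these two flags together in its completion callback. The following is a minimal sketch, not code from this PR: the callback name and the surrounding client setup are assumed.

// Hypothetical completion callback using the new API (illustrative only).
// A null response is a final, empty response that closes a decoupled stream.
#include <memory>

#include "grpc_client.h"

namespace tc = triton::client;

void
OnResponse(tc::InferResult* result)
{
  std::unique_ptr<tc::InferResult> result_ptr{result};
  bool is_null{false};
  bool is_final{false};
  if (result_ptr->IsNullResponse(&is_null).IsOk() && !is_null) {
    // Only non-null responses carry output tensors worth reading.
  }
  if (result_ptr->IsFinalResponse(&is_final).IsOk() && is_final) {
    // No further responses will arrive for this request.
  }
}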
src/c++/library/grpc_client.cc (15 additions, 1 deletion)

@@ -1,4 +1,4 @@
-// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -190,6 +190,7 @@ class InferResultGrpc : public InferResult {
       const std::string& output_name, const uint8_t** buf,
       size_t* byte_size) const override;
   Error IsFinalResponse(bool* is_final_response) const override;
+  Error IsNullResponse(bool* is_null_response) const override;
   Error StringData(
       const std::string& output_name,
       std::vector<std::string>* string_result) const override;
@@ -211,6 +212,7 @@ class InferResultGrpc : public InferResult {
   std::shared_ptr<inference::ModelStreamInferResponse> stream_response_;
   Error request_status_;
   bool is_final_response_{true};
+  bool is_null_response_{false};
 };
 
 Error
@@ -322,6 +324,16 @@ InferResultGrpc::IsFinalResponse(bool* is_final_response) const
   return Error::Success;
 }
 
+Error
+InferResultGrpc::IsNullResponse(bool* is_null_response) const
+{
+  if (is_null_response == nullptr) {
+    return Error("is_null_response cannot be nullptr");
+  }
+  *is_null_response = is_null_response_;
+  return Error::Success;
+}
+
 Error
 InferResultGrpc::StringData(
     const std::string& output_name,
@@ -384,6 +396,7 @@ InferResultGrpc::InferResultGrpc(
   if (is_final_response_itr != response_->parameters().end()) {
     is_final_response_ = is_final_response_itr->second.bool_param();
   }
+  is_null_response_ = response_->outputs().empty() && is_final_response_;
 }
 
 InferResultGrpc::InferResultGrpc(
@@ -409,6 +422,7 @@ InferResultGrpc::InferResultGrpc(
   if (is_final_response_itr != response_->parameters().end()) {
     is_final_response_ = is_final_response_itr->second.bool_param();
   }
+  is_null_response_ = response_->outputs().empty() && is_final_response_;
 }
 
 //==============================================================================
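Both constructors derive the new flag the same way: a response is "null" when it is final and carries no output tensors, i.e. the empty packet a decoupled model sends to close its stream. A condensed restatement of that rule (a sketch; it assumes the final-response flag travels in the "triton_final_response" response parameter, which the collapsed lines above elide):

// Illustrative only: the rule applied in both InferResultGrpc constructors.
#include "grpc_service.pb.h"

bool
IsNullResponse(const inference::ModelInferResponse& response)
{
  bool is_final_response = true;  // default when the parameter is absent
  const auto itr = response.parameters().find("triton_final_response");
  if (itr != response.parameters().end()) {
    is_final_response = itr->second.bool_param();
  }
  // A null response is a final response with no outputs to read.
  return response.outputs().empty() && is_final_response;
}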
src/c++/library/http_client.cc (12 additions, 0 deletions)

@@ -740,6 +740,7 @@ class InferResultHttp : public InferResult {
       const std::string& output_name, const uint8_t** buf,
       size_t* byte_size) const override;
   Error IsFinalResponse(bool* is_final_response) const override;
+  Error IsNullResponse(bool* is_null_response) const override;
   Error StringData(
       const std::string& output_name,
       std::vector<std::string>* string_result) const override;
@@ -769,6 +770,7 @@ class InferResultHttp : public InferResult {
 
   bool binary_data_{true};
   bool is_final_response_{true};
+  bool is_null_response_{false};
 };
 
 void
@@ -951,6 +953,16 @@ InferResultHttp::IsFinalResponse(bool* is_final_response) const
   return Error::Success;
 }
 
+Error
+InferResultHttp::IsNullResponse(bool* is_null_response) const
+{
+  if (is_null_response == nullptr) {
+    return Error("is_null_response cannot be nullptr");
+  }
+  *is_null_response = is_null_response_;
+  return Error::Success;
+}
+
 Error
 InferResultHttp::StringData(
     const std::string& output_name,
src/c++/perf_analyzer/client_backend/client_backend.h (8 additions, 1 deletion)

@@ -617,12 +617,19 @@ class InferResult {
       const std::string& output_name, const uint8_t** buf,
       size_t* byte_size) const = 0;
 
-  /// Get final response bool of the request which generated this response.
+  /// Get final response bool for this response.
   /// \return Error object indicating the success or failure.
   virtual Error IsFinalResponse(bool* is_final_response) const
  {
     return Error("InferResult::IsFinalResponse() not implemented");
   };
+
+  /// Get null response bool for this response.
+  /// \return Error object indicating the success or failure.
+  virtual Error IsNullResponse(bool* is_null_response) const
+  {
+    return Error("InferResult::IsNullResponse() not implemented");
+  };
 };
 
 }}}  // namespace triton::perfanalyzer::clientbackend
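Because the base-class defaults return an error instead of being pure virtual, existing backends compile unchanged and fail gracefully at runtime. A caller therefore has to check the returned Error before trusting the output flag; a minimal sketch (the helper name and the fallback policy are assumptions, not part of this PR):

// Hypothetical helper: treat "not implemented" as a non-null response.
#include "client_backend/client_backend.h"

namespace cb = triton::perfanalyzer::clientbackend;

bool
IsNullResponseOrFalse(const cb::InferResult& result)
{
  bool is_null_response{false};
  cb::Error err = result.IsNullResponse(&is_null_response);
  if (!err.IsOk()) {
    return false;  // backend predates the API; assume a real response
  }
  return is_null_response;
}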
src/c++/perf_analyzer/client_backend/mock_client_backend.h (9 additions, 0 deletions)

@@ -127,6 +127,15 @@ class MockInferResult : public InferResult {
     return Error::Success;
   }
 
+  Error IsNullResponse(bool* is_null_response) const override
+  {
+    if (is_null_response == nullptr) {
+      return Error("is_null_response cannot be nullptr");
+    }
+    *is_null_response = false;
+    return Error::Success;
+  }
+
  private:
   std::string req_id_;
 };
@@ -827,6 +827,13 @@ TritonInferResult::IsFinalResponse(bool* is_final_response) const
   return Error::Success;
 }
 
+Error
+TritonInferResult::IsNullResponse(bool* is_null_response) const
+{
+  RETURN_IF_TRITON_ERROR(result_->IsNullResponse(is_null_response));
+  return Error::Success;
+}
+
 //==============================================================================
 
 }}}}  // namespace triton::perfanalyzer::clientbackend::tritonremote
@@ -331,6 +331,8 @@ class TritonInferResult : public InferResult {
       size_t* byte_size) const override;
   /// See InferResult::IsFinalResponse()
   Error IsFinalResponse(bool* is_final_response) const override;
+  /// See InferResult::IsNullResponse()
+  Error IsNullResponse(bool* is_null_response) const override;
 
  private:
   std::unique_ptr<tc::InferResult> result_;
src/c++/perf_analyzer/infer_context.cc (16 additions, 8 deletions)

@@ -1,4 +1,4 @@
-// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -236,20 +236,26 @@ void
 InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
 {
   std::shared_ptr<cb::InferResult> result_ptr(result);
+  bool is_final_response{true};
   if (thread_stat_->cb_status_.IsOk()) {
     // Add the request timestamp to thread Timestamp vector with
     // proper locking
     std::lock_guard<std::mutex> lock(thread_stat_->mu_);
     thread_stat_->cb_status_ = result_ptr->RequestStatus();
     if (thread_stat_->cb_status_.IsOk()) {
-      std::chrono::time_point<std::chrono::system_clock> end_time_async;
-      end_time_async = std::chrono::system_clock::now();
       std::string request_id;
       thread_stat_->cb_status_ = result_ptr->Id(&request_id);
       const auto& it = async_req_map_.find(request_id);
       if (it != async_req_map_.end()) {
-        it->second.end_times.push_back(end_time_async);
-        bool is_final_response{false};
+        bool is_null_response{false};
+        thread_stat_->cb_status_ =
+            result_ptr->IsNullResponse(&is_null_response);
+        if (thread_stat_->cb_status_.IsOk() == false) {
+          return;
+        }
+        if (is_null_response == false) {
+          it->second.end_times.push_back(std::chrono::system_clock::now());
+        }
         thread_stat_->cb_status_ =
             result_ptr->IsFinalResponse(&is_final_response);
         if (thread_stat_->cb_status_.IsOk() == false) {
@@ -267,10 +273,12 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
       }
     }
 
-  total_ongoing_requests_--;
+  if (is_final_response) {
+    total_ongoing_requests_--;
 
-  if (async_callback_finalize_func_ != nullptr) {
-    async_callback_finalize_func_(id_);
+    if (async_callback_finalize_func_ != nullptr) {
+      async_callback_finalize_func_(id_);
+    }
   }
 }
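The restructured callback changes two behaviors at once: null responses no longer contribute an end timestamp (so the stream-closing packet cannot skew latency), and a request is only retired, with the finalize callback fired, when its final response arrives. A toy simulation of that bookkeeping (an assumption-laden sketch, not Perf Analyzer source):

// Models a decoupled request producing two data responses and one empty
// "null" final response.
#include <chrono>
#include <iostream>
#include <vector>

struct Response {
  bool is_null;
  bool is_final;
};

int
main()
{
  const std::vector<Response> stream{
      {false, false}, {false, false}, {true, true}};
  std::vector<std::chrono::system_clock::time_point> end_times;
  int total_ongoing_requests = 1;
  for (const auto& r : stream) {
    if (!r.is_null) {
      end_times.push_back(std::chrono::system_clock::now());
    }
    if (r.is_final) {
      total_ongoing_requests--;  // retire the request exactly once
    }
  }
  std::cout << "recorded " << end_times.size() << " response timestamps, "
            << total_ongoing_requests << " requests still in flight\n";
  // Prints: recorded 2 response timestamps, 0 requests still in flight
}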
src/c++/perf_analyzer/inference_profiler.cc (14 additions, 4 deletions)

@@ -1012,6 +1012,8 @@ InferenceProfiler::MergePerfStatusReports(
       perf_status.client_stats.sequence_count;
   experiment_perf_status.client_stats.delayed_request_count +=
       perf_status.client_stats.delayed_request_count;
+  experiment_perf_status.client_stats.response_count +=
+      perf_status.client_stats.response_count;
   experiment_perf_status.client_stats.duration_ns +=
       perf_status.client_stats.duration_ns;
 
@@ -1079,6 +1081,8 @@ InferenceProfiler::MergePerfStatusReports(
       (experiment_perf_status.client_stats.request_count *
        experiment_perf_status.batch_size) /
       client_duration_sec;
+  experiment_perf_status.client_stats.responses_per_sec =
+      experiment_perf_status.client_stats.response_count / client_duration_sec;
   RETURN_IF_ERROR(SummarizeLatency(
       experiment_perf_status.client_stats.latencies, experiment_perf_status));
 
@@ -1211,18 +1215,20 @@ InferenceProfiler::Summarize(
 {
   size_t valid_sequence_count = 0;
   size_t delayed_request_count = 0;
+  size_t response_count = 0;
 
   // Get measurement from requests that fall within the time interval
   std::pair<uint64_t, uint64_t> valid_range{window_start_ns, window_end_ns};
   uint64_t window_duration_ns = valid_range.second - valid_range.first;
   std::vector<uint64_t> latencies;
   ValidLatencyMeasurement(
-      valid_range, valid_sequence_count, delayed_request_count, &latencies);
+      valid_range, valid_sequence_count, delayed_request_count, &latencies,
+      response_count);
 
   RETURN_IF_ERROR(SummarizeLatency(latencies, summary));
   RETURN_IF_ERROR(SummarizeClientStat(
       start_stat, end_stat, window_duration_ns, latencies.size(),
-      valid_sequence_count, delayed_request_count, summary));
+      valid_sequence_count, delayed_request_count, response_count, summary));
   summary.client_stats.latencies = std::move(latencies);
 
   SummarizeOverhead(window_duration_ns, manager_->GetIdleTime(), summary);
@@ -1245,10 +1251,11 @@ void
 InferenceProfiler::ValidLatencyMeasurement(
     const std::pair<uint64_t, uint64_t>& valid_range,
     size_t& valid_sequence_count, size_t& delayed_request_count,
-    std::vector<uint64_t>* valid_latencies)
+    std::vector<uint64_t>* valid_latencies, size_t& response_count)
 {
   valid_latencies->clear();
   valid_sequence_count = 0;
+  response_count = 0;
   std::vector<size_t> erase_indices{};
   for (size_t i = 0; i < all_timestamps_.size(); i++) {
     const auto& timestamp = all_timestamps_[i];
@@ -1260,6 +1267,7 @@ InferenceProfiler::ValidLatencyMeasurement(
     if ((request_end_ns >= valid_range.first) &&
         (request_end_ns <= valid_range.second)) {
       valid_latencies->push_back(request_end_ns - request_start_ns);
+      response_count += std::get<1>(timestamp).size();
       erase_indices.push_back(i);
       // Just add the sequence_end flag here.
       if (std::get<2>(timestamp)) {
@@ -1358,7 +1366,7 @@ InferenceProfiler::SummarizeClientStat(
     const cb::InferStat& start_stat, const cb::InferStat& end_stat,
     const uint64_t duration_ns, const size_t valid_request_count,
     const size_t valid_sequence_count, const size_t delayed_request_count,
-    PerfStatus& summary)
+    const size_t response_count, PerfStatus& summary)
 {
   summary.on_sequence_model =
       ((parser_->SchedulerType() == ModelParser::SEQUENCE) ||
@@ -1367,13 +1375,15 @@
   summary.client_stats.request_count = valid_request_count;
   summary.client_stats.sequence_count = valid_sequence_count;
   summary.client_stats.delayed_request_count = delayed_request_count;
+  summary.client_stats.response_count = response_count;
   summary.client_stats.duration_ns = duration_ns;
   float client_duration_sec =
       (float)summary.client_stats.duration_ns / NANOS_PER_SECOND;
   summary.client_stats.sequence_per_sec =
       valid_sequence_count / client_duration_sec;
   summary.client_stats.infer_per_sec =
       (valid_request_count * summary.batch_size) / client_duration_sec;
+  summary.client_stats.responses_per_sec = response_count / client_duration_sec;
 
   if (include_lib_stats_) {
     size_t completed_count =
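End to end, every request completing inside the measurement window contributes the size of its end_times vector to response_count, and the new metric is simply that total divided by the window duration. A worked example with made-up numbers:

// Hypothetical window: 40 valid requests, 3 recorded responses each, 2 s.
#include <cstddef>
#include <cstdio>

int
main()
{
  const size_t request_count = 40;
  const size_t responses_per_request = 3;  // size of each end_times vector
  const size_t response_count = request_count * responses_per_request;
  const double client_duration_sec = 2.0;

  const double infer_per_sec =
      (request_count * 1 /* batch size */) / client_duration_sec;
  const double responses_per_sec = response_count / client_duration_sec;
  std::printf(
      "infer/s = %.1f, responses/s = %.1f\n", infer_per_sec,
      responses_per_sec);
  // infer/s = 20.0, responses/s = 60.0
}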
src/c++/perf_analyzer/inference_profiler.h (15 additions, 8 deletions)

@@ -48,6 +48,7 @@
 namespace triton { namespace perfanalyzer {
 
 #ifndef DOCTEST_CONFIG_DISABLE
+class NaggyMockInferenceProfiler;
 class TestInferenceProfiler;
 #endif
 
@@ -126,6 +127,8 @@ struct ClientSideStats {
   uint64_t sequence_count;
   // The number of requests that missed their schedule
   uint64_t delayed_request_count;
+  // The number of responses
+  uint64_t response_count;
   uint64_t duration_ns;
   uint64_t avg_latency_ns;
   // a ordered map of percentiles to be reported (<percentile, value> pair)
@@ -139,6 +142,7 @@ struct ClientSideStats {
   uint64_t avg_receive_time_ns;
   // Per sec stat
   double infer_per_sec;
+  double responses_per_sec;
   double sequence_per_sec;
 
   // Completed request count reported by the client library
@@ -440,16 +444,17 @@ class InferenceProfiler {
   /// sequence model.
   /// \param latencies Returns the vector of request latencies where the
   /// requests are completed within the measurement window.
-  void ValidLatencyMeasurement(
+  /// \param response_count Returns the number of responses
+  virtual void ValidLatencyMeasurement(
       const std::pair<uint64_t, uint64_t>& valid_range,
       size_t& valid_sequence_count, size_t& delayed_request_count,
-      std::vector<uint64_t>* latencies);
+      std::vector<uint64_t>* latencies, size_t& response_count);
 
   /// \param latencies The vector of request latencies collected.
   /// \param summary Returns the summary that the latency related fields are
   /// set.
   /// \return cb::Error object indicating success or failure.
-  cb::Error SummarizeLatency(
+  virtual cb::Error SummarizeLatency(
       const std::vector<uint64_t>& latencies, PerfStatus& summary);
 
   /// \param latencies The vector of request latencies collected.
@@ -466,14 +471,15 @@ class InferenceProfiler {
   /// \param valid_sequence_count The number of completed sequences recorded.
   /// \param delayed_request_count The number of requests that missed their
   /// schedule.
+  /// \param response_count The number of responses.
   /// \param summary Returns the summary that the fields recorded by
   /// client are set.
   /// \return cb::Error object indicating success or failure.
-  cb::Error SummarizeClientStat(
+  virtual cb::Error SummarizeClientStat(
       const cb::InferStat& start_stat, const cb::InferStat& end_stat,
       const uint64_t duration_ns, const size_t valid_request_count,
       const size_t delayed_request_count, const size_t valid_sequence_count,
-      PerfStatus& summary);
+      const size_t response_count, PerfStatus& summary);
 
   /// Adds the send request rate metric to the summary object.
   /// \param window_duration_s The duration of the window in seconds.
@@ -557,15 +563,15 @@ class InferenceProfiler {
   /// \param perf_status List of perf status reports to be merged.
   /// \param summary_status Final merged summary status.
   /// \return cb::Error object indicating success or failure.
-  cb::Error MergePerfStatusReports(
+  virtual cb::Error MergePerfStatusReports(
      std::deque<PerfStatus>& perf_status, PerfStatus& summary_status);
 
   /// Merge individual server side statistics into a single server side report.
   /// \param server_side_stats List of server side statistics reports to be
   /// merged.
   /// \param server_side_summary Final merged summary status.
   /// \return cb::Error object indicating success or failure.
-  cb::Error MergeServerSideStats(
+  virtual cb::Error MergeServerSideStats(
       std::vector<ServerSideStats>& server_side_stats,
       ServerSideStats& server_side_summary);
 
@@ -695,10 +701,11 @@ class InferenceProfiler {
   const double overhead_pct_threshold_{0.0};
 
 #ifndef DOCTEST_CONFIG_DISABLE
+  friend NaggyMockInferenceProfiler;
   friend TestInferenceProfiler;
 
  public:
-  InferenceProfiler(){};
+  InferenceProfiler() = default;
 #endif
 };
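The methods made virtual here, together with the new NaggyMockInferenceProfiler friend declaration, exist so the doctest suite can substitute a gmock-based profiler. A plausible shape for that mock (assumed; its real definition lives in the test sources, not in this diff):

// Sketch of a mock enabled by the newly virtual methods (illustrative).
#include "gmock/gmock.h"
#include "inference_profiler.h"

namespace triton { namespace perfanalyzer {

class NaggyMockInferenceProfiler : public InferenceProfiler {
 public:
  MOCK_METHOD(
      void, ValidLatencyMeasurement,
      ((const std::pair<uint64_t, uint64_t>&), size_t&, size_t&,
       std::vector<uint64_t>*, size_t&),
      (override));
  MOCK_METHOD(
      cb::Error, SummarizeLatency,
      (const std::vector<uint64_t>&, PerfStatus&), (override));
};

}}  // namespace triton::perfanalyzer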