From 9e0144b154ae2e90966c8f8833324b4cb288ffd3 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 31 Aug 2023 14:51:41 -0700 Subject: [PATCH 1/5] Allow transitioning from RELEASED to STARTED state for re-using requests --- src/infer_request.cc | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 803335bca..3dd9702cf 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -124,6 +124,8 @@ InferenceRequest::~InferenceRequest() Status InferenceRequest::SetState(InferenceRequest::State new_state) { + LOG_VERBOSE(1) << LogRequest() << "Setting state from " << state_ << " to " + << new_state; // No-op if this is already the current state, or if this is a null request. if (new_state == state_ || null_request_) { return Status::Success; @@ -142,7 +144,7 @@ InferenceRequest::SetState(InferenceRequest::State new_state) std::stringstream ss; ss << LogRequest() << "Invalid request state transition from " << state_ << " to " << new_state; - return Status(Status::Code::INVALID_ARG, ss.str()); + return Status(Status::Code::INTERNAL, ss.str()); }; // Define state transitions @@ -151,7 +153,6 @@ InferenceRequest::SetState(InferenceRequest::State new_state) if (new_state != InferenceRequest::State::STARTED) { return generate_error(); } - state_ = new_state; IncrementPendingRequestCount(); break; } @@ -159,7 +160,6 @@ InferenceRequest::SetState(InferenceRequest::State new_state) if (new_state != InferenceRequest::State::EXECUTING) { return generate_error(); } - state_ = new_state; DecrementPendingRequestCount(); break; } @@ -167,14 +167,18 @@ InferenceRequest::SetState(InferenceRequest::State new_state) if (new_state != InferenceRequest::State::RELEASED) { return generate_error(); } - state_ = new_state; break; } case InferenceRequest::State::RELEASED: { - // No state transition currently supported after release. - return generate_error(); + if (new_state != InferenceRequest::State::STARTED) { + // Only transition currently supported after release is to start again + // when re-using request objects for multiple inferences. + return generate_error(); + } + break; } } + state_ = new_state; return Status::Success; } @@ -182,9 +186,13 @@ void InferenceRequest::IncrementPendingRequestCount() { #ifdef TRITON_ENABLE_METRICS - auto reporter = model_raw_->MetricReporter(); - if (reporter) { - reporter->IncrementGauge(kPendingRequestMetric, 1); + // Only increment once and do not increment again until decremented. 
+ const bool increment_pending_count = !decrement_pending_count_; + if (increment_pending_count) { + auto reporter = model_raw_->MetricReporter(); + if (reporter) { + reporter->IncrementGauge(kPendingRequestMetric, 1); + } decrement_pending_count_ = true; } #endif // TRITON_ENABLE_METRICS From bf84f54683fc05184e563b0dbc5a05c41b959426 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 31 Aug 2023 22:28:14 -0700 Subject: [PATCH 2/5] Rename STARTED request state to PENDING --- src/infer_request.cc | 12 ++++++------ src/infer_request.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 3dd9702cf..ce2fac139 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -150,13 +150,13 @@ InferenceRequest::SetState(InferenceRequest::State new_state) // Define state transitions switch (state_) { case InferenceRequest::State::INITIALIZED: { - if (new_state != InferenceRequest::State::STARTED) { + if (new_state != InferenceRequest::State::PENDING) { return generate_error(); } IncrementPendingRequestCount(); break; } - case InferenceRequest::State::STARTED: { + case InferenceRequest::State::PENDING: { if (new_state != InferenceRequest::State::EXECUTING) { return generate_error(); } @@ -170,7 +170,7 @@ InferenceRequest::SetState(InferenceRequest::State new_state) break; } case InferenceRequest::State::RELEASED: { - if (new_state != InferenceRequest::State::STARTED) { + if (new_state != InferenceRequest::State::PENDING) { // Only transition currently supported after release is to start again // when re-using request objects for multiple inferences. return generate_error(); @@ -384,7 +384,7 @@ InferenceRequest::OutputBufferProperties( Status InferenceRequest::Run(std::unique_ptr& request) { - RETURN_IF_ERROR(request->SetState(InferenceRequest::State::STARTED)); + RETURN_IF_ERROR(request->SetState(InferenceRequest::State::PENDING)); return request->model_raw_->Enqueue(request); } @@ -1588,8 +1588,8 @@ operator<<(std::ostream& out, const InferenceRequest::State& state) out << "INITIALIZED"; break; } - case InferenceRequest::State::STARTED: { - out << "STARTED"; + case InferenceRequest::State::PENDING: { + out << "PENDING"; break; } case InferenceRequest::State::EXECUTING: { diff --git a/src/infer_request.h b/src/infer_request.h index 97c56ba27..09be610de 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -63,7 +63,7 @@ class InferenceRequest { INITIALIZED, // The request has been enqueued, but is not yet executing. - STARTED, + PENDING, // The request has been picked up by a backend model instance for execution, // but hasn't been released yet. From 948f1c53affa96e499775fa626052ff88d445d45 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 31 Aug 2023 22:46:49 -0700 Subject: [PATCH 3/5] Reset to INITIALIZED in PrepareForInference() to help enforce preparation before Run() --- src/infer_request.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index ce2fac139..6bf00a002 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -133,7 +133,7 @@ InferenceRequest::SetState(InferenceRequest::State new_state) // Allow RELEASED state transition from any state for now. // Not all requests will follow linear transition, such as null requests - // used for padding batches, and ensemble requests. + // used for padding batches, ensemble requests, and errors. 
if (new_state == InferenceRequest::State::RELEASED) { state_ = new_state; return Status::Success; @@ -170,9 +170,9 @@ InferenceRequest::SetState(InferenceRequest::State new_state) break; } case InferenceRequest::State::RELEASED: { - if (new_state != InferenceRequest::State::PENDING) { - // Only transition currently supported after release is to start again - // when re-using request objects for multiple inferences. + if (new_state != InferenceRequest::State::INITIALIZED) { + // Only transition currently supported after release is to start over + // again, such as re-using request objects for multiple inferences. return generate_error(); } break; @@ -857,8 +857,10 @@ InferenceRequest::PrepareForInference() request_start_ns_ = 0; #endif // TRITON_ENABLE_STATS - LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this; + // Help enforce that PrepareForInference() is called prior to Run(). + RETURN_IF_ERROR(SetState(InferenceRequest::State::INITIALIZED)); + LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this; return Status::Success; } From a7be7c08202a56f6a06c35d9155f0012c2d07435 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 31 Aug 2023 22:46:49 -0700 Subject: [PATCH 4/5] Capture pending count updates strictly in state transitions --- src/infer_request.cc | 71 ++++++++++++++++++-------------------------- src/infer_request.h | 3 -- 2 files changed, 29 insertions(+), 45 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index ce2fac139..ef6abca78 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -106,21 +106,11 @@ InferenceRequest::InferenceRequest( : needs_normalization_(true), model_raw_(model), requested_model_version_(requested_model_version), flags_(0), correlation_id_(0), batch_size_(0), timeout_us_(0), collect_stats_(true), - state_(InferenceRequest::State::INITIALIZED), null_request_(false), - decrement_pending_count_(false) + state_(InferenceRequest::State::INITIALIZED), null_request_(false) { SetPriority(0); } -InferenceRequest::~InferenceRequest() -{ - // If request has been enqueued but hasn't started executing by destruction - // time, an error occurred and the pending request count will need to be - // decremented. - DecrementPendingRequestCount(); -} - - Status InferenceRequest::SetState(InferenceRequest::State new_state) { @@ -131,14 +121,6 @@ InferenceRequest::SetState(InferenceRequest::State new_state) return Status::Success; } - // Allow RELEASED state transition from any state for now. - // Not all requests will follow linear transition, such as null requests - // used for padding batches, and ensemble requests. - if (new_state == InferenceRequest::State::RELEASED) { - state_ = new_state; - return Status::Success; - } - // Generate error when called rather than copying it into every case below. const auto generate_error = [&]() { std::stringstream ss; @@ -150,17 +132,25 @@ InferenceRequest::SetState(InferenceRequest::State new_state) // Define state transitions switch (state_) { case InferenceRequest::State::INITIALIZED: { - if (new_state != InferenceRequest::State::PENDING) { + if (new_state == InferenceRequest::State::PENDING) { + IncrementPendingRequestCount(); + } else if (new_state == InferenceRequest::State::RELEASED) { + // No-op when moving from initialized to released, just releasing early. 
+ } else { return generate_error(); } - IncrementPendingRequestCount(); break; } case InferenceRequest::State::PENDING: { - if (new_state != InferenceRequest::State::EXECUTING) { + // Request may move from pending to either execution when scheduled to + // backend, or released early due to some error. + if (new_state == InferenceRequest::State::EXECUTING || + new_state == InferenceRequest::State::RELEASED) { + DecrementPendingRequestCount(); + } else { + // Unexpected state transition return generate_error(); } - DecrementPendingRequestCount(); break; } case InferenceRequest::State::EXECUTING: { @@ -170,9 +160,9 @@ InferenceRequest::SetState(InferenceRequest::State new_state) break; } case InferenceRequest::State::RELEASED: { - if (new_state != InferenceRequest::State::PENDING) { - // Only transition currently supported after release is to start again - // when re-using request objects for multiple inferences. + if (new_state != InferenceRequest::State::INITIALIZED) { + // Only transition currently supported after release is to start over + // again, such as re-using request objects for multiple inferences. return generate_error(); } break; @@ -186,14 +176,11 @@ void InferenceRequest::IncrementPendingRequestCount() { #ifdef TRITON_ENABLE_METRICS - // Only increment once and do not increment again until decremented. - const bool increment_pending_count = !decrement_pending_count_; - if (increment_pending_count) { - auto reporter = model_raw_->MetricReporter(); - if (reporter) { - reporter->IncrementGauge(kPendingRequestMetric, 1); - } - decrement_pending_count_ = true; + // Pending request count should always be 0 or 1 per-request. If a request + // increments the count, it should not be incremented again until decremented. + auto reporter = model_raw_->MetricReporter(); + if (reporter) { + reporter->IncrementGauge(kPendingRequestMetric, 1); } #endif // TRITON_ENABLE_METRICS } @@ -202,13 +189,11 @@ void InferenceRequest::DecrementPendingRequestCount() { #ifdef TRITON_ENABLE_METRICS - // Only decrement if count has been incremented, and not already decremented. - if (decrement_pending_count_) { - auto reporter = model_raw_->MetricReporter(); - if (reporter) { - reporter->DecrementGauge(kPendingRequestMetric, 1); - } - decrement_pending_count_ = false; + // Pending request count should always be 0 or 1 per-request. A request should + // not decrement the count unless it has already been incremented. + auto reporter = model_raw_->MetricReporter(); + if (reporter) { + reporter->DecrementGauge(kPendingRequestMetric, 1); } #endif // TRITON_ENABLE_METRICS } @@ -857,8 +842,10 @@ InferenceRequest::PrepareForInference() request_start_ns_ = 0; #endif // TRITON_ENABLE_STATS - LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this; + // Help enforce that PrepareForInference() is called prior to Run(). + RETURN_IF_ERROR(SetState(InferenceRequest::State::INITIALIZED)); + LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this; return Status::Success; } diff --git a/src/infer_request.h b/src/infer_request.h index 09be610de..228cff03f 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -799,9 +799,6 @@ class InferenceRequest { // Whether this is a null request used for direct sequence batch padding or // not. 
bool null_request_; - // Catch-all to correctly decrement pending count if needed on destruction - // if request doesn't follow normal execution path (error, unused, ensembles) - bool decrement_pending_count_; }; std::ostream& operator<<(std::ostream& out, const InferenceRequest& request); From c742fc6363fed4e505051310aa822a704e7c778a Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Tue, 5 Sep 2023 12:41:34 -0700 Subject: [PATCH 5/5] Remove destructor, not necessary for now --- src/infer_request.h | 1 - src/test/response_cache_test.cc | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/infer_request.h b/src/infer_request.h index 228cff03f..41e7deed8 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -291,7 +291,6 @@ class InferenceRequest { const int64_t requested_model_version); InferenceRequest(Model* model, const int64_t requested_model_version); - ~InferenceRequest(); const std::string& ModelName() const; int64_t RequestedModelVersion() const { return requested_model_version_; } diff --git a/src/test/response_cache_test.cc b/src/test/response_cache_test.cc index 2b40a04c5..dad7d0faf 100644 --- a/src/test/response_cache_test.cc +++ b/src/test/response_cache_test.cc @@ -66,8 +66,6 @@ InferenceRequest::InferenceRequest( response_factory_.reset(new InferenceResponseFactory()); } -InferenceRequest::~InferenceRequest() {} - InferenceRequest::Input::Input( const std::string& name, const inference::DataType datatype, const int64_t* shape, const uint64_t dim_count)
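
For reference, the net state machine that infer_request.cc ends up with after patch 5/5, written as a minimal standalone C++ sketch. The Gauge struct and the main() driver below are illustrative stand-ins, not Triton APIs (the real code reports through model_raw_->MetricReporter() and kPendingRequestMetric); the transition rules and the points where the pending count moves are intended to mirror SetState() as patched above.

    // Minimal standalone model of the request state machine after this series.
    // "Gauge" stands in for the per-model pending-request metric; it is not a
    // Triton API, only an illustration of when the count moves.
    #include <initializer_list>
    #include <iostream>

    enum class State { INITIALIZED, PENDING, EXECUTING, RELEASED };

    struct Gauge {
      int value = 0;
      void Increment() { ++value; }
      void Decrement() { --value; }
    };

    // Returns false (an INTERNAL error in the real code) for transitions the
    // series does not allow. The gauge moves only on the edges into and out of
    // PENDING, so each request contributes 0 or 1 to the pending count.
    bool SetState(State& state, State next, Gauge& pending) {
      if (next == state) {
        return true;  // no-op, mirrors the early return in SetState()
      }
      switch (state) {
        case State::INITIALIZED:
          if (next == State::PENDING) {
            pending.Increment();
          } else if (next != State::RELEASED) {  // early release is allowed
            return false;
          }
          break;
        case State::PENDING:
          // Scheduled to a backend instance, or released early due to an error.
          if (next == State::EXECUTING || next == State::RELEASED) {
            pending.Decrement();
          } else {
            return false;
          }
          break;
        case State::EXECUTING:
          if (next != State::RELEASED) {
            return false;
          }
          break;
        case State::RELEASED:
          // Re-use: PrepareForInference() resets the request before Run().
          if (next != State::INITIALIZED) {
            return false;
          }
          break;
      }
      state = next;
      return true;
    }

    int main() {
      Gauge pending;
      State state = State::INITIALIZED;
      // One inference, then re-use the same request object for a second one.
      for (State next : {State::PENDING, State::EXECUTING, State::RELEASED,
                         State::INITIALIZED, State::PENDING, State::EXECUTING,
                         State::RELEASED}) {
        if (!SetState(state, next, pending)) {
          std::cerr << "invalid transition\n";
          return 1;
        }
        std::cout << "pending gauge = " << pending.value << "\n";
      }
      return 0;
    }

Keeping the gauge updates strictly on the edges into and out of PENDING is what lets patches 4 and 5 drop the destructor and the decrement_pending_count_ flag: every path out of PENDING decrements exactly once, so the per-request contribution to the count stays at 0 or 1 and cannot leak on error or early-release paths.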