Fix state transitions for re-running requests #251

Merged: 6 commits, Sep 6, 2023

Note: the diff below shows changes from 3 of the PR's 6 commits.
src/infer_request.cc (26 additions & 16 deletions)

```diff
@@ -124,14 +124,16 @@ InferenceRequest::~InferenceRequest()
 Status
 InferenceRequest::SetState(InferenceRequest::State new_state)
 {
+  LOG_VERBOSE(1) << LogRequest() << "Setting state from " << state_ << " to "
+                 << new_state;
   // No-op if this is already the current state, or if this is a null request.
   if (new_state == state_ || null_request_) {
     return Status::Success;
   }
 
   // Allow RELEASED state transition from any state for now.
   // Not all requests will follow linear transition, such as null requests
-  // used for padding batches, and ensemble requests.
+  // used for padding batches, ensemble requests, and errors.
   if (new_state == InferenceRequest::State::RELEASED) {
     state_ = new_state;
     return Status::Success;
@@ -142,49 +144,55 @@ InferenceRequest::SetState(InferenceRequest::State new_state)
     std::stringstream ss;
     ss << LogRequest() << "Invalid request state transition from " << state_
        << " to " << new_state;
-    return Status(Status::Code::INVALID_ARG, ss.str());
+    return Status(Status::Code::INTERNAL, ss.str());
   };
 
   // Define state transitions
   switch (state_) {
     case InferenceRequest::State::INITIALIZED: {
-      if (new_state != InferenceRequest::State::STARTED) {
+      if (new_state != InferenceRequest::State::PENDING) {
         return generate_error();
       }
-      state_ = new_state;
       IncrementPendingRequestCount();
       break;
     }
-    case InferenceRequest::State::STARTED: {
+    case InferenceRequest::State::PENDING: {
       if (new_state != InferenceRequest::State::EXECUTING) {
         return generate_error();
       }
-      state_ = new_state;
       DecrementPendingRequestCount();
       break;
     }
     case InferenceRequest::State::EXECUTING: {
       if (new_state != InferenceRequest::State::RELEASED) {
         return generate_error();
       }
-      state_ = new_state;
       break;
     }
     case InferenceRequest::State::RELEASED: {
-      // No state transition currently supported after release.
-      return generate_error();
+      if (new_state != InferenceRequest::State::INITIALIZED) {
+        // Only transition currently supported after release is to start over
+        // again, such as re-using request objects for multiple inferences.
+        return generate_error();
+      }
       break;
     }
   }
+  state_ = new_state;
   return Status::Success;
 }
 
 void
 InferenceRequest::IncrementPendingRequestCount()
 {
 #ifdef TRITON_ENABLE_METRICS
-  auto reporter = model_raw_->MetricReporter();
-  if (reporter) {
-    reporter->IncrementGauge(kPendingRequestMetric, 1);
+  // Only increment once and do not increment again until decremented.
+  const bool increment_pending_count = !decrement_pending_count_;
+  if (increment_pending_count) {
+    auto reporter = model_raw_->MetricReporter();
+    if (reporter) {
+      reporter->IncrementGauge(kPendingRequestMetric, 1);
+    }
+    decrement_pending_count_ = true;
   }
 #endif  // TRITON_ENABLE_METRICS
```
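
The increment is now guarded by a `decrement_pending_count_` flag (a member presumably declared in `infer_request.h`, though that part of the header is not shown in this view), so repeated passes through PENDING cannot inflate the gauge. The matching decrement side is not part of this hunk; a plausible sketch of it, assuming `DecrementGauge` is the counterpart of `IncrementGauge`:

```cpp
// Sketch only: the decrement counterpart is not shown in this diff, so the
// exact body is an assumption. The idea is symmetry with the guard above:
// decrement at most once per recorded increment, then clear the flag.
void
InferenceRequest::DecrementPendingRequestCount()
{
#ifdef TRITON_ENABLE_METRICS
  // Only decrement if a matching increment was previously recorded.
  if (decrement_pending_count_) {
    auto reporter = model_raw_->MetricReporter();
    if (reporter) {
      reporter->DecrementGauge(kPendingRequestMetric, 1);
    }
    decrement_pending_count_ = false;
  }
#endif  // TRITON_ENABLE_METRICS
}
```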

```diff
@@ -376,7 +384,7 @@ InferenceRequest::OutputBufferProperties(
 Status
 InferenceRequest::Run(std::unique_ptr<InferenceRequest>& request)
 {
-  RETURN_IF_ERROR(request->SetState(InferenceRequest::State::STARTED));
+  RETURN_IF_ERROR(request->SetState(InferenceRequest::State::PENDING));
   return request->model_raw_->Enqueue(request);
 }
```

```diff
@@ -849,8 +857,10 @@ InferenceRequest::PrepareForInference()
   request_start_ns_ = 0;
 #endif  // TRITON_ENABLE_STATS
 
-  LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this;
+  // Help enforce that PrepareForInference() is called prior to Run().
+  RETURN_IF_ERROR(SetState(InferenceRequest::State::INITIALIZED));
 
+  LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this;
   return Status::Success;
 }
```
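
Together with the `Run()` change above, this hunk pins down the intended calling contract: `PrepareForInference()` moves the request to INITIALIZED, and `Run()` is the only INITIALIZED to PENDING edge, so a request that skips preparation now fails fast with an INTERNAL error instead of being enqueued. A sketch of the resulting happy path for a re-used request object; the caller shown here is hypothetical and the scheduler steps are elided:

```cpp
// Hypothetical caller, for illustration only: MakeRequest() is not a real
// helper in this codebase, and error handling is collapsed into the
// RETURN_IF_ERROR macro used throughout the diff.
std::unique_ptr<InferenceRequest> request = MakeRequest();

RETURN_IF_ERROR(request->PrepareForInference());  // -> INITIALIZED
RETURN_IF_ERROR(InferenceRequest::Run(request));  // INITIALIZED -> PENDING, then Enqueue()

// ...scheduler: PENDING -> EXECUTING; release: EXECUTING -> RELEASED...

// Re-running the same object, which this PR legalizes (RELEASED -> INITIALIZED):
RETURN_IF_ERROR(request->PrepareForInference());
RETURN_IF_ERROR(InferenceRequest::Run(request));
```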

```diff
@@ -1580,8 +1590,8 @@ operator<<(std::ostream& out, const InferenceRequest::State& state)
       out << "INITIALIZED";
       break;
     }
-    case InferenceRequest::State::STARTED: {
-      out << "STARTED";
+    case InferenceRequest::State::PENDING: {
+      out << "PENDING";
       break;
     }
     case InferenceRequest::State::EXECUTING: {
```
src/infer_request.h (1 addition & 1 deletion)

```diff
@@ -63,7 +63,7 @@ class InferenceRequest {
     INITIALIZED,
 
     // The request has been enqueued, but is not yet executing.
-    STARTED,
+    PENDING,
 
     // The request has been picked up by a backend model instance for execution,
     // but hasn't been released yet.
```
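
For reference, the transition rules after this PR distill into a small standalone model. This is a toy reimplementation for illustration, not Triton code; it also mirrors the "RELEASED allowed from any state" early-out and the same-state no-op from the first hunk:

```cpp
// Toy model of InferenceRequest state transitions after this PR.
#include <cassert>

enum class State { INITIALIZED, PENDING, EXECUTING, RELEASED };

bool SetState(State& s, State next) {
  if (next == s) return true;       // no-op, mirrors the early return
  if (next == State::RELEASED) {    // RELEASED is allowed from any state
    s = next;
    return true;
  }
  switch (s) {
    case State::INITIALIZED: if (next != State::PENDING) return false; break;
    case State::PENDING:     if (next != State::EXECUTING) return false; break;
    case State::EXECUTING:   return false;  // only RELEASED, handled above
    case State::RELEASED:    if (next != State::INITIALIZED) return false; break;
  }
  s = next;
  return true;
}

int main() {
  State s = State::INITIALIZED;
  assert(SetState(s, State::PENDING));      // Run() enqueues the request
  assert(SetState(s, State::EXECUTING));    // a backend instance picks it up
  assert(SetState(s, State::RELEASED));     // released after completion
  assert(SetState(s, State::INITIALIZED));  // re-run: the transition this PR adds
  assert(!SetState(s, State::EXECUTING));   // skipping PENDING is still rejected
  return 0;
}
```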