Fix state transitions for re-running requests #251

Merged: 6 commits, Sep 6, 2023

Note: the diff below shows changes from 3 of the PR's 6 commits.
src/infer_request.cc (26 additions & 16 deletions)

```diff
@@ -124,14 +124,16 @@ InferenceRequest::~InferenceRequest()
 Status
 InferenceRequest::SetState(InferenceRequest::State new_state)
 {
+  LOG_VERBOSE(1) << LogRequest() << "Setting state from " << state_ << " to "
+                 << new_state;
   // No-op if this is already the current state, or if this is a null request.
   if (new_state == state_ || null_request_) {
     return Status::Success;
   }
 
   // Allow RELEASED state transition from any state for now.
   // Not all requests will follow linear transition, such as null requests
-  // used for padding batches, and ensemble requests.
+  // used for padding batches, ensemble requests, and errors.
   if (new_state == InferenceRequest::State::RELEASED) {
     state_ = new_state;
     return Status::Success;
@@ -142,49 +144,55 @@ InferenceRequest::SetState(InferenceRequest::State new_state)
     std::stringstream ss;
     ss << LogRequest() << "Invalid request state transition from " << state_
        << " to " << new_state;
-    return Status(Status::Code::INVALID_ARG, ss.str());
+    return Status(Status::Code::INTERNAL, ss.str());
   };
 
   // Define state transitions
   switch (state_) {
     case InferenceRequest::State::INITIALIZED: {
-      if (new_state != InferenceRequest::State::STARTED) {
+      if (new_state != InferenceRequest::State::PENDING) {
         return generate_error();
       }
-      state_ = new_state;
       IncrementPendingRequestCount();
       break;
     }
-    case InferenceRequest::State::STARTED: {
+    case InferenceRequest::State::PENDING: {
       if (new_state != InferenceRequest::State::EXECUTING) {
         return generate_error();
       }
-      state_ = new_state;
       DecrementPendingRequestCount();
       break;
     }
     case InferenceRequest::State::EXECUTING: {
       if (new_state != InferenceRequest::State::RELEASED) {
         return generate_error();
       }
-      state_ = new_state;
       break;
     }
     case InferenceRequest::State::RELEASED: {
-      // No state transition currently supported after release.
-      return generate_error();
+      if (new_state != InferenceRequest::State::INITIALIZED) {
+        // Only transition currently supported after release is to start over
+        // again, such as re-using request objects for multiple inferences.
+        return generate_error();
+      }
       break;
     }
   }
+  state_ = new_state;
   return Status::Success;
 }
 
 void
 InferenceRequest::IncrementPendingRequestCount()
 {
 #ifdef TRITON_ENABLE_METRICS
-  auto reporter = model_raw_->MetricReporter();
-  if (reporter) {
-    reporter->IncrementGauge(kPendingRequestMetric, 1);
+  // Only increment once and do not increment again until decremented.
+  const bool increment_pending_count = !decrement_pending_count_;
+  if (increment_pending_count) {
+    auto reporter = model_raw_->MetricReporter();
+    if (reporter) {
+      reporter->IncrementGauge(kPendingRequestMetric, 1);
+    }
+    decrement_pending_count_ = true;
   }
 #endif  // TRITON_ENABLE_METRICS
```
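
The increment is now guarded by a `decrement_pending_count_` flag (a member presumably declared in `infer_request.h`, though that part of the header is not shown in this view), so repeated passes through PENDING cannot inflate the gauge. The matching decrement side is not part of this hunk; a plausible sketch of it, assuming `DecrementGauge` is the counterpart of `IncrementGauge`:

```cpp
// Sketch only: the decrement counterpart is not shown in this diff, so the
// exact body is an assumption. The idea is symmetry with the guard above:
// decrement at most once per recorded increment, then clear the flag.
void
InferenceRequest::DecrementPendingRequestCount()
{
#ifdef TRITON_ENABLE_METRICS
  // Only decrement if a matching increment was previously recorded.
  if (decrement_pending_count_) {
    auto reporter = model_raw_->MetricReporter();
    if (reporter) {
      reporter->DecrementGauge(kPendingRequestMetric, 1);
    }
    decrement_pending_count_ = false;
  }
#endif  // TRITON_ENABLE_METRICS
}
```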

```diff
@@ -376,7 +384,7 @@ InferenceRequest::OutputBufferProperties(
 Status
 InferenceRequest::Run(std::unique_ptr<InferenceRequest>& request)
 {
-  RETURN_IF_ERROR(request->SetState(InferenceRequest::State::STARTED));
+  RETURN_IF_ERROR(request->SetState(InferenceRequest::State::PENDING));
   return request->model_raw_->Enqueue(request);
 }
```

```diff
@@ -849,8 +857,10 @@ InferenceRequest::PrepareForInference()
   request_start_ns_ = 0;
 #endif  // TRITON_ENABLE_STATS
 
-  LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this;
+  // Help enforce that PrepareForInference() is called prior to Run().
+  RETURN_IF_ERROR(SetState(InferenceRequest::State::INITIALIZED));
 
+  LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this;
   return Status::Success;
 }
```
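
Together with the `Run()` change above, this hunk pins down the intended calling contract: `PrepareForInference()` moves the request to INITIALIZED, and `Run()` is the only INITIALIZED to PENDING edge, so a request that skips preparation now fails fast with an INTERNAL error instead of being enqueued. A sketch of the resulting happy path for a re-used request object; the caller shown here is hypothetical and the scheduler steps are elided:

```cpp
// Hypothetical caller, for illustration only: MakeRequest() is not a real
// helper in this codebase, and error handling is collapsed into the
// RETURN_IF_ERROR macro used throughout the diff.
std::unique_ptr<InferenceRequest> request = MakeRequest();

RETURN_IF_ERROR(request->PrepareForInference());  // -> INITIALIZED
RETURN_IF_ERROR(InferenceRequest::Run(request));  // INITIALIZED -> PENDING, then Enqueue()

// ...scheduler: PENDING -> EXECUTING; release: EXECUTING -> RELEASED...

// Re-running the same object, which this PR legalizes (RELEASED -> INITIALIZED):
RETURN_IF_ERROR(request->PrepareForInference());
RETURN_IF_ERROR(InferenceRequest::Run(request));
```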

```diff
@@ -1580,8 +1590,8 @@ operator<<(std::ostream& out, const InferenceRequest::State& state)
       out << "INITIALIZED";
       break;
     }
-    case InferenceRequest::State::STARTED: {
-      out << "STARTED";
+    case InferenceRequest::State::PENDING: {
+      out << "PENDING";
       break;
     }
     case InferenceRequest::State::EXECUTING: {
```
src/infer_request.h (1 addition & 1 deletion)

```diff
@@ -63,7 +63,7 @@ class InferenceRequest {
     INITIALIZED,
 
     // The request has been enqueued, but is not yet executing.
-    STARTED,
+    PENDING,
 
     // The request has been picked up by a backend model instance for execution,
     // but hasn't been released yet.
```
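
For reference, the transition rules after this PR distill into a small standalone model. This is a toy reimplementation for illustration, not Triton code; it also mirrors the "RELEASED allowed from any state" early-out and the same-state no-op from the first hunk:

```cpp
// Toy model of InferenceRequest state transitions after this PR.
#include <cassert>

enum class State { INITIALIZED, PENDING, EXECUTING, RELEASED };

bool SetState(State& s, State next) {
  if (next == s) return true;       // no-op, mirrors the early return
  if (next == State::RELEASED) {    // RELEASED is allowed from any state
    s = next;
    return true;
  }
  switch (s) {
    case State::INITIALIZED: if (next != State::PENDING) return false; break;
    case State::PENDING:     if (next != State::EXECUTING) return false; break;
    case State::EXECUTING:   return false;  // only RELEASED, handled above
    case State::RELEASED:    if (next != State::INITIALIZED) return false; break;
  }
  s = next;
  return true;
}

int main() {
  State s = State::INITIALIZED;
  assert(SetState(s, State::PENDING));      // Run() enqueues the request
  assert(SetState(s, State::EXECUTING));    // a backend instance picks it up
  assert(SetState(s, State::RELEASED));     // released after completion
  assert(SetState(s, State::INITIALIZED));  // re-run: the transition this PR adds
  assert(!SetState(s, State::EXECUTING));   // skipping PENDING is still rejected
  return 0;
}
```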