Skip to content

Commit

Permalink
HDFS-17290: Adds disconnected client rpc backoff metrics (apache#6359)
Browse files Browse the repository at this point in the history
  • Loading branch information
li-leyang authored Jan 5, 2024
1 parent 7d3b6a3 commit 661c784
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3133,6 +3133,13 @@ private void internalQueueCall(Call call, boolean blocking)
// For example, IPC clients using FailoverOnNetworkExceptionRetry handle
// RetriableException.
rpcMetrics.incrClientBackoff();
// Clients that are put directly into the lowest priority queue are backed off and disconnected.
if (cqe.getCause() instanceof RpcServerException) {
RpcServerException ex = (RpcServerException) cqe.getCause();
if (ex.getRpcStatusProto() == RpcStatusProto.FATAL) {
rpcMetrics.incrClientBackoffDisconnected();
}
}
// unwrap retriable exception.
throw cqe.getCause();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ public static RpcMetrics create(Server server, Configuration conf) {
MutableCounterLong rpcAuthorizationSuccesses;
@Metric("Number of client backoff requests")
MutableCounterLong rpcClientBackoff;
@Metric("Number of disconnected client backoff requests")
MutableCounterLong rpcClientBackoffDisconnected;
@Metric("Number of slow RPC calls")
MutableCounterLong rpcSlowCalls;
@Metric("Number of requeue calls")
Expand Down Expand Up @@ -342,6 +344,22 @@ public void incrClientBackoff() {
rpcClientBackoff.incr();
}

/**
 * Increments the counter of client backoff requests that ended with the
 * client being disconnected.
 *
 * This bumps {@code rpcClientBackoffDisconnected} by one on each call; it
 * does not touch the general {@code rpcClientBackoff} counter, which has its
 * own increment method.
 */
public void incrClientBackoffDisconnected() {
rpcClientBackoffDisconnected.incr();
}

/**
 * Returns the number of client backoff requests that ended with the client
 * being disconnected.
 *
 * @return the current value of the {@code rpcClientBackoffDisconnected}
 *         counter
 */
public long getClientBackoffDisconnected() {
return rpcClientBackoffDisconnected.value();
}


/**
* Increments the Slow RPC counter.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ The default timeunit used for RPC metrics is milliseconds (as per the below desc
| `RpcAuthorizationFailures` | Total number of authorization failures |
| `RpcAuthorizationSuccesses` | Total number of authorization successes |
| `RpcClientBackoff` | Total number of client backoff requests |
| `RpcClientBackoffDisconnected` | Total number of client backoff requests in which the client was disconnected. This is a subset of `RpcClientBackoff` |
| `RpcSlowCalls` | Total number of slow RPC calls |
| `RpcRequeueCalls` | Total number of requeue RPC calls |
| `RpcCallsSuccesses` | Total number of RPC calls that are successfully processed |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1528,6 +1528,7 @@ public Void call() throws ServiceException, InterruptedException {
IOException unwrapExeption = re.unwrapRemoteException();
if (unwrapExeption instanceof RetriableException) {
succeeded = true;
assertEquals(1L, server.getRpcMetrics().getClientBackoffDisconnected());
} else {
lastException = unwrapExeption;
}
Expand Down

0 comments on commit 661c784

Please sign in to comment.