Skip to content

Commit a8dba0a

Browse files
rahul2393surbhigarg92cloud-java-bot
authored
feat: [Internal] client-side metrics for afe latency and connectivity error (#3819)
* Revert "Revert "feat: built in metrics for afe latency and connectivity error…" This reverts commit b6c9c6e. * rebase * review comments * chore: generate libraries at Wed Apr 16 05:18:36 UTC 2025 * metrics test refactoring --------- Co-authored-by: surbhigarg92 <[email protected]> Co-authored-by: cloud-java-bot <[email protected]>
1 parent bafa0bb commit a8dba0a

11 files changed

+352
-85
lines changed

google-cloud-spanner/clirr-ignored-differences.xml

+8-1
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,13 @@
758758
<method>boolean isEnableGRPCBuiltInMetrics()</method>
759759
</difference>
760760

761+
<!-- Added AFE Server Timing option -->
762+
<difference>
763+
<differenceType>7012</differenceType>
764+
<className>com/google/cloud/spanner/SpannerOptions$SpannerEnvironment</className>
765+
<method>boolean isEnableAFEServerTiming()</method>
766+
</difference>
767+
761768
<!-- Added Monitoring host option -->
762769
<difference>
763770
<differenceType>7012</differenceType>
@@ -899,7 +906,7 @@
899906
<className>com/google/cloud/spanner/connection/Connection</className>
900907
<method>java.lang.String getDefaultSequenceKind()</method>
901908
</difference>
902-
909+
903910
<!-- Default isolation level -->
904911
<difference>
905912
<differenceType>7012</differenceType>

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsConstant.java

+15-16
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import io.opentelemetry.sdk.metrics.InstrumentType;
2828
import io.opentelemetry.sdk.metrics.View;
2929
import java.util.Collection;
30+
import java.util.List;
3031
import java.util.Map;
3132
import java.util.Set;
3233
import java.util.stream.Collectors;
@@ -39,6 +40,9 @@ public class BuiltInMetricsConstant {
3940
static final String SPANNER_METER_NAME = "spanner-java";
4041
static final String GRPC_METER_NAME = "grpc-java";
4142
static final String GFE_LATENCIES_NAME = "gfe_latencies";
43+
static final String AFE_LATENCIES_NAME = "afe_latencies";
44+
static final String GFE_CONNECTIVITY_ERROR_NAME = "gfe_connectivity_error_count";
45+
static final String AFE_CONNECTIVITY_ERROR_NAME = "afe_connectivity_error_count";
4246
static final String OPERATION_LATENCIES_NAME = "operation_latencies";
4347
static final String ATTEMPT_LATENCIES_NAME = "attempt_latencies";
4448
static final String OPERATION_LATENCY_NAME = "operation_latency";
@@ -52,7 +56,10 @@ public class BuiltInMetricsConstant {
5256
ATTEMPT_LATENCIES_NAME,
5357
OPERATION_COUNT_NAME,
5458
ATTEMPT_COUNT_NAME,
55-
GFE_LATENCIES_NAME)
59+
GFE_LATENCIES_NAME,
60+
AFE_LATENCIES_NAME,
61+
GFE_CONNECTIVITY_ERROR_NAME,
62+
AFE_CONNECTIVITY_ERROR_NAME)
5663
.stream()
5764
.map(m -> METER_NAME + '/' + m)
5865
.collect(Collectors.toSet());
@@ -110,14 +117,14 @@ public class BuiltInMetricsConstant {
110117
static final Set<String> GRPC_LB_RLS_ATTRIBUTES =
111118
ImmutableSet.of("grpc.lb.rls.data_plane_target", "grpc.lb.pick_result");
112119

120+
static List<Double> BUCKET_BOUNDARIES =
121+
ImmutableList.of(
122+
0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
123+
16.0, 17.0, 18.0, 19.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0, 160.0,
124+
200.0, 250.0, 300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0,
125+
50000.0, 100000.0, 200000.0, 400000.0, 800000.0, 1600000.0, 3200000.0);
113126
static Aggregation AGGREGATION_WITH_MILLIS_HISTOGRAM =
114-
Aggregation.explicitBucketHistogram(
115-
ImmutableList.of(
116-
0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
117-
15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0,
118-
160.0, 200.0, 250.0, 300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0,
119-
10000.0, 20000.0, 50000.0, 100000.0, 200000.0, 400000.0, 800000.0, 1600000.0,
120-
3200000.0));
127+
Aggregation.explicitBucketHistogram(BUCKET_BOUNDARIES);
121128

122129
static final Collection<String> GRPC_METRICS_ENABLED_BY_DEFAULT =
123130
ImmutableList.of(
@@ -145,14 +152,6 @@ static Map<InstrumentSelector, View> getAllViews() {
145152
BuiltInMetricsConstant.AGGREGATION_WITH_MILLIS_HISTOGRAM,
146153
InstrumentType.HISTOGRAM,
147154
"ms");
148-
defineView(
149-
views,
150-
BuiltInMetricsConstant.SPANNER_METER_NAME,
151-
BuiltInMetricsConstant.GFE_LATENCIES_NAME,
152-
BuiltInMetricsConstant.GFE_LATENCIES_NAME,
153-
BuiltInMetricsConstant.AGGREGATION_WITH_MILLIS_HISTOGRAM,
154-
InstrumentType.HISTOGRAM,
155-
"ms");
156155
defineView(
157156
views,
158157
BuiltInMetricsConstant.GAX_METER_NAME,

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsRecorder.java

+44-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import io.opentelemetry.api.common.Attributes;
2424
import io.opentelemetry.api.common.AttributesBuilder;
2525
import io.opentelemetry.api.metrics.DoubleHistogram;
26+
import io.opentelemetry.api.metrics.LongCounter;
2627
import io.opentelemetry.api.metrics.Meter;
2728
import java.util.Map;
2829

@@ -35,6 +36,9 @@
3536
class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
3637

3738
private final DoubleHistogram gfeLatencyRecorder;
39+
private final DoubleHistogram afeLatencyRecorder;
40+
private final LongCounter gfeHeaderMissingCountRecorder;
41+
private final LongCounter afeHeaderMissingCountRecorder;
3842

3943
/**
4044
* Creates the following instruments for the following metrics:
@@ -59,6 +63,27 @@ class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
5963
.setDescription(
6064
"Latency between Google's network receiving an RPC and reading back the first byte of the response")
6165
.setUnit("ms")
66+
.setExplicitBucketBoundariesAdvice(BuiltInMetricsConstant.BUCKET_BOUNDARIES)
67+
.build();
68+
this.afeLatencyRecorder =
69+
meter
70+
.histogramBuilder(serviceName + '/' + BuiltInMetricsConstant.AFE_LATENCIES_NAME)
71+
.setDescription(
72+
"Latency between Spanner API Frontend receiving an RPC and starting to write back the response.")
73+
.setExplicitBucketBoundariesAdvice(BuiltInMetricsConstant.BUCKET_BOUNDARIES)
74+
.setUnit("ms")
75+
.build();
76+
this.gfeHeaderMissingCountRecorder =
77+
meter
78+
.counterBuilder(serviceName + '/' + BuiltInMetricsConstant.GFE_CONNECTIVITY_ERROR_NAME)
79+
.setDescription("Number of requests that failed to reach the Google network.")
80+
.setUnit("1")
81+
.build();
82+
this.afeHeaderMissingCountRecorder =
83+
meter
84+
.counterBuilder(serviceName + '/' + BuiltInMetricsConstant.AFE_CONNECTIVITY_ERROR_NAME)
85+
.setDescription("Number of requests that failed to reach the Spanner API Frontend.")
86+
.setUnit("1")
6287
.build();
6388
}
6489

@@ -69,8 +94,25 @@ class BuiltInMetricsRecorder extends OpenTelemetryMetricsRecorder {
6994
* @param gfeLatency Attempt Latency in ms
7095
* @param attributes Map of the attributes to store
7196
*/
72-
void recordGFELatency(double gfeLatency, Map<String, String> attributes) {
73-
gfeLatencyRecorder.record(gfeLatency, toOtelAttributes(attributes));
97+
void recordServerTimingHeaderMetrics(
98+
Long gfeLatency,
99+
Long afeLatency,
100+
Long gfeHeaderMissingCount,
101+
Long afeHeaderMissingCount,
102+
Map<String, String> attributes) {
103+
io.opentelemetry.api.common.Attributes otelAttributes = toOtelAttributes(attributes);
104+
if (gfeLatency != null) {
105+
gfeLatencyRecorder.record(gfeLatency, otelAttributes);
106+
}
107+
if (gfeHeaderMissingCount > 0) {
108+
gfeHeaderMissingCountRecorder.add(gfeHeaderMissingCount, otelAttributes);
109+
}
110+
if (afeLatency != null) {
111+
afeLatencyRecorder.record(afeLatency, otelAttributes);
112+
}
113+
if (afeHeaderMissingCount > 0) {
114+
afeHeaderMissingCountRecorder.add(afeHeaderMissingCount, otelAttributes);
115+
}
74116
}
75117

76118
Attributes toOtelAttributes(Map<String, String> attributes) {

google-cloud-spanner/src/main/java/com/google/cloud/spanner/BuiltInMetricsTracer.java

+30-21
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ class BuiltInMetricsTracer extends MetricsTracer implements ApiTracer {
3737
private final BuiltInMetricsRecorder builtInOpenTelemetryMetricsRecorder;
3838
// These are RPC specific attributes and pertain to a specific API Trace
3939
private final Map<String, String> attributes = new HashMap<>();
40-
4140
private Long gfeLatency = null;
41+
private Long afeLatency = null;
42+
private long gfeHeaderMissingCount = 0;
43+
private long afeHeaderMissingCount = 0;
4244

4345
BuiltInMetricsTracer(
4446
MethodName methodName, BuiltInMetricsRecorder builtInOpenTelemetryMetricsRecorder) {
@@ -54,10 +56,9 @@ class BuiltInMetricsTracer extends MetricsTracer implements ApiTracer {
5456
@Override
5557
public void attemptSucceeded() {
5658
super.attemptSucceeded();
57-
if (gfeLatency != null) {
58-
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.OK.toString());
59-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
60-
}
59+
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.OK.toString());
60+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
61+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
6162
}
6263

6364
/**
@@ -67,10 +68,9 @@ public void attemptSucceeded() {
6768
@Override
6869
public void attemptCancelled() {
6970
super.attemptCancelled();
70-
if (gfeLatency != null) {
71-
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.CANCELLED.toString());
72-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
73-
}
71+
attributes.put(STATUS_ATTRIBUTE, StatusCode.Code.CANCELLED.toString());
72+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
73+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
7474
}
7575

7676
/**
@@ -84,10 +84,9 @@ public void attemptCancelled() {
8484
@Override
8585
public void attemptFailedDuration(Throwable error, java.time.Duration delay) {
8686
super.attemptFailedDuration(error, delay);
87-
if (gfeLatency != null) {
88-
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
89-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
90-
}
87+
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
88+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
89+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
9190
}
9291

9392
/**
@@ -100,10 +99,9 @@ public void attemptFailedDuration(Throwable error, java.time.Duration delay) {
10099
@Override
101100
public void attemptFailedRetriesExhausted(Throwable error) {
102101
super.attemptFailedRetriesExhausted(error);
103-
if (gfeLatency != null) {
104-
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
105-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
106-
}
102+
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
103+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
104+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
107105
}
108106

109107
/**
@@ -116,16 +114,27 @@ public void attemptFailedRetriesExhausted(Throwable error) {
116114
@Override
117115
public void attemptPermanentFailure(Throwable error) {
118116
super.attemptPermanentFailure(error);
119-
if (gfeLatency != null) {
120-
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
121-
builtInOpenTelemetryMetricsRecorder.recordGFELatency(gfeLatency, attributes);
122-
}
117+
attributes.put(STATUS_ATTRIBUTE, extractStatus(error));
118+
builtInOpenTelemetryMetricsRecorder.recordServerTimingHeaderMetrics(
119+
gfeLatency, afeLatency, gfeHeaderMissingCount, afeHeaderMissingCount, attributes);
123120
}
124121

125122
void recordGFELatency(Long gfeLatency) {
126123
this.gfeLatency = gfeLatency;
127124
}
128125

126+
void recordAFELatency(Long afeLatency) {
127+
this.afeLatency = afeLatency;
128+
}
129+
130+
void recordGfeHeaderMissingCount(Long value) {
131+
this.gfeHeaderMissingCount = value;
132+
}
133+
134+
void recordAfeHeaderMissingCount(Long value) {
135+
this.afeHeaderMissingCount = value;
136+
}
137+
129138
@Override
130139
public void addAttributes(Map<String, String> attributes) {
131140
super.addAttributes(attributes);

google-cloud-spanner/src/main/java/com/google/cloud/spanner/CompositeTracer.java

+24
Original file line numberDiff line numberDiff line change
@@ -198,4 +198,28 @@ public void recordGFELatency(Long gfeLatency) {
198198
}
199199
}
200200
}
201+
202+
public void recordGfeHeaderMissingCount(Long value) {
203+
for (ApiTracer child : children) {
204+
if (child instanceof BuiltInMetricsTracer) {
205+
((BuiltInMetricsTracer) child).recordGfeHeaderMissingCount(value);
206+
}
207+
}
208+
}
209+
210+
public void recordAFELatency(Long afeLatency) {
211+
for (ApiTracer child : children) {
212+
if (child instanceof BuiltInMetricsTracer) {
213+
((BuiltInMetricsTracer) child).recordAFELatency(afeLatency);
214+
}
215+
}
216+
}
217+
218+
public void recordAfeHeaderMissingCount(Long value) {
219+
for (ApiTracer child : children) {
220+
if (child instanceof BuiltInMetricsTracer) {
221+
((BuiltInMetricsTracer) child).recordAfeHeaderMissingCount(value);
222+
}
223+
}
224+
}
201225
}

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/GapicSpannerRpc.java

+7
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,10 @@ private static boolean isEmulatorEnabled(SpannerOptions options, String emulator
689689
&& options.getHost().endsWith(emulatorHost);
690690
}
691691

692+
public static boolean isEnableAFEServerTiming() {
693+
return "false".equalsIgnoreCase(System.getenv("SPANNER_DISABLE_AFE_SERVER_TIMING"));
694+
}
695+
692696
private static final RetrySettings ADMIN_REQUESTS_LIMIT_EXCEEDED_RETRY_SETTINGS =
693697
RetrySettings.newBuilder()
694698
.setInitialRetryDelayDuration(Duration.ofSeconds(5L))
@@ -1993,6 +1997,9 @@ private GrpcCallContext createBaseCallContext() {
19931997
if (endToEndTracingEnabled) {
19941998
context = context.withExtraHeaders(metadataProvider.newEndToEndTracingHeader());
19951999
}
2000+
if (isEnableAFEServerTiming()) {
2001+
context = context.withExtraHeaders(metadataProvider.newAfeServerTimingHeader());
2002+
}
19962003
return context
19972004
.withStreamWaitTimeoutDuration(waitTimeout)
19982005
.withStreamIdleTimeoutDuration(idleTimeout);

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/HeaderInterceptor.java

+14-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class HeaderInterceptor implements ClientInterceptor {
7272
private static final Metadata.Key<String> SERVER_TIMING_HEADER_KEY =
7373
Metadata.Key.of("server-timing", Metadata.ASCII_STRING_MARSHALLER);
7474
private static final String GFE_TIMING_HEADER = "gfet4t7";
75+
private static final String AFE_TIMING_HEADER = "afe";
7576
private static final Metadata.Key<String> GOOGLE_CLOUD_RESOURCE_PREFIX_KEY =
7677
Metadata.Key.of("google-cloud-resource-prefix", Metadata.ASCII_STRING_MARSHALLER);
7778
private static final Pattern SERVER_TIMING_PATTERN =
@@ -174,13 +175,25 @@ private void processHeader(
174175
if (compositeTracer != null) {
175176
compositeTracer.recordGFELatency(gfeLatency);
176177
}
177-
178178
if (span != null) {
179179
span.setAttribute("gfe_latency", String.valueOf(gfeLatency));
180180
}
181181
} else {
182182
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 1L).record(tagContext);
183183
spannerRpcMetrics.recordGfeHeaderMissingCount(1L, attributes);
184+
if (compositeTracer != null) {
185+
compositeTracer.recordGfeHeaderMissingCount(1L);
186+
}
187+
}
188+
189+
// Record AFE metrics
190+
if (compositeTracer != null && GapicSpannerRpc.isEnableAFEServerTiming()) {
191+
if (serverTimingMetrics.containsKey(AFE_TIMING_HEADER)) {
192+
long afeLatency = serverTimingMetrics.get(AFE_TIMING_HEADER);
193+
compositeTracer.recordAFELatency(afeLatency);
194+
} else {
195+
compositeTracer.recordAfeHeaderMissingCount(1L);
196+
}
184197
}
185198
} catch (NumberFormatException e) {
186199
LOGGER.log(LEVEL, "Invalid server-timing object in header: {}", serverTiming);

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/SpannerMetadataProvider.java

+8
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class SpannerMetadataProvider {
3838
private final String resourceHeaderKey;
3939
private static final String ROUTE_TO_LEADER_HEADER_KEY = "x-goog-spanner-route-to-leader";
4040
private static final String END_TO_END_TRACING_HEADER_KEY = "x-goog-spanner-end-to-end-tracing";
41+
private static final String AFE_SERVER_TIMING_HEADER_KEY =
42+
"x-goog-spanner-enable-afe-server-timing";
4143
private static final Pattern[] RESOURCE_TOKEN_PATTERNS = {
4244
Pattern.compile("^(?<headerValue>projects/[^/]*/instances/[^/]*/databases/[^/]*)(.*)?"),
4345
Pattern.compile("^(?<headerValue>projects/[^/]*/instances/[^/]*)(.*)?")
@@ -47,6 +49,8 @@ class SpannerMetadataProvider {
4749
ImmutableMap.of(ROUTE_TO_LEADER_HEADER_KEY, Collections.singletonList("true"));
4850
private static final Map<String, List<String>> END_TO_END_TRACING_HEADER_MAP =
4951
ImmutableMap.of(END_TO_END_TRACING_HEADER_KEY, Collections.singletonList("true"));
52+
private static final Map<String, List<String>> AFE_SERVER_TIMING_HEADER_MAP =
53+
ImmutableMap.of(AFE_SERVER_TIMING_HEADER_KEY, Collections.singletonList("true"));
5054

5155
private SpannerMetadataProvider(Map<String, String> headers, String resourceHeaderKey) {
5256
this.resourceHeaderKey = resourceHeaderKey;
@@ -96,6 +100,10 @@ Map<String, List<String>> newEndToEndTracingHeader() {
96100
return END_TO_END_TRACING_HEADER_MAP;
97101
}
98102

103+
Map<String, List<String>> newAfeServerTimingHeader() {
104+
return AFE_SERVER_TIMING_HEADER_MAP;
105+
}
106+
99107
private Map<Metadata.Key<String>, String> constructHeadersAsMetadata(
100108
Map<String, String> headers) {
101109
ImmutableMap.Builder<Metadata.Key<String>, String> headersAsMetadataBuilder =

google-cloud-spanner/src/test/java/com/google/cloud/spanner/AbstractNettyMockServerTest.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ abstract class AbstractNettyMockServerTest {
4747
protected static AtomicInteger fakeServerTiming =
4848
new AtomicInteger(new Random().nextInt(1000) + 1);
4949

50+
protected static AtomicInteger fakeAFEServerTiming =
51+
new AtomicInteger(new Random().nextInt(500) + 1);
52+
5053
protected Spanner spanner;
5154

5255
@BeforeClass
@@ -72,7 +75,9 @@ public <ReqT, RespT> ServerCall.Listener<ReqT> interceptCall(
7275
public void sendHeaders(Metadata headers) {
7376
headers.put(
7477
Metadata.Key.of("server-timing", Metadata.ASCII_STRING_MARSHALLER),
75-
String.format("gfet4t7; dur=%d", fakeServerTiming.get()));
78+
String.format(
79+
"afe; dur=%d, gfet4t7; dur=%d",
80+
fakeAFEServerTiming.get(), fakeServerTiming.get()));
7681
super.sendHeaders(headers);
7782
}
7883
},

0 commit comments

Comments
 (0)