From 9ca3f51efe5eb65873ebcbc8d846e378007a2591 Mon Sep 17 00:00:00 2001 From: David Zane Date: Tue, 6 Feb 2024 18:23:20 -0800 Subject: [PATCH] Tracing for deep search path Signed-off-by: David Zane --- CHANGELOG.md | 1 + .../tracing/SpanCreationContext.java | 5 + .../search/AbstractSearchAsyncAction.java | 1 + .../action/search/SearchRequestContext.java | 4 +- .../action/search/TransportSearchAction.java | 110 +++++++++++------- .../telemetry/tracing/AttributeNames.java | 25 ++++ .../telemetry/tracing/SpanBuilder.java | 49 ++++++++ ...ceableSearchRequestOperationsListener.java | 72 ++++++++++++ .../snapshots/SnapshotResiliencyTests.java | 3 +- .../telemetry/tracing/SpanBuilderTests.java | 19 +++ 10 files changed, 247 insertions(+), 42 deletions(-) create mode 100644 server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableSearchRequestOperationsListener.java diff --git a/CHANGELOG.md b/CHANGELOG.md index da03b2e4430a6..4a908d1b27007 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Allow to pass the list settings through environment variables (like [], ["a", "b", "c"], ...) ([#10625](https://github.com/opensearch-project/OpenSearch/pull/10625)) - [Admission Control] Integrate CPU AC with ResourceUsageCollector and add CPU AC stats to nodes/stats ([#10887](https://github.com/opensearch-project/OpenSearch/pull/10887)) - [S3 Repository] Add setting to control connection count for sync client ([#12028](https://github.com/opensearch-project/OpenSearch/pull/12028)) +- Tracing for deep search path ([#12103](https://github.com/opensearch-project/OpenSearch/pull/12103)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/SpanCreationContext.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/SpanCreationContext.java index cbbcfe7a85d57..e416945b6ba84 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/SpanCreationContext.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/SpanCreationContext.java @@ -28,6 +28,11 @@ public final class SpanCreationContext { */ private SpanCreationContext() {} + SpanCreationContext(String spanName, Attributes attributes) { + this.spanName = spanName; + this.attributes = attributes; + } + /** * Sets the span type to server. * @return spanCreationContext diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 3c27d3ce59e4c..e4db3bc997834 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -220,6 +220,7 @@ public final void start() { null ) ); + onRequestEnd(searchRequestContext); return; } executePhase(this); diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java index 383d9b5e82fe2..4759fb090aacf 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java @@ -78,7 +78,7 @@ void setTotalHits(TotalHits totalHits) { this.totalHits = totalHits; } - TotalHits totalHits() { + public TotalHits totalHits() { return totalHits; } @@ -89,7 +89,7 @@ void setShardStats(int total, int successful, int skipped, int failed) { this.shardStats.put(ShardStatsFieldNames.SEARCH_REQUEST_SLOWLOG_SHARD_FAILED, failed); } - String formattedShardStats() { + public String formattedShardStats() { if (shardStats.isEmpty()) { return ""; } else { diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 79e599ec9387b..5df19f46a3b7a 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -88,6 +88,12 @@ import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; +import org.opensearch.telemetry.tracing.listener.TraceableSearchRequestOperationsListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.RemoteClusterAware; import org.opensearch.transport.RemoteClusterService; @@ -173,6 +179,7 @@ public class TransportSearchAction extends HandledTransportAction) SearchRequest::new); this.client = client; @@ -215,6 +223,7 @@ public TransportSearchAction( this.searchRequestOperationsCompositeListenerFactory = searchRequestOperationsCompositeListenerFactory; clusterService.getClusterSettings() .addSettingsUpdateConsumer(SEARCH_QUERY_METRICS_ENABLED_SETTING, this::setSearchQueryMetricsEnabled); + this.tracer = tracer; } private void setSearchQueryMetricsEnabled(boolean searchQueryMetricsEnabled) { @@ -431,49 +440,72 @@ private void executeRequest( if (originalSearchRequest.isPhaseTook() == null) { originalSearchRequest.setPhaseTook(clusterService.getClusterSettings().get(SEARCH_PHASE_TOOK_ENABLED)); } - SearchRequestOperationsListener.CompositeListener requestOperationsListeners = searchRequestOperationsCompositeListenerFactory - .buildCompositeListener(originalSearchRequest, logger); - SearchRequestContext searchRequestContext = new SearchRequestContext(requestOperationsListeners, originalSearchRequest); - searchRequestContext.getSearchRequestOperationsListener().onRequestStart(searchRequestContext); - - PipelinedRequest searchRequest; - ActionListener listener; - try { - searchRequest = searchPipelineService.resolvePipeline(originalSearchRequest); - listener = searchRequest.transformResponseListener(originalListener); - } catch (Exception e) { - originalListener.onFailure(e); - return; - } - - ActionListener requestTransformListener = ActionListener.wrap(sr -> { - if (searchQueryMetricsEnabled) { - try { - searchQueryCategorizer.categorize(sr.source()); - } catch (Exception e) { - logger.error("Error while trying to categorize the query.", e); - } - } - ActionListener rewriteListener = buildRewriteListener( - sr, - task, - timeProvider, - searchAsyncActionProvider, - listener, - searchRequestContext + final Span requestSpan = tracer.startSpan(SpanBuilder.from(task, actionName)); + try (final SpanScope spanScope = tracer.withSpanInScope(requestSpan)) { + originalListener = TraceableActionListener.create( + originalListener, + requestSpan, + tracer ); - if (sr.source() == null) { - rewriteListener.onResponse(sr.source()); + + SearchRequestOperationsListener.CompositeListener requestOperationsListeners; + if (tracer.isRecording()) { + TraceableSearchRequestOperationsListener traceableSearchRequestOperationsListener = new TraceableSearchRequestOperationsListener(tracer, requestSpan); + requestOperationsListeners = searchRequestOperationsCompositeListenerFactory.buildCompositeListener( + originalSearchRequest, + logger, + traceableSearchRequestOperationsListener + ); } else { - Rewriteable.rewriteAndFetch( - sr.source(), - searchService.getRewriteContext(timeProvider::getAbsoluteStartMillis), - rewriteListener + requestOperationsListeners = searchRequestOperationsCompositeListenerFactory.buildCompositeListener( + originalSearchRequest, + logger ); } - }, listener::onFailure); - searchRequest.transformRequest(requestTransformListener); + SearchRequestContext searchRequestContext = new SearchRequestContext(requestOperationsListeners, originalSearchRequest); + searchRequestContext.getSearchRequestOperationsListener().onRequestStart(searchRequestContext); + + PipelinedRequest searchRequest; + ActionListener listener; + + try { + searchRequest = searchPipelineService.resolvePipeline(originalSearchRequest); + listener = searchRequest.transformResponseListener(originalListener); + } catch (Exception e) { + originalListener.onFailure(e); + return; + } + + ActionListener requestTransformListener = ActionListener.wrap(sr -> { + if (searchQueryMetricsEnabled) { + try { + searchQueryCategorizer.categorize(sr.source()); + } catch (Exception e) { + logger.error("Error while trying to categorize the query.", e); + } + } + + ActionListener rewriteListener = buildRewriteListener( + sr, + task, + timeProvider, + searchAsyncActionProvider, + listener, + searchRequestContext + ); + if (sr.source() == null) { + rewriteListener.onResponse(sr.source()); + } else { + Rewriteable.rewriteAndFetch( + sr.source(), + searchService.getRewriteContext(timeProvider::getAbsoluteStartMillis), + rewriteListener + ); + } + }, listener::onFailure); + searchRequest.transformRequest(requestTransformListener); + } } private ActionListener buildRewriteListener( diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java index b6b2cf360d1c5..5b1ff5422164c 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java @@ -70,6 +70,16 @@ private AttributeNames() { */ public static final String TRANSPORT_ACTION = "action"; + /** + * Task id + */ + public static final String TASK_ID = "task_id"; + + /** + * Parent task id + */ + public static final String PARENT_TASK_ID = "p_task_id"; + /** * Index Name */ @@ -94,4 +104,19 @@ private AttributeNames() { * Refresh Policy */ public static final String REFRESH_POLICY = "refresh_policy"; + + /** + * Search Request Source + */ + public static final String SOURCE = "source"; + + /** + * Search Response Shard Stats + */ + public static final String SHARDS = "shards"; + + /** + * Search Response Total Hits + */ + public static final String TOTAL_HITS = "total_hits"; } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java index 1dce422943b7a..1f333e8d57761 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java @@ -14,6 +14,7 @@ import org.opensearch.core.common.Strings; import org.opensearch.http.HttpRequest; import org.opensearch.rest.RestRequest; +import org.opensearch.tasks.Task; import org.opensearch.telemetry.tracing.attributes.Attributes; import org.opensearch.transport.TcpChannel; import org.opensearch.transport.Transport; @@ -42,6 +43,15 @@ private SpanBuilder() { } + /** + * Creates {@link SpanCreationContext} from String + * @param spanName String. + * @return context. + */ + public static SpanCreationContext from(String spanName) { + return SpanCreationContext.server().name(spanName); + } + /** * Creates {@link SpanCreationContext} from the {@link HttpRequest} * @param request Http request. @@ -170,4 +180,43 @@ private static Attributes buildSpanAttributes(String nodeId, ReplicatedWriteRequ return attributes; } + /** + * Creates {@link SpanCreationContext} with parent set to specified SpanContext. + * @param spanName name of span. + * @param parentSpan target parent span. + * @return context + */ + public static SpanCreationContext from(String spanName, SpanContext parentSpan) { + return SpanCreationContext.server().name(spanName).parent(parentSpan); + } + + /** + * Creates {@link SpanCreationContext} with parent set to specified SpanContext. + * @param task search task. + * @param actionName action. + * @return context + */ + public static SpanCreationContext from(Task task, String actionName) { + return new SpanCreationContext(createSpanName(task, actionName), buildSpanAttributes(task, actionName)); + } + + private static Attributes buildSpanAttributes(Task task, String actionName) { + Attributes attributes = Attributes.create().addAttribute(AttributeNames.TRANSPORT_ACTION, actionName); + if (task != null) { + attributes.addAttribute(AttributeNames.TASK_ID, task.getId()); + if (task.getParentTaskId() != null && task.getParentTaskId().isSet()) { + attributes.addAttribute(AttributeNames.PARENT_TASK_ID, task.getParentTaskId().getId()); + } + } + return attributes; + + } + + private static String createSpanName(Task task, String actionName) { + if (task != null) { + return task.getType() + SEPARATOR + task.getAction(); + } else { + return actionName; + } + } } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableSearchRequestOperationsListener.java b/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableSearchRequestOperationsListener.java new file mode 100644 index 0000000000000..97da54f29250a --- /dev/null +++ b/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableSearchRequestOperationsListener.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry.tracing.listener; + +import org.opensearch.action.search.SearchPhaseContext; +import org.opensearch.action.search.SearchRequestContext; +import org.opensearch.action.search.SearchRequestOperationsListener; +import org.opensearch.telemetry.tracing.AttributeNames; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanContext; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.noop.NoopSpan; + +import static org.opensearch.core.common.Strings.capitalize; + +/** + * SearchRequestOperationsListener subscriber for search request tracing + * + * @opensearch.internal + */ +public final class TraceableSearchRequestOperationsListener extends SearchRequestOperationsListener { + private final Tracer tracer; + private final Span requestSpan; + private Span phaseSpan; + private SpanScope phaseSpanScope; + + public TraceableSearchRequestOperationsListener(final Tracer tracer, final Span requestSpan) { + this.tracer = tracer; + this.requestSpan = requestSpan; + this.phaseSpan = NoopSpan.INSTANCE; + } + + @Override + protected void onPhaseStart(SearchPhaseContext context) { + phaseSpan = tracer.startSpan( + SpanBuilder.from("coord" + capitalize(context.getCurrentPhase().getName()), new SpanContext(requestSpan)) + ); + phaseSpanScope = tracer.withSpanInScope(phaseSpan); + } + + @Override + protected void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + phaseSpan.endSpan(); + phaseSpanScope.close(); + } + + @Override + protected void onPhaseFailure(SearchPhaseContext context) { + phaseSpan.endSpan(); + phaseSpanScope.close(); + } + + @Override + public void onRequestStart(SearchRequestContext searchRequestContext) {} + + @Override + public void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + // add response-related attributes on request end + requestSpan.addAttribute( + AttributeNames.TOTAL_HITS, + searchRequestContext.totalHits() == null ? 0 : searchRequestContext.totalHits().value + ); + } +} diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 7c50e961853b5..4b43b5f9c3ff5 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2310,7 +2310,8 @@ public void onFailure(final Exception e) { client ), NoopMetricsRegistry.INSTANCE, - searchRequestOperationsCompositeListenerFactory + searchRequestOperationsCompositeListenerFactory, + NoopTracer.INSTANCE ) ); actions.put( diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/SpanBuilderTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/SpanBuilderTests.java index b4183412cdf02..d1ead0e25ef8a 100644 --- a/server/src/test/java/org/opensearch/telemetry/tracing/SpanBuilderTests.java +++ b/server/src/test/java/org/opensearch/telemetry/tracing/SpanBuilderTests.java @@ -19,6 +19,7 @@ import org.opensearch.http.HttpResponse; import org.opensearch.rest.RestRequest; import org.opensearch.telemetry.tracing.attributes.Attributes; +import org.opensearch.telemetry.tracing.noop.NoopSpan; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.transport.Transport; import org.opensearch.transport.TransportException; @@ -32,6 +33,14 @@ public class SpanBuilderTests extends OpenSearchTestCase { + public void testString() { + String spanName = "test-name"; + SpanCreationContext context = SpanBuilder.from(spanName); + Attributes attributes = context.getAttributes(); + assertEquals(spanName, context.getSpanName()); + assertNull(attributes); + } + public void testHttpRequestContext() { HttpRequest httpRequest = createHttpRequest(); SpanCreationContext context = SpanBuilder.from(httpRequest); @@ -67,6 +76,16 @@ public void testTransportContext() { assertEquals(connection.getNode().getHostAddress(), attributes.getAttributesMap().get(AttributeNames.TRANSPORT_TARGET_HOST)); } + public void testParentSpan() { + String spanName = "test-name"; + SpanContext parentSpanContext = new SpanContext(NoopSpan.INSTANCE); + SpanCreationContext context = SpanBuilder.from(spanName, parentSpanContext); + Attributes attributes = context.getAttributes(); + assertNull(attributes); + assertEquals(spanName, context.getSpanName()); + assertEquals(parentSpanContext, context.getParent()); + } + private static Transport.Connection createTransportConnection() { return new Transport.Connection() { @Override