From adeda2138cbc8da7a63ce1754e81bf581d478b8f Mon Sep 17 00:00:00 2001 From: David Zane Date: Tue, 30 Jan 2024 15:42:44 -0800 Subject: [PATCH] Tracing for deep search path Signed-off-by: David Zane --- CHANGELOG.md | 1 + .../search/AbstractSearchAsyncAction.java | 17 +++-- .../action/search/SearchRequestContext.java | 75 +++++++++++++++++-- .../search/SearchRequestCoordinatorTrace.java | 71 ++++++++++++++++++ .../SearchRequestOperationsListener.java | 18 ++--- .../action/search/SearchRequestSlowLog.java | 50 ++++++------- .../action/search/SearchRequestStats.java | 4 +- .../action/search/TransportSearchAction.java | 21 +++++- .../main/java/org/opensearch/node/Node.java | 5 +- .../telemetry/tracing/AttributeNames.java | 15 ++++ .../telemetry/tracing/SpanBuilder.java | 40 +++++++++- .../AbstractSearchAsyncActionTests.java | 6 +- ...erationsCompositeListenerFactoryTests.java | 4 +- ...earchRequestOperationsListenerSupport.java | 2 +- .../SearchRequestOperationsListenerTests.java | 6 +- .../search/SearchRequestSlowLogTests.java | 33 +++----- .../search/SearchRequestStatsTests.java | 12 +-- .../snapshots/SnapshotResiliencyTests.java | 3 +- 18 files changed, 291 insertions(+), 92 deletions(-) create mode 100644 server/src/main/java/org/opensearch/action/search/SearchRequestCoordinatorTrace.java diff --git a/CHANGELOG.md b/CHANGELOG.md index f9c2eda92ec73..7fcc7a7a8b9fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Allow to pass the list settings through environment variables (like [], ["a", "b", "c"], ...) ([#10625](https://github.com/opensearch-project/OpenSearch/pull/10625)) - [Admission Control] Integrate CPU AC with ResourceUsageCollector and add CPU AC stats to nodes/stats ([#10887](https://github.com/opensearch-project/OpenSearch/pull/10887)) - [S3 Repository] Add setting to control connection count for sync client ([#12028](https://github.com/opensearch-project/OpenSearch/pull/12028)) +- Tracing for deep search path ([#12099](https://github.com/opensearch-project/OpenSearch/pull/12099)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 3c27d3ce59e4c..6db44d580f518 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -58,6 +58,7 @@ import org.opensearch.search.internal.SearchContext; import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.search.pipeline.PipelinedRequest; +import org.opensearch.telemetry.tracing.SpanScope; import org.opensearch.transport.Transport; import java.util.ArrayDeque; @@ -220,6 +221,7 @@ public final void start() { null ) ); + searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(searchRequestContext); return; } executePhase(this); @@ -439,21 +441,23 @@ private void onPhaseEnd(SearchRequestContext searchRequestContext) { } } - void onPhaseStart(SearchPhase phase) { + void onPhaseStart(SearchPhase phase, SearchRequestContext searchRequestContext) { setCurrentPhase(phase); if (SearchPhaseName.isValidName(phase.getName())) { - this.searchRequestContext.getSearchRequestOperationsListener().onPhaseStart(this); + this.searchRequestContext.getSearchRequestOperationsListener().onPhaseStart(this, searchRequestContext); } } private void onRequestEnd(SearchRequestContext searchRequestContext) { - this.searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(this, searchRequestContext); + this.searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(searchRequestContext); } private void executePhase(SearchPhase phase) { try { - onPhaseStart(phase); - phase.recordAndRun(); + onPhaseStart(phase, searchRequestContext); + try (SpanScope spanScope = searchRequestContext.getTracer().withSpanInScope(searchRequestContext.getPhaseSpan())) { + phase.recordAndRun(); + } } catch (Exception e) { if (logger.isDebugEnabled()) { logger.debug(new ParameterizedMessage("Failed to execute [{}] while moving to [{}] phase", request, phase.getName()), e); @@ -705,6 +709,7 @@ public void sendSearchResponse(InternalSearchResponse internalSearchResponse, At searchContextId = null; } } + searchRequestContext.setSearchTask(getTask()); searchRequestContext.setTotalHits(internalSearchResponse.hits().getTotalHits()); searchRequestContext.setShardStats(results.getNumShards(), successfulOps.get(), skippedOps.get(), failures.length); onPhaseEnd(searchRequestContext); @@ -717,7 +722,7 @@ public void sendSearchResponse(InternalSearchResponse internalSearchResponse, At @Override public final void onPhaseFailure(SearchPhase phase, String msg, Throwable cause) { if (SearchPhaseName.isValidName(phase.getName())) { - this.searchRequestContext.getSearchRequestOperationsListener().onPhaseFailure(this); + this.searchRequestContext.getSearchRequestOperationsListener().onPhaseFailure(this, searchRequestContext); } raisePhaseFailure(new SearchPhaseExecutionException(phase.getName(), msg, cause, buildShardFailures())); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java index eceac7204b196..6fe54a1435897 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java @@ -8,11 +8,17 @@ package org.opensearch.action.search; +import org.apache.logging.log4j.LogManager; import org.apache.lucene.search.TotalHits; import org.opensearch.common.annotation.InternalApi; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.noop.NoopSpan; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import java.util.EnumMap; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -23,20 +29,59 @@ */ @InternalApi class SearchRequestContext { + private final SearchRequest searchRequest; + private SearchTask searchTask; private final SearchRequestOperationsListener searchRequestOperationsListener; private long absoluteStartNanos; private final Map phaseTookMap; private TotalHits totalHits; private final EnumMap shardStats; + private Tracer tracer; + private Span requestSpan; + private Span phaseSpan; - private final SearchRequest searchRequest; + /** + * This constructor is for testing only + */ + SearchRequestContext() { + this(new SearchRequest()); + } + + /** + * This constructor is for testing only + */ + SearchRequestContext(SearchRequest searchRequest) { + this(new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), searchRequest); + } + + /** + * This constructor is for testing only + */ + SearchRequestContext(SearchRequestOperationsListener searchRequestOperationsListener, SearchRequest searchRequest) { + this(searchRequestOperationsListener, searchRequest, NoopTracer.INSTANCE); + } - SearchRequestContext(final SearchRequestOperationsListener searchRequestOperationsListener, final SearchRequest searchRequest) { + SearchRequestContext(SearchRequestOperationsListener searchRequestOperationsListener, SearchRequest searchRequest, Tracer tracer) { this.searchRequestOperationsListener = searchRequestOperationsListener; + this.searchRequest = searchRequest; + this.tracer = tracer; this.absoluteStartNanos = System.nanoTime(); this.phaseTookMap = new HashMap<>(); this.shardStats = new EnumMap<>(ShardStatsFieldNames.class); - this.searchRequest = searchRequest; + this.requestSpan = NoopSpan.INSTANCE; + this.phaseSpan = NoopSpan.INSTANCE; + } + + public SearchRequest getSearchRequest() { + return searchRequest; + } + + public void setSearchTask(SearchTask searchTask) { + this.searchTask = searchTask; + } + + public SearchTask getSearchTask() { + return searchTask; } SearchRequestOperationsListener getSearchRequestOperationsListener() { @@ -78,7 +123,7 @@ void setTotalHits(TotalHits totalHits) { this.totalHits = totalHits; } - TotalHits totalHits() { + public TotalHits totalHits() { return totalHits; } @@ -89,7 +134,7 @@ void setShardStats(int total, int successful, int skipped, int failed) { this.shardStats.put(ShardStatsFieldNames.SEARCH_REQUEST_SLOWLOG_SHARD_FAILED, failed); } - String formattedShardStats() { + public String formattedShardStats() { if (shardStats.isEmpty()) { return ""; } else { @@ -107,6 +152,26 @@ String formattedShardStats() { ); } } + + public Tracer getTracer() { + return tracer; + } + + public void setRequestSpan(Span requestSpan) { + this.requestSpan = requestSpan; + } + + public Span getRequestSpan() { + return requestSpan; + } + + public void setPhaseSpan(Span phaseSpan) { + this.phaseSpan = phaseSpan; + } + + public Span getPhaseSpan() { + return phaseSpan; + } } enum ShardStatsFieldNames { diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestCoordinatorTrace.java b/server/src/main/java/org/opensearch/action/search/SearchRequestCoordinatorTrace.java new file mode 100644 index 0000000000000..248dfe6c59e31 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestCoordinatorTrace.java @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.opensearch.telemetry.tracing.AttributeNames; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanContext; +import org.opensearch.telemetry.tracing.Tracer; + +import static org.opensearch.core.common.Strings.capitalize; + +/** + * Listener for search request tracing on the coordinator node + * + * @opensearch.internal + */ +public final class SearchRequestCoordinatorTrace extends SearchRequestOperationsListener { + private final Tracer tracer; + + public SearchRequestCoordinatorTrace(Tracer tracer) { + this.tracer = tracer; + } + + @Override + void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + searchRequestContext.setPhaseSpan( + tracer.startSpan( + SpanBuilder.from( + "coord" + capitalize(context.getCurrentPhase().getName()), + new SpanContext(searchRequestContext.getRequestSpan()) + ) + ) + ); + } + + @Override + void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + searchRequestContext.getPhaseSpan().endSpan(); + } + + @Override + void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + searchRequestContext.getPhaseSpan().endSpan(); + } + + @Override + void onRequestEnd(SearchRequestContext searchRequestContext) { + Span requestSpan = searchRequestContext.getRequestSpan(); + + // add response-related attributes on request end + requestSpan.addAttribute( + AttributeNames.TOTAL_HITS, + searchRequestContext.totalHits() == null ? "0" : searchRequestContext.totalHits().toString() + ); + requestSpan.addAttribute( + AttributeNames.SHARDS, + searchRequestContext.formattedShardStats().isEmpty() ? "null" : searchRequestContext.formattedShardStats() + ); + requestSpan.addAttribute( + AttributeNames.SOURCE, + searchRequestContext.getSearchRequest().source() == null ? "null" : searchRequestContext.getSearchRequest().source().toString() + ); + } +} diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java b/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java index 2a09cc084f79f..7eaa78588ac5b 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java @@ -31,15 +31,15 @@ protected SearchRequestOperationsListener(final boolean enabled) { this.enabled = enabled; } - abstract void onPhaseStart(SearchPhaseContext context); + abstract void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext); abstract void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext); - abstract void onPhaseFailure(SearchPhaseContext context); + abstract void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext); void onRequestStart(SearchRequestContext searchRequestContext) {} - void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} + void onRequestEnd(SearchRequestContext searchRequestContext) {} boolean isEnabled(SearchRequest searchRequest) { return isEnabled(); @@ -69,10 +69,10 @@ static final class CompositeListener extends SearchRequestOperationsListener { } @Override - void onPhaseStart(SearchPhaseContext context) { + void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) { for (SearchRequestOperationsListener listener : listeners) { try { - listener.onPhaseStart(context); + listener.onPhaseStart(context, searchRequestContext); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("onPhaseStart listener [{}] failed", listener), e); } @@ -91,10 +91,10 @@ void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestCo } @Override - void onPhaseFailure(SearchPhaseContext context) { + void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) { for (SearchRequestOperationsListener listener : listeners) { try { - listener.onPhaseFailure(context); + listener.onPhaseFailure(context, searchRequestContext); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("onPhaseFailure listener [{}] failed", listener), e); } @@ -113,10 +113,10 @@ void onRequestStart(SearchRequestContext searchRequestContext) { } @Override - public void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + public void onRequestEnd(SearchRequestContext searchRequestContext) { for (SearchRequestOperationsListener listener : listeners) { try { - listener.onRequestEnd(context, searchRequestContext); + listener.onRequestEnd(searchRequestContext); } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("onRequestEnd listener [{}] failed", listener), e); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestSlowLog.java b/server/src/main/java/org/opensearch/action/search/SearchRequestSlowLog.java index 7f25f9026f215..621bff9595ced 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestSlowLog.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestSlowLog.java @@ -134,29 +134,29 @@ public SearchRequestSlowLog(ClusterService clusterService) { } @Override - void onPhaseStart(SearchPhaseContext context) {} + void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} @Override void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} @Override - void onPhaseFailure(SearchPhaseContext context) {} + void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} @Override void onRequestStart(SearchRequestContext searchRequestContext) {} @Override - void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + void onRequestEnd(SearchRequestContext searchRequestContext) { long tookInNanos = System.nanoTime() - searchRequestContext.getAbsoluteStartNanos(); if (warnThreshold >= 0 && tookInNanos > warnThreshold && level.isLevelEnabledFor(SlowLogLevel.WARN)) { - logger.warn(new SearchRequestSlowLogMessage(context, tookInNanos, searchRequestContext)); + logger.warn(new SearchRequestSlowLogMessage(tookInNanos, searchRequestContext)); } else if (infoThreshold >= 0 && tookInNanos > infoThreshold && level.isLevelEnabledFor(SlowLogLevel.INFO)) { - logger.info(new SearchRequestSlowLogMessage(context, tookInNanos, searchRequestContext)); + logger.info(new SearchRequestSlowLogMessage(tookInNanos, searchRequestContext)); } else if (debugThreshold >= 0 && tookInNanos > debugThreshold && level.isLevelEnabledFor(SlowLogLevel.DEBUG)) { - logger.debug(new SearchRequestSlowLogMessage(context, tookInNanos, searchRequestContext)); + logger.debug(new SearchRequestSlowLogMessage(tookInNanos, searchRequestContext)); } else if (traceThreshold >= 0 && tookInNanos > traceThreshold && level.isLevelEnabledFor(SlowLogLevel.TRACE)) { - logger.trace(new SearchRequestSlowLogMessage(context, tookInNanos, searchRequestContext)); + logger.trace(new SearchRequestSlowLogMessage(tookInNanos, searchRequestContext)); } } @@ -167,15 +167,11 @@ void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequest */ static final class SearchRequestSlowLogMessage extends OpenSearchLogMessage { - SearchRequestSlowLogMessage(SearchPhaseContext context, long tookInNanos, SearchRequestContext searchRequestContext) { - super(prepareMap(context, tookInNanos, searchRequestContext), message(context, tookInNanos, searchRequestContext)); + SearchRequestSlowLogMessage(long tookInNanos, SearchRequestContext searchRequestContext) { + super(prepareMap(tookInNanos, searchRequestContext), message(tookInNanos, searchRequestContext)); } - private static Map prepareMap( - SearchPhaseContext context, - long tookInNanos, - SearchRequestContext searchRequestContext - ) { + private static Map prepareMap(long tookInNanos, SearchRequestContext searchRequestContext) { final Map messageFields = new HashMap<>(); messageFields.put("took", TimeValue.timeValueNanos(tookInNanos)); messageFields.put("took_millis", TimeUnit.NANOSECONDS.toMillis(tookInNanos)); @@ -185,22 +181,24 @@ private static Map prepareMap( } else { messageFields.put("total_hits", "-1"); } - messageFields.put("search_type", context.getRequest().searchType()); + messageFields.put("search_type", searchRequestContext.getSearchRequest().searchType()); messageFields.put("shards", searchRequestContext.formattedShardStats()); - - if (context.getRequest().source() != null) { - String source = escapeJson(context.getRequest().source().toString(FORMAT_PARAMS)); + if (searchRequestContext.getSearchRequest().source() != null) { + String source = escapeJson(searchRequestContext.getSearchRequest().source().toString(FORMAT_PARAMS)); messageFields.put("source", source); } else { messageFields.put("source", "{}"); } - - messageFields.put("id", context.getTask().getHeader(Task.X_OPAQUE_ID)); + if (searchRequestContext.getSearchTask() != null) { + messageFields.put("id", searchRequestContext.getSearchTask().getHeader(Task.X_OPAQUE_ID)); + } else { + messageFields.put("id", ""); + } return messageFields; } // Message will be used in plaintext logs - private static String message(SearchPhaseContext context, long tookInNanos, SearchRequestContext searchRequestContext) { + private static String message(long tookInNanos, SearchRequestContext searchRequestContext) { final StringBuilder sb = new StringBuilder(); sb.append("took[").append(TimeValue.timeValueNanos(tookInNanos)).append("], "); sb.append("took_millis[").append(TimeUnit.NANOSECONDS.toMillis(tookInNanos)).append("], "); @@ -210,15 +208,15 @@ private static String message(SearchPhaseContext context, long tookInNanos, Sear } else { sb.append("total_hits[-1]"); } - sb.append("search_type[").append(context.getRequest().searchType()).append("], "); + sb.append("search_type[").append(searchRequestContext.getSearchRequest().searchType()).append("], "); sb.append("shards[").append(searchRequestContext.formattedShardStats()).append("], "); - if (context.getRequest().source() != null) { - sb.append("source[").append(context.getRequest().source().toString(FORMAT_PARAMS)).append("], "); + if (searchRequestContext.getSearchRequest().source() != null) { + sb.append("source[").append(searchRequestContext.getSearchRequest().source().toString(FORMAT_PARAMS)).append("], "); } else { sb.append("source[], "); } - if (context.getTask().getHeader(Task.X_OPAQUE_ID) != null) { - sb.append("id[").append(context.getTask().getHeader(Task.X_OPAQUE_ID)).append("]"); + if (searchRequestContext.getSearchTask() != null) { + sb.append("id[").append(searchRequestContext.getSearchTask().getHeader(Task.X_OPAQUE_ID)).append("]"); } else { sb.append("id[]"); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestStats.java b/server/src/main/java/org/opensearch/action/search/SearchRequestStats.java index 88d599a0dcdaa..739407b773fac 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestStats.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestStats.java @@ -58,7 +58,7 @@ public long getPhaseMetric(SearchPhaseName searchPhaseName) { } @Override - void onPhaseStart(SearchPhaseContext context) { + void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) { phaseStatsMap.get(context.getCurrentPhase().getSearchPhaseName()).current.inc(); } @@ -71,7 +71,7 @@ void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestCo } @Override - void onPhaseFailure(SearchPhaseContext context) { + void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) { phaseStatsMap.get(context.getCurrentPhase().getSearchPhaseName()).current.dec(); } diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 79e599ec9387b..2512988f56f01 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -88,6 +88,11 @@ import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.RemoteClusterAware; import org.opensearch.transport.RemoteClusterService; @@ -173,6 +178,7 @@ public class TransportSearchAction extends HandledTransportAction) SearchRequest::new); this.client = client; @@ -215,6 +222,7 @@ public TransportSearchAction( this.searchRequestOperationsCompositeListenerFactory = searchRequestOperationsCompositeListenerFactory; clusterService.getClusterSettings() .addSettingsUpdateConsumer(SEARCH_QUERY_METRICS_ENABLED_SETTING, this::setSearchQueryMetricsEnabled); + this.tracer = tracer; } private void setSearchQueryMetricsEnabled(boolean searchQueryMetricsEnabled) { @@ -433,11 +441,16 @@ private void executeRequest( } SearchRequestOperationsListener.CompositeListener requestOperationsListeners = searchRequestOperationsCompositeListenerFactory .buildCompositeListener(originalSearchRequest, logger); - SearchRequestContext searchRequestContext = new SearchRequestContext(requestOperationsListeners, originalSearchRequest); + SearchRequestContext searchRequestContext = new SearchRequestContext(requestOperationsListeners, originalSearchRequest, tracer); searchRequestContext.getSearchRequestOperationsListener().onRequestStart(searchRequestContext); PipelinedRequest searchRequest; ActionListener listener; + + Span requestSpan = tracer.startSpan(SpanBuilder.from("coordReq")); + originalListener = TraceableActionListener.create(originalListener, requestSpan, tracer); + searchRequestContext.setRequestSpan(requestSpan); + try { searchRequest = searchPipelineService.resolvePipeline(originalSearchRequest); listener = searchRequest.transformResponseListener(originalListener); @@ -473,7 +486,9 @@ private void executeRequest( ); } }, listener::onFailure); - searchRequest.transformRequest(requestTransformListener); + try (SpanScope spanScope = tracer.withSpanInScope(requestSpan)) { + searchRequest.transformRequest(requestTransformListener); + } } private ActionListener buildRewriteListener( diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 8510122c39fcb..f640522c1ca07 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -46,6 +46,7 @@ import org.opensearch.action.admin.cluster.snapshots.status.TransportNodesSnapshotsStatus; import org.opensearch.action.search.SearchExecutionStatsCollector; import org.opensearch.action.search.SearchPhaseController; +import org.opensearch.action.search.SearchRequestCoordinatorTrace; import org.opensearch.action.search.SearchRequestOperationsCompositeListenerFactory; import org.opensearch.action.search.SearchRequestOperationsListener; import org.opensearch.action.search.SearchRequestSlowLog; @@ -787,6 +788,7 @@ protected Node( final SearchRequestStats searchRequestStats = new SearchRequestStats(clusterService.getClusterSettings()); final SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService); + final SearchRequestCoordinatorTrace searchRequestCoordinatorTrace = new SearchRequestCoordinatorTrace(tracer); remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, settings); final IndicesService indicesService = new IndicesService( @@ -885,7 +887,7 @@ protected Node( final SearchRequestOperationsCompositeListenerFactory searchRequestOperationsCompositeListenerFactory = new SearchRequestOperationsCompositeListenerFactory( Stream.concat( - Stream.of(searchRequestStats, searchRequestSlowLog), + Stream.of(searchRequestStats, searchRequestSlowLog, searchRequestCoordinatorTrace), pluginComponents.stream() .filter(p -> p instanceof SearchRequestOperationsListener) .map(p -> (SearchRequestOperationsListener) p) @@ -1284,6 +1286,7 @@ protected Node( b.bind(Tracer.class).toInstance(tracer); b.bind(SearchRequestStats.class).toInstance(searchRequestStats); b.bind(SearchRequestSlowLog.class).toInstance(searchRequestSlowLog); + b.bind(SearchRequestCoordinatorTrace.class).toInstance(searchRequestCoordinatorTrace); b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java index b6b2cf360d1c5..024b74ab6d2d0 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java @@ -94,4 +94,19 @@ private AttributeNames() { * Refresh Policy */ public static final String REFRESH_POLICY = "refresh_policy"; + + /** + * Search Request Source + */ + public static final String SOURCE = "source"; + + /** + * Search Response Shard Stats + */ + public static final String SHARDS = "shards"; + + /** + * Search Response Total Hits + */ + public static final String TOTAL_HITS = "total_hits"; } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java index 1dce422943b7a..f49ca1aabdd18 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java @@ -14,6 +14,7 @@ import org.opensearch.core.common.Strings; import org.opensearch.http.HttpRequest; import org.opensearch.rest.RestRequest; +import org.opensearch.search.internal.SearchContext; import org.opensearch.telemetry.tracing.attributes.Attributes; import org.opensearch.transport.TcpChannel; import org.opensearch.transport.Transport; @@ -42,6 +43,15 @@ private SpanBuilder() { } + /** + * Creates {@link SpanCreationContext} from String + * @param spanName String. + * @return context. + */ + public static SpanCreationContext from(String spanName) { + return SpanCreationContext.server().name(spanName); + } + /** * Creates {@link SpanCreationContext} from the {@link HttpRequest} * @param request Http request. @@ -67,7 +77,8 @@ public static SpanCreationContext from(RestRequest request) { * @return context */ public static SpanCreationContext from(String action, Transport.Connection connection) { - return SpanCreationContext.server().name(createSpanName(action, connection)).attributes(buildSpanAttributes(action, connection)); + return SpanCreationContext.server().name(createSpanName(action, connection)).attributes(buildSpanAttributes(action, connection)); // this + // one } public static SpanCreationContext from(String spanName, String nodeId, ReplicatedWriteRequest request) { @@ -170,4 +181,31 @@ private static Attributes buildSpanAttributes(String nodeId, ReplicatedWriteRequ return attributes; } + /** + * Creates {@link SpanCreationContext} with parent set to specified SpanContext. + * @param spanName name of span. + * @param parentSpan target parent span. + * @return context + */ + public static SpanCreationContext from(String spanName, SpanContext parentSpan) { + return SpanCreationContext.server().name(spanName).parent(parentSpan); + } + + /** + * Creates {@link SpanCreationContext} from SearchRequest. + * @param spanName name of span. + * @param searchContext SearchRequest object. + * @return context + */ + public static SpanCreationContext from(String spanName, SearchContext searchContext) { + return SpanCreationContext.server().name(spanName).attributes(buildSpanAttributes(searchContext)); + } + + private static Attributes buildSpanAttributes(SearchContext searchContext) { + Attributes attributes = Attributes.create() + .addAttribute(AttributeNames.SHARD_ID, searchContext.request().shardId().getId()) + .addAttribute(AttributeNames.INDEX, searchContext.request().shardId().getIndexName()); + return attributes; + } + } diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index a7cbbffc51ed4..799306ce1a735 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -420,17 +420,17 @@ public void testOnPhaseStart() { action.onPhaseStart(new SearchPhase("test") { @Override public void run() {} - }); + }, new SearchRequestContext()); action.onPhaseStart(new SearchPhase("none") { @Override public void run() {} - }); + }, new SearchRequestContext()); assertEquals(0, testListener.getPhaseCurrent(action.getSearchPhaseName())); action.onPhaseStart(new SearchPhase(action.getName()) { @Override public void run() {} - }); + }, new SearchRequestContext()); assertEquals(1, testListener.getPhaseCurrent(action.getSearchPhaseName())); } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsCompositeListenerFactoryTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsCompositeListenerFactoryTests.java index 78c5ba4412c68..d36fa74fdaabb 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsCompositeListenerFactoryTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsCompositeListenerFactoryTests.java @@ -119,13 +119,13 @@ public void testStandardListenerAndPerRequestListenerDisabled() { public SearchRequestOperationsListener createTestSearchRequestOperationsListener() { return new SearchRequestOperationsListener() { @Override - void onPhaseStart(SearchPhaseContext context) {} + void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} @Override void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} @Override - void onPhaseFailure(SearchPhaseContext context) {} + void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} }; } } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java index 0f737e00478cb..6e4cfb70d204b 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java @@ -17,7 +17,7 @@ */ public interface SearchRequestOperationsListenerSupport { default void onPhaseStart(SearchRequestOperationsListener listener, SearchPhaseContext context) { - listener.onPhaseStart(context); + listener.onPhaseStart(context, new SearchRequestContext()); } default void onPhaseEnd(SearchRequestOperationsListener listener, SearchPhaseContext context) { diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerTests.java index a53c35a8401b3..b825fa43039bf 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerTests.java @@ -29,7 +29,7 @@ public void testListenersAreExecuted() { SearchRequestOperationsListener testListener = new SearchRequestOperationsListener() { @Override - public void onPhaseStart(SearchPhaseContext context) { + public void onPhaseStart(SearchPhaseContext context, SearchRequestContext searchRequestContext) { searchPhaseMap.get(context.getCurrentPhase().getSearchPhaseName()).current.inc(); } @@ -40,7 +40,7 @@ public void onPhaseEnd(SearchPhaseContext context, SearchRequestContext searchRe } @Override - public void onPhaseFailure(SearchPhaseContext context) { + public void onPhaseFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) { searchPhaseMap.get(context.getCurrentPhase().getSearchPhaseName()).current.dec(); } }; @@ -62,7 +62,7 @@ public void onPhaseFailure(SearchPhaseContext context) { for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { when(ctx.getCurrentPhase()).thenReturn(searchPhase); when(searchPhase.getSearchPhaseName()).thenReturn(searchPhaseName); - compositeListener.onPhaseStart(ctx); + compositeListener.onPhaseStart(ctx, new SearchRequestContext()); assertEquals(totalListeners, searchPhaseMap.get(searchPhaseName).current.count()); } } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java index f009988ffae17..d5a4883da6a6c 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java @@ -114,7 +114,6 @@ public void testMultipleSlowLoggersUseSingleLog4jLogger() { public void testOnRequestEnd() throws InterruptedException { final Logger logger = mock(Logger.class); final SearchRequestContext searchRequestContext = mock(SearchRequestContext.class); - final SearchPhaseContext searchPhaseContext = mock(SearchPhaseContext.class); final SearchRequest searchRequest = mock(SearchRequest.class); final SearchTask searchTask = mock(SearchTask.class); @@ -132,11 +131,11 @@ public void testOnRequestEnd() throws InterruptedException { new SearchRequestOperationsListener.CompositeListener(searchListenersList, logger) ); when(searchRequestContext.getAbsoluteStartNanos()).thenReturn(System.nanoTime() - 1L); - when(searchPhaseContext.getRequest()).thenReturn(searchRequest); - when(searchPhaseContext.getTask()).thenReturn(searchTask); + when(searchRequestContext.getSearchRequest()).thenReturn(searchRequest); + when(searchRequestContext.getSearchTask()).thenReturn(searchTask); when(searchRequest.searchType()).thenReturn(SearchType.QUERY_THEN_FETCH); - searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(searchPhaseContext, searchRequestContext); + searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(searchRequestContext); verify(logger, never()).warn(any(SearchRequestSlowLog.SearchRequestSlowLogMessage.class)); verify(logger, times(1)).info(any(SearchRequestSlowLog.SearchRequestSlowLogMessage.class)); @@ -146,7 +145,6 @@ public void testOnRequestEnd() throws InterruptedException { public void testConcurrentOnRequestEnd() throws InterruptedException { final Logger logger = mock(Logger.class); - final SearchPhaseContext searchPhaseContext = mock(SearchPhaseContext.class); final SearchRequest searchRequest = mock(SearchRequest.class); final SearchTask searchTask = mock(SearchTask.class); @@ -161,8 +159,6 @@ public void testConcurrentOnRequestEnd() throws InterruptedException { SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService, logger); final List searchListenersList = new ArrayList<>(List.of(searchRequestSlowLog)); - when(searchPhaseContext.getRequest()).thenReturn(searchRequest); - when(searchPhaseContext.getTask()).thenReturn(searchTask); when(searchRequest.searchType()).thenReturn(SearchType.QUERY_THEN_FETCH); int numRequests = 50; @@ -179,6 +175,7 @@ public void testConcurrentOnRequestEnd() throws InterruptedException { new SearchRequestOperationsListener.CompositeListener(searchListenersList, logger), searchRequest ); + searchRequestContext.setSearchTask(searchTask); searchRequestContext.setAbsoluteStartNanos((i < numRequestsLogged) ? 0 : System.nanoTime()); searchRequestContexts.add(searchRequestContext); } @@ -188,7 +185,7 @@ public void testConcurrentOnRequestEnd() throws InterruptedException { threads[i] = new Thread(() -> { phaser.arriveAndAwaitAdvance(); SearchRequestContext thisContext = searchRequestContexts.get(finalI); - thisContext.getSearchRequestOperationsListener().onRequestEnd(searchPhaseContext, thisContext); + thisContext.getSearchRequestOperationsListener().onRequestEnd(thisContext); countDownLatch.countDown(); }); threads[i].start(); @@ -210,11 +207,7 @@ public void testSearchRequestSlowLogHasJsonFields_EmptySearchRequestContext() th new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), searchRequest ); - SearchRequestSlowLog.SearchRequestSlowLogMessage p = new SearchRequestSlowLog.SearchRequestSlowLogMessage( - searchPhaseContext, - 10, - searchRequestContext - ); + SearchRequestSlowLog.SearchRequestSlowLogMessage p = new SearchRequestSlowLog.SearchRequestSlowLogMessage(10, searchRequestContext); assertThat(p.getValueFor("took"), equalTo("10nanos")); assertThat(p.getValueFor("took_millis"), equalTo("0")); @@ -223,7 +216,7 @@ public void testSearchRequestSlowLogHasJsonFields_EmptySearchRequestContext() th assertThat(p.getValueFor("search_type"), equalTo("QUERY_THEN_FETCH")); assertThat(p.getValueFor("shards"), equalTo("")); assertThat(p.getValueFor("source"), equalTo("{\\\"query\\\":{\\\"match_all\\\":{\\\"boost\\\":1.0}}}")); - assertThat(p.getValueFor("id"), equalTo(null)); + assertThat(p.getValueFor("id"), equalTo("")); } public void testSearchRequestSlowLogHasJsonFields_NotEmptySearchRequestContext() throws IOException { @@ -239,11 +232,7 @@ public void testSearchRequestSlowLogHasJsonFields_NotEmptySearchRequestContext() searchRequestContext.updatePhaseTookMap(SearchPhaseName.EXPAND.getName(), 5L); searchRequestContext.setTotalHits(new TotalHits(3L, TotalHits.Relation.EQUAL_TO)); searchRequestContext.setShardStats(10, 8, 1, 1); - SearchRequestSlowLog.SearchRequestSlowLogMessage p = new SearchRequestSlowLog.SearchRequestSlowLogMessage( - searchPhaseContext, - 10, - searchRequestContext - ); + SearchRequestSlowLog.SearchRequestSlowLogMessage p = new SearchRequestSlowLog.SearchRequestSlowLogMessage(10, searchRequestContext); assertThat(p.getValueFor("took"), equalTo("10nanos")); assertThat(p.getValueFor("took_millis"), equalTo("0")); @@ -252,7 +241,7 @@ public void testSearchRequestSlowLogHasJsonFields_NotEmptySearchRequestContext() assertThat(p.getValueFor("search_type"), equalTo("QUERY_THEN_FETCH")); assertThat(p.getValueFor("shards"), equalTo("{total:10, successful:8, skipped:1, failed:1}")); assertThat(p.getValueFor("source"), equalTo("{\\\"query\\\":{\\\"match_all\\\":{\\\"boost\\\":1.0}}}")); - assertThat(p.getValueFor("id"), equalTo(null)); + assertThat(p.getValueFor("id"), equalTo("")); } public void testSearchRequestSlowLogHasJsonFields_PartialContext() throws IOException { @@ -269,7 +258,6 @@ public void testSearchRequestSlowLogHasJsonFields_PartialContext() throws IOExce searchRequestContext.setTotalHits(new TotalHits(3L, TotalHits.Relation.EQUAL_TO)); searchRequestContext.setShardStats(5, 3, 1, 1); SearchRequestSlowLog.SearchRequestSlowLogMessage p = new SearchRequestSlowLog.SearchRequestSlowLogMessage( - searchPhaseContext, 10000000000L, searchRequestContext ); @@ -281,7 +269,7 @@ public void testSearchRequestSlowLogHasJsonFields_PartialContext() throws IOExce assertThat(p.getValueFor("search_type"), equalTo("QUERY_THEN_FETCH")); assertThat(p.getValueFor("shards"), equalTo("{total:5, successful:3, skipped:1, failed:1}")); assertThat(p.getValueFor("source"), equalTo("{\\\"query\\\":{\\\"match_all\\\":{\\\"boost\\\":1.0}}}")); - assertThat(p.getValueFor("id"), equalTo(null)); + assertThat(p.getValueFor("id"), equalTo("")); } public void testSearchRequestSlowLogSearchContextPrinterToLog() throws IOException { @@ -298,7 +286,6 @@ public void testSearchRequestSlowLogSearchContextPrinterToLog() throws IOExcepti searchRequestContext.setTotalHits(new TotalHits(3L, TotalHits.Relation.EQUAL_TO)); searchRequestContext.setShardStats(10, 8, 1, 1); SearchRequestSlowLog.SearchRequestSlowLogMessage p = new SearchRequestSlowLog.SearchRequestSlowLogMessage( - searchPhaseContext, 100000, searchRequestContext ); diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java index 377ccebbfd418..87598ad1e8271 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java @@ -34,9 +34,9 @@ public void testSearchRequestPhaseFailure() { for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { when(mockSearchPhase.getSearchPhaseName()).thenReturn(searchPhaseName); - testRequestStats.onPhaseStart(ctx); + testRequestStats.onPhaseStart(ctx, new SearchRequestContext()); assertEquals(1, testRequestStats.getPhaseCurrent(searchPhaseName)); - testRequestStats.onPhaseFailure(ctx); + testRequestStats.onPhaseFailure(ctx, new SearchRequestContext()); assertEquals(0, testRequestStats.getPhaseCurrent(searchPhaseName)); } } @@ -52,7 +52,7 @@ public void testSearchRequestStats() { for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { when(mockSearchPhase.getSearchPhaseName()).thenReturn(searchPhaseName); long tookTimeInMillis = randomIntBetween(1, 10); - testRequestStats.onPhaseStart(ctx); + testRequestStats.onPhaseStart(ctx, new SearchRequestContext()); long startTime = System.nanoTime() - TimeUnit.MILLISECONDS.toNanos(tookTimeInMillis); when(mockSearchPhase.getStartTimeInNanos()).thenReturn(startTime); assertEquals(1, testRequestStats.getPhaseCurrent(searchPhaseName)); @@ -84,7 +84,7 @@ public void testSearchRequestStatsOnPhaseStartConcurrently() throws InterruptedE for (int i = 0; i < numTasks; i++) { threads[i] = new Thread(() -> { phaser.arriveAndAwaitAdvance(); - testRequestStats.onPhaseStart(ctx); + testRequestStats.onPhaseStart(ctx, new SearchRequestContext()); countDownLatch.countDown(); }); threads[i].start(); @@ -155,8 +155,8 @@ public void testSearchRequestStatsOnPhaseFailureConcurrently() throws Interrupte for (int i = 0; i < numTasks; i++) { threads[i] = new Thread(() -> { phaser.arriveAndAwaitAdvance(); - testRequestStats.onPhaseStart(ctx); - testRequestStats.onPhaseFailure(ctx); + testRequestStats.onPhaseStart(ctx, new SearchRequestContext()); + testRequestStats.onPhaseFailure(ctx, new SearchRequestContext()); countDownLatch.countDown(); }); threads[i].start(); diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 9bb1f51c51cf6..f097b33f54447 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2313,7 +2313,8 @@ public void onFailure(final Exception e) { client ), NoopMetricsRegistry.INSTANCE, - searchRequestOperationsCompositeListenerFactory + searchRequestOperationsCompositeListenerFactory, + NoopTracer.INSTANCE ) ); actions.put(