From d38507694f4f72453622602a55539b9def00cefe Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 12 Sep 2024 07:28:31 -0500 Subject: [PATCH] feat(search): search query rewriter (#11279) --- .../src/main/resources/search.graphql | 15 + .../metadata/aspect/GraphRetriever.java | 57 +++ .../elasticsearch/query/ESBrowseDAO.java | 16 +- .../elasticsearch/query/ESSearchDAO.java | 37 +- .../query/filter/BaseQueryFilterRewriter.java | 239 +++++++++++ .../filter/ContainerExpansionRewriter.java | 85 ++++ .../query/filter/DomainExpansionRewriter.java | 85 ++++ .../query/filter/QueryFilterRewriteChain.java | 32 ++ .../query/filter/QueryFilterRewriter.java | 35 ++ .../filter/QueryFilterRewriterContext.java | 50 +++ .../filter/QueryFilterRewriterSearchType.java | 9 + .../request/AutocompleteRequestHandler.java | 16 +- .../query/request/SearchRequestHandler.java | 33 +- .../metadata/search/utils/ESUtils.java | 85 +++- .../ElasticSearchTimeseriesAspectService.java | 32 +- .../elastic/query/ESAggregatedStatsDAO.java | 10 +- .../search/LineageServiceTestBase.java | 10 +- .../search/SearchServiceTestBase.java | 9 +- .../metadata/search/TestEntityTestBase.java | 9 +- .../indexbuilder/MappingsBuilderTest.java | 2 +- .../metadata/search/query/BrowseDAOTest.java | 8 +- .../search/query/SearchDAOTestBase.java | 10 +- .../ContainerExpansionRewriterTest.java | 365 +++++++++++++++++ .../filter/DomainExpansionRewriterTest.java | 370 ++++++++++++++++++ .../AutocompleteRequestHandlerTest.java | 23 +- .../request/SearchRequestHandlerTest.java | 29 +- .../query/request/TestSearchFieldConfig.java | 3 +- .../SearchDocumentTransformerTest.java | 2 +- .../metadata/search/utils/ESUtilsTest.java | 120 +++++- .../TimeseriesAspectServiceTestBase.java | 4 +- .../TimeseriesAspectServiceUnitTest.java | 7 +- .../SampleDataFixtureConfiguration.java | 12 +- .../SearchLineageFixtureConfiguration.java | 10 +- .../config/SearchCommonTestConfiguration.java | 6 
+ .../MCLSpringCommonTestConfiguration.java | 3 +- .../linkedin/metadata/query/SearchFlags.pdl | 5 + .../metadata/query/filter/Condition.pdl | 15 + .../config/DataHubAppConfiguration.java | 4 + .../QueryFilterRewriterConfiguration.java | 24 ++ .../search/SearchServiceConfiguration.java | 8 + .../src/main/resources/application.yaml | 9 + .../search/ElasticSearchServiceFactory.java | 13 +- .../QueryFilterRewriterChainFactory.java | 51 +++ ...cSearchTimeseriesAspectServiceFactory.java | 7 +- ...linkedin.analytics.analytics.snapshot.json | 5 +- .../com.linkedin.entity.aspects.snapshot.json | 5 +- ...com.linkedin.entity.entities.snapshot.json | 17 +- ...m.linkedin.platform.platform.snapshot.json | 6 +- .../metadata/search/utils/QueryUtils.java | 21 + .../gms/servlet/ConfigSearchExport.java | 15 +- 50 files changed, 1917 insertions(+), 126 deletions(-) create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/ContainerExpansionRewriter.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/DomainExpansionRewriter.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriteChain.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriter.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterContext.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterSearchType.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java create mode 100644 
metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java rename metadata-io/src/test/java/com/linkedin/metadata/search/{elasticsearch => }/query/request/TestSearchFieldConfig.java (94%) create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/QueryFilterRewriterConfiguration.java create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchServiceConfiguration.java create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/filter/QueryFilterRewriterChainFactory.java diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index ed214ebe6632d..9c4375e70d9ba 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -550,6 +550,21 @@ enum FilterOperator { Represent the relation less than or equal to, e.g. 
ownerCount <= 3 """ LESS_THAN_OR_EQUAL_TO + + """ + Represent the relation: URN field matches any nested children in addition to the given URN + """ + DESCENDANTS_INCL + + """ + Represent the relation: URN field matches any nested parent in addition to the given URN + """ + ANCESTORS_INCL + + """ + Represent the relation: URN field matches any nested child or parent in addition to the given URN + """ + RELATED_INCL } """ diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java index cedaac25ffee9..f6858e7da4ba6 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java @@ -5,6 +5,7 @@ import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; import java.util.List; +import java.util.function.Function; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -40,4 +41,60 @@ RelatedEntitiesScrollResult scrollRelatedEntities( int count, @Nullable Long startTimeMillis, @Nullable Long endTimeMillis); + + /** + * Consume graph edges + * + * @param consumer consumer function, return true to exit early + * @param sourceTypes + * @param sourceEntityFilter + * @param destinationTypes + * @param destinationEntityFilter + * @param relationshipTypes + * @param relationshipFilter + * @param sortCriteria + * @param count + * @param startTimeMillis + * @param endTimeMillis + */ + default void consumeRelatedEntities( + @Nonnull Function consumer, + @Nullable List sourceTypes, + @Nonnull Filter sourceEntityFilter, + @Nullable List destinationTypes, + @Nonnull Filter destinationEntityFilter, + @Nonnull List relationshipTypes, + @Nonnull RelationshipFilter relationshipFilter, + @Nonnull List sortCriteria, + int count, + @Nullable Long startTimeMillis, + @Nullable Long endTimeMillis) { + + 
String scrollId = null; + boolean exitCriteria = false; + + while (!exitCriteria) { + RelatedEntitiesScrollResult result = + scrollRelatedEntities( + sourceTypes, + sourceEntityFilter, + destinationTypes, + destinationEntityFilter, + relationshipTypes, + relationshipFilter, + sortCriteria, + scrollId, + count, + startTimeMillis, + endTimeMillis); + + exitCriteria = consumer.apply(result); + + if (result == null || result.getEntities().isEmpty() || result.getScrollId() == null) { + exitCriteria = true; + } else { + scrollId = result.getScrollId(); + } + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index b55418d12c7c2..f18fd0d1e2605 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -23,6 +23,7 @@ import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.metadata.search.utils.SearchUtils; @@ -69,6 +70,7 @@ public class ESBrowseDAO { private final RestHighLevelClient client; @Nonnull private final SearchConfiguration searchConfiguration; @Nullable private final CustomSearchConfiguration customSearchConfiguration; + @Nonnull private final QueryFilterRewriteChain queryFilterRewriteChain; private static final String BROWSE_PATH = "browsePaths"; private static final String BROWSE_PATH_DEPTH = "browsePaths.length"; @@ -607,7 +609,8 @@ private QueryBuilder buildQueryStringV2( EntitySpec entitySpec = 
opContext.getEntityRegistry().getEntitySpec(entityName); QueryBuilder query = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, customSearchConfiguration, queryFilterRewriteChain) .getQuery( finalOpContext, input, @@ -623,7 +626,7 @@ private QueryBuilder buildQueryStringV2( queryBuilder.filter( SearchRequestHandler.getFilterQuery( - finalOpContext, filter, entitySpec.getSearchableFieldTypes())); + finalOpContext, filter, entitySpec.getSearchableFieldTypes(), queryFilterRewriteChain)); return queryBuilder; } @@ -643,7 +646,11 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); QueryBuilder query = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .getQuery( finalOpContext, input, @@ -669,7 +676,8 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( return set1; })); queryBuilder.filter( - SearchRequestHandler.getFilterQuery(finalOpContext, filter, searchableFields)); + SearchRequestHandler.getFilterQuery( + finalOpContext, filter, searchableFields, queryFilterRewriteChain)); return queryBuilder; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index cb342794aff58..d6329ba75d428 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.search.FilterValueArray; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchResult; 
+import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder; import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; @@ -76,6 +77,7 @@ public class ESSearchDAO { private final String elasticSearchImplementation; @Nonnull private final SearchConfiguration searchConfiguration; @Nullable private final CustomSearchConfiguration customSearchConfiguration; + @Nonnull private final QueryFilterRewriteChain queryFilterRewriteChain; public long docCount(@Nonnull OperationContext opContext, @Nonnull String entityName) { return docCount(opContext, entityName, null); @@ -88,7 +90,10 @@ public long docCount( new CountRequest(opContext.getSearchContext().getIndexConvention().getIndexName(entitySpec)) .query( SearchRequestHandler.getFilterQuery( - opContext, filter, entitySpec.getSearchableFieldTypes())); + opContext, + filter, + entitySpec.getSearchableFieldTypes(), + queryFilterRewriteChain)); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "docCount").time()) { return client.count(countRequest, RequestOptions.DEFAULT).getCount(); } catch (IOException e) { @@ -115,7 +120,10 @@ private SearchResult executeAndExtract( return transformIndexIntoEntityName( opContext.getSearchContext().getIndexConvention(), SearchRequestHandler.getBuilder( - entitySpec, searchConfiguration, customSearchConfiguration) + entitySpec, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .extractResult(opContext, searchResponse, filter, from, size)); } catch (Exception e) { log.error("Search query failed", e); @@ -212,7 +220,10 @@ private ScrollResult executeAndExtract( return transformIndexIntoEntityName( opContext.getSearchContext().getIndexConvention(), SearchRequestHandler.getBuilder( - entitySpecs, searchConfiguration, 
customSearchConfiguration) + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .extractScrollResult( opContext, searchResponse, filter, keepAlive, size, supportsPointInTime())); } catch (Exception e) { @@ -255,7 +266,11 @@ public SearchResult search( Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); // Step 1: construct the query final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .getSearchRequest( opContext, finalInput, transformedFilters, sortCriteria, from, size, facets); searchRequest.indices( @@ -288,7 +303,8 @@ public SearchResult filter( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); Filter transformedFilters = transformFilterForEntities(filters, indexConvention); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, customSearchConfiguration, queryFilterRewriteChain) .getFilterRequest(opContext, transformedFilters, sortCriteria, from, size); searchRequest.indices(indexConvention.getIndexName(entitySpec)); @@ -321,7 +337,8 @@ public AutoCompleteResult autoComplete( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); AutocompleteRequestHandler builder = - AutocompleteRequestHandler.getBuilder(entitySpec, customSearchConfiguration); + AutocompleteRequestHandler.getBuilder( + entitySpec, customSearchConfiguration, queryFilterRewriteChain); SearchRequest req = builder.getSearchRequest( opContext, @@ -366,7 +383,11 @@ public Map aggregateByValue( } IndexConvention indexConvention = 
opContext.getSearchContext().getIndexConvention(); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .getAggregationRequest( opContext, field, @@ -481,7 +502,7 @@ private SearchRequest getScrollRequest( } return SearchRequestHandler.getBuilder( - entitySpecs, searchConfiguration, customSearchConfiguration) + entitySpecs, searchConfiguration, customSearchConfiguration, queryFilterRewriteChain) .getSearchRequest( opContext, finalInput, postFilters, sortCriteria, sort, pitId, keepAlive, size, facets); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java new file mode 100644 index 0000000000000..800d59bacc1d8 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java @@ -0,0 +1,239 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; +import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.search.utils.QueryUtils; +import io.datahubproject.metadata.context.OperationContext; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import 
java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermsQueryBuilder; + +@Slf4j +public abstract class BaseQueryFilterRewriter implements QueryFilterRewriter { + + protected T expandUrnsByGraph( + @Nonnull OperationContext opContext, + T queryBuilder, + List relationshipTypes, + RelationshipDirection relationshipDirection, + int pageSize, + int limit) { + + if (matchTermsQueryFieldName(queryBuilder, getRewriterFieldNames())) { + return (T) + expandTerms( + opContext, + (TermsQueryBuilder) queryBuilder, + relationshipTypes, + relationshipDirection, + pageSize, + limit); + } else if (queryBuilder instanceof BoolQueryBuilder) { + return (T) + handleNestedFilters( + opContext, + (BoolQueryBuilder) queryBuilder, + relationshipTypes, + relationshipDirection, + pageSize, + limit); + } + return queryBuilder; + } + + /** + * The assumption here is that the input query builder is part of the `filter` of a parent query + * builder + * + * @param boolQueryBuilder bool query builder that is part of a filter + * @return a new bool query builder with each nested clause passed through expandUrnsByGraph + */ + private BoolQueryBuilder handleNestedFilters( + OperationContext opContext, + BoolQueryBuilder boolQueryBuilder, + List relationshipTypes, + RelationshipDirection relationshipDirection, + int pageSize, + int limit) { + + List filterQueryBuilders = + boolQueryBuilder.filter().stream() + .map( + qb -> + expandUrnsByGraph( + opContext, qb, relationshipTypes, relationshipDirection, pageSize, limit)) + .collect(Collectors.toList()); + List shouldQueryBuilders = + boolQueryBuilder.should().stream() + .map( + qb -> + expandUrnsByGraph( + opContext, qb, relationshipTypes, relationshipDirection, pageSize, limit)) + 
.collect(Collectors.toList()); + List mustQueryBuilders = + boolQueryBuilder.must().stream() + .map( + qb -> + expandUrnsByGraph( + opContext, qb, relationshipTypes, relationshipDirection, pageSize, limit)) + .collect(Collectors.toList()); + List mustNotQueryBuilders = + boolQueryBuilder.mustNot().stream() + .map( + qb -> + expandUrnsByGraph( + opContext, qb, relationshipTypes, relationshipDirection, pageSize, limit)) + .collect(Collectors.toList()); + + BoolQueryBuilder expandedQueryBuilder = QueryBuilders.boolQuery(); + filterQueryBuilders.forEach(expandedQueryBuilder::filter); + shouldQueryBuilders.forEach(expandedQueryBuilder::should); + mustQueryBuilders.forEach(expandedQueryBuilder::must); + mustNotQueryBuilders.forEach(expandedQueryBuilder::mustNot); + expandedQueryBuilder.queryName(boolQueryBuilder.queryName()); + expandedQueryBuilder.adjustPureNegative(boolQueryBuilder.adjustPureNegative()); + expandedQueryBuilder.minimumShouldMatch(boolQueryBuilder.minimumShouldMatch()); + expandedQueryBuilder.boost(boolQueryBuilder.boost()); + + return expandedQueryBuilder; + } + + /** + * Expand URNs by graph walk + * + * @param opContext context + * @param termsQueryBuilder initial terms query builder + * @param relationshipTypes relationship to walk + * @param relationshipDirection direction to walk + * @param pageSize pagination size + * @param limit max results + * @return updated query builder with expanded terms + */ + private static QueryBuilder expandTerms( + OperationContext opContext, + TermsQueryBuilder termsQueryBuilder, + List relationshipTypes, + RelationshipDirection relationshipDirection, + int pageSize, + int limit) { + Set queryUrns = + termsQueryBuilder.values().stream() + .map(urnObj -> UrnUtils.getUrn(urnObj.toString())) + .collect(Collectors.toSet()); + Set expandedUrns = new HashSet<>(queryUrns); + + if (!queryUrns.isEmpty()) { + + scrollGraph( + opContext.getRetrieverContext().get().getGraphRetriever(), + queryUrns, + relationshipTypes, + 
relationshipDirection, + expandedUrns, + pageSize, + limit); + + return expandTermsQueryUrnValues(termsQueryBuilder, expandedUrns); + } + + return termsQueryBuilder; + } + + private static boolean matchTermsQueryFieldName( + QueryBuilder queryBuilder, Set fieldNames) { + if (queryBuilder instanceof TermsQueryBuilder) { + return fieldNames.stream() + .anyMatch(fieldName -> fieldName.equals(((TermsQueryBuilder) queryBuilder).fieldName())); + } + return false; + } + + private static TermsQueryBuilder expandTermsQueryUrnValues( + TermsQueryBuilder termsQueryBuilder, Set values) { + return QueryBuilders.termsQuery( + termsQueryBuilder.fieldName(), values.stream().map(Urn::toString).sorted().toArray()) + .queryName(termsQueryBuilder.queryName()) + .boost(termsQueryBuilder.boost()); + } + + private static void scrollGraph( + @Nonnull GraphRetriever graphRetriever, + @Nonnull Set queryUrns, + List relationshipTypes, + RelationshipDirection relationshipDirection, + @Nonnull Set visitedUrns, + int pageSize, + int limit) { + + List entityTypes = + queryUrns.stream().map(Urn::getEntityType).distinct().collect(Collectors.toList()); + List queryUrnStrs = queryUrns.stream().map(Urn::toString).collect(Collectors.toList()); + + Set nextUrns = new HashSet<>(); + + Supplier earlyExitCriteria = + () -> (queryUrns.size() + visitedUrns.size() + nextUrns.size()) >= limit; + + Function consumer = + result -> { + if (result != null) { + // track next hop urns + nextUrns.addAll( + result.getEntities().stream() + .map(e -> UrnUtils.getUrn(e.asRelatedEntity().getUrn())) + .filter(urn -> !visitedUrns.contains(urn)) + .collect(Collectors.toSet())); + } + + // exit early if we have enough + return earlyExitCriteria.get(); + }; + + graphRetriever.consumeRelatedEntities( + consumer, + entityTypes, + QueryUtils.newDisjunctiveFilter(newCriterion("urn", queryUrnStrs)), + entityTypes, + EMPTY_FILTER, + relationshipTypes, + newRelationshipFilter(EMPTY_FILTER, relationshipDirection), + 
Edge.EDGE_SORT_CRITERION, + pageSize, + null, + null); + + // mark visited + visitedUrns.addAll(queryUrns); + + if (earlyExitCriteria.get()) { + visitedUrns.addAll(nextUrns); + } else if (!nextUrns.isEmpty()) { + // next hop + scrollGraph( + graphRetriever, + nextUrns, + relationshipTypes, + relationshipDirection, + visitedUrns, + pageSize, + limit); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/ContainerExpansionRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/ContainerExpansionRewriter.java new file mode 100644 index 0000000000000..ca2b67ad32f64 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/ContainerExpansionRewriter.java @@ -0,0 +1,85 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.AUTOCOMPLETE; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.FULLTEXT_SEARCH; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.STRUCTURED_SEARCH; + +import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import org.opensearch.index.query.QueryBuilder; + +@Builder +public class ContainerExpansionRewriter extends BaseQueryFilterRewriter { + + @Getter + private final Set rewriterSearchTypes = + Set.of(AUTOCOMPLETE, FULLTEXT_SEARCH, STRUCTURED_SEARCH); + + @Builder.Default private Condition defaultCondition = Condition.DESCENDANTS_INCL; + + 
@Nonnull private final QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration config; + + @Nonnull + @Override + public Set getRewriterFieldNames() { + return Set.of("container.keyword"); + } + + @Override + public T rewrite( + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriterContext rewriterContext, + @Nullable T filterQuery) { + + if (filterQuery != null && isQueryTimeEnabled(rewriterContext)) { + switch (rewriterContext.getCondition() == null + ? defaultCondition + : rewriterContext.getCondition()) { + case DESCENDANTS_INCL: + return expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.INCOMING, + config.getPageSize(), + config.getLimit()); + case ANCESTORS_INCL: + return expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.OUTGOING, + config.getPageSize(), + config.getLimit()); + default: + // UNDIRECTED doesn't work at the graph service layer + // RelationshipDirection.UNDIRECTED; + T descendantQuery = + expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.INCOMING, + config.getPageSize(), + config.getLimit()); + return expandUrnsByGraph( + opContext, + descendantQuery, + List.of("IsPartOf"), + RelationshipDirection.OUTGOING, + config.getPageSize(), + config.getLimit()); + } + } + + return filterQuery; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/DomainExpansionRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/DomainExpansionRewriter.java new file mode 100644 index 0000000000000..fbe8337d6e599 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/DomainExpansionRewriter.java @@ -0,0 +1,85 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import static 
com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.AUTOCOMPLETE; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.FULLTEXT_SEARCH; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.STRUCTURED_SEARCH; + +import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import org.opensearch.index.query.QueryBuilder; + +@Builder +public class DomainExpansionRewriter extends BaseQueryFilterRewriter { + + @Getter + private final Set rewriterSearchTypes = + Set.of(AUTOCOMPLETE, FULLTEXT_SEARCH, STRUCTURED_SEARCH); + + @Builder.Default private Condition defaultCondition = Condition.DESCENDANTS_INCL; + + @Nonnull private final QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration config; + + @Nonnull + @Override + public Set getRewriterFieldNames() { + return Set.of("domains.keyword"); + } + + @Override + public T rewrite( + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriterContext rewriterContext, + @Nullable T filterQuery) { + + if (filterQuery != null && isQueryTimeEnabled(rewriterContext)) { + switch (rewriterContext.getCondition() == null + ? 
defaultCondition + : rewriterContext.getCondition()) { + case DESCENDANTS_INCL: + return expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.INCOMING, + config.getPageSize(), + config.getLimit()); + case ANCESTORS_INCL: + return expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.OUTGOING, + config.getPageSize(), + config.getLimit()); + default: + // UNDIRECTED doesn't work at the graph service layer + // RelationshipDirection.UNDIRECTED; + T descendantQuery = + expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.INCOMING, + config.getPageSize(), + config.getLimit()); + return expandUrnsByGraph( + opContext, + descendantQuery, + List.of("IsPartOf"), + RelationshipDirection.OUTGOING, + config.getPageSize(), + config.getLimit()); + } + } + + return filterQuery; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriteChain.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriteChain.java new file mode 100644 index 0000000000000..48fc5c0625e33 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriteChain.java @@ -0,0 +1,32 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import io.datahubproject.metadata.context.OperationContext; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.opensearch.index.query.QueryBuilder; + +public class QueryFilterRewriteChain { + public static final QueryFilterRewriteChain EMPTY = new QueryFilterRewriteChain(List.of()); + private final List filterRewriters; + + public static QueryFilterRewriteChain of(@Nonnull QueryFilterRewriter... 
filters) { + return new QueryFilterRewriteChain(Arrays.stream(filters).collect(Collectors.toList())); + } + + public QueryFilterRewriteChain(List filterRewriters) { + this.filterRewriters = filterRewriters; + } + + public T rewrite( + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriterContext rewriterContext, + @Nullable T filterQuery) { + for (QueryFilterRewriter queryFilterRewriter : filterRewriters) { + filterQuery = queryFilterRewriter.rewrite(opContext, rewriterContext, filterQuery); + } + return filterQuery; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriter.java new file mode 100644 index 0000000000000..95d4cb2887624 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriter.java @@ -0,0 +1,35 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import com.linkedin.metadata.query.SearchFlags; +import io.datahubproject.metadata.context.OperationContext; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.opensearch.index.query.QueryBuilder; + +public interface QueryFilterRewriter { + + T rewrite( + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriterContext rewriterContext, + @Nullable T filterQuery); + + @Nonnull + Set getRewriterFieldNames(); + + @Nonnull + Set getRewriterSearchTypes(); + + default boolean isQueryTimeEnabled( + @Nonnull QueryFilterRewriterContext queryFilterRewriterContext) { + return isQueryTimeEnabled( + queryFilterRewriterContext.getSearchType(), queryFilterRewriterContext.getSearchFlags()); + } + + default boolean isQueryTimeEnabled( + @Nonnull QueryFilterRewriterSearchType rewriteSearchType, @Nullable SearchFlags searchFlags) { + return getRewriterSearchTypes().contains(rewriteSearchType) && 
searchFlags == null + || searchFlags.isRewriteQuery() == null + || Boolean.TRUE.equals(searchFlags.isRewriteQuery()); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterContext.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterContext.java new file mode 100644 index 0000000000000..274b97f01b29f --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterContext.java @@ -0,0 +1,50 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.AUTOCOMPLETE; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.FULLTEXT_SEARCH; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.STRUCTURED_SEARCH; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.TIMESERIES; + +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Condition; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import org.opensearch.index.query.QueryBuilder; + +@Builder +@Getter +public class QueryFilterRewriterContext { + @Nonnull private final QueryFilterRewriteChain queryFilterRewriteChain; + @Nonnull private final QueryFilterRewriterSearchType searchType; + @Nullable private final Condition condition; + @Nullable private final SearchFlags searchFlags; + + public T rewrite( + @Nonnull OperationContext opContext, @Nullable T filterQuery) { + return queryFilterRewriteChain.rewrite(opContext, this, filterQuery); + } + + public static class QueryFilterRewriterContextBuilder { + private 
QueryFilterRewriterContext build() { + return null; + } + + public QueryFilterRewriterContext build(boolean isTimeseries) { + if (this.searchType == null) { + if (isTimeseries) { + this.searchType = TIMESERIES; + } else if (this.searchFlags != null) { + this.searchType = this.searchFlags.isFulltext() ? FULLTEXT_SEARCH : STRUCTURED_SEARCH; + } else { + this.searchType = AUTOCOMPLETE; + } + } + + return new QueryFilterRewriterContext( + this.queryFilterRewriteChain, this.searchType, this.condition, this.searchFlags); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterSearchType.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterSearchType.java new file mode 100644 index 0000000000000..5cef4c8371d8d --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterSearchType.java @@ -0,0 +1,9 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +public enum QueryFilterRewriterSearchType { + STRUCTURED_SEARCH, + FULLTEXT_SEARCH, + AUTOCOMPLETE, + TIMESERIES, + PREDICATE +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 8ee9587ca2ae4..0bca269c14dba 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -18,6 +18,7 @@ import com.linkedin.metadata.query.AutoCompleteEntityArray; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import 
com.linkedin.metadata.search.utils.ESUtils; import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; @@ -54,10 +55,12 @@ public class AutocompleteRequestHandler { private final CustomizedQueryHandler customizedQueryHandler; private final EntitySpec entitySpec; + private final QueryFilterRewriteChain queryFilterRewriteChain; public AutocompleteRequestHandler( @Nonnull EntitySpec entitySpec, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { this.entitySpec = entitySpec; List fieldSpecs = entitySpec.getSearchableFieldSpecs(); this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); @@ -83,13 +86,18 @@ public AutocompleteRequestHandler( set1.addAll(set2); return set1; })); + this.queryFilterRewriteChain = queryFilterRewriteChain; } public static AutocompleteRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( - entitySpec, k -> new AutocompleteRequestHandler(entitySpec, customSearchConfiguration)); + entitySpec, + k -> + new AutocompleteRequestHandler( + entitySpec, customSearchConfiguration, queryFilterRewriteChain)); } public SearchRequest getSearchRequest( @@ -113,7 +121,7 @@ public SearchRequest getSearchRequest( // Initial query with input filters BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery( - filter, false, searchableFieldTypes, opContext.getAspectRetriever()); + filter, false, searchableFieldTypes, opContext, queryFilterRewriteChain); baseQuery.filter(filterQuery); // Add autocomplete query diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 163bae64169d5..748d21c985d00 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -27,6 +27,7 @@ import com.linkedin.metadata.search.SearchResultMetadata; import com.linkedin.metadata.search.SearchSuggestion; import com.linkedin.metadata.search.SearchSuggestionArray; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.features.Features; import com.linkedin.metadata.search.utils.ESAccessControlUtil; import com.linkedin.metadata.search.utils.ESUtils; @@ -78,17 +79,21 @@ public class SearchRequestHandler { private final AggregationQueryBuilder aggregationQueryBuilder; private final Map> searchableFieldTypes; + private final QueryFilterRewriteChain queryFilterRewriteChain; + private SearchRequestHandler( @Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { - this(ImmutableList.of(entitySpec), configs, customSearchConfiguration); + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + this(ImmutableList.of(entitySpec), configs, customSearchConfiguration, queryFilterRewriteChain); } private SearchRequestHandler( @Nonnull List entitySpecs, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { this.entitySpecs = entitySpecs; Map> entitySearchAnnotations = 
getSearchableAnnotations(); @@ -112,24 +117,31 @@ private SearchRequestHandler( set1.addAll(set2); return set1; })); + this.queryFilterRewriteChain = queryFilterRewriteChain; } public static SearchRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.of(entitySpec), - k -> new SearchRequestHandler(entitySpec, configs, customSearchConfiguration)); + k -> + new SearchRequestHandler( + entitySpec, configs, customSearchConfiguration, queryFilterRewriteChain)); } public static SearchRequestHandler getBuilder( @Nonnull List entitySpecs, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.copyOf(entitySpecs), - k -> new SearchRequestHandler(entitySpecs, configs, customSearchConfiguration)); + k -> + new SearchRequestHandler( + entitySpecs, configs, customSearchConfiguration, queryFilterRewriteChain)); } private Map> getSearchableAnnotations() { @@ -156,16 +168,17 @@ private Set getDefaultQueryFieldNames(List annotat public BoolQueryBuilder getFilterQuery( @Nonnull OperationContext opContext, @Nullable Filter filter) { - return getFilterQuery(opContext, filter, searchableFieldTypes); + return getFilterQuery(opContext, filter, searchableFieldTypes, queryFilterRewriteChain); } public static BoolQueryBuilder getFilterQuery( @Nonnull OperationContext opContext, @Nullable Filter filter, - Map> searchableFieldTypes) { + Map> searchableFieldTypes, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { BoolQueryBuilder 
filterQuery = ESUtils.buildFilterQuery( - filter, false, searchableFieldTypes, opContext.getAspectRetriever()); + filter, false, searchableFieldTypes, opContext, queryFilterRewriteChain); return applyDefaultSearchFilters(opContext, filter, filterQuery); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 6c9b339af42c7..f9ca0760aaf66 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -2,6 +2,9 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.models.annotation.SearchableAnnotation.OBJECT_FIELD_TYPES; +import static com.linkedin.metadata.query.filter.Condition.ANCESTORS_INCL; +import static com.linkedin.metadata.query.filter.Condition.DESCENDANTS_INCL; +import static com.linkedin.metadata.query.filter.Condition.RELATED_INCL; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder.SUBFIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.KEYWORD_FIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.PATH_HIERARCHY_FIELDS; @@ -19,6 +22,8 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; import io.datahubproject.metadata.context.OperationContext; import java.util.Arrays; import java.util.Collections; @@ -140,13 +145,14 @@ public static BoolQueryBuilder buildFilterQuery( @Nullable Filter filter, boolean isTimeseries, final Map> searchableFieldTypes, - @Nullable AspectRetriever aspectRetriever) { + 
@Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { BoolQueryBuilder finalQueryBuilder = QueryBuilders.boolQuery(); if (filter == null) { return finalQueryBuilder; } - StructuredPropertyUtils.validateFilter(filter, aspectRetriever); + StructuredPropertyUtils.validateFilter(filter, opContext.getAspectRetriever()); if (filter.getOr() != null) { // If caller is using the new Filters API, build boolean query from that. @@ -156,7 +162,11 @@ public static BoolQueryBuilder buildFilterQuery( or -> finalQueryBuilder.should( ESUtils.buildConjunctiveFilterQuery( - or, isTimeseries, searchableFieldTypes, aspectRetriever))); + or, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain))); // The default is not always 1 (ensure consistent default) finalQueryBuilder.minimumShouldMatch(1); } else if (filter.getCriteria() != null) { @@ -172,7 +182,11 @@ public static BoolQueryBuilder buildFilterQuery( || criterion.getCondition() == Condition.IS_NULL) { andQueryBuilder.must( getQueryBuilderFromCriterion( - criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); + criterion, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain)); } }); finalQueryBuilder.should(andQueryBuilder); @@ -187,7 +201,8 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( @Nonnull ConjunctiveCriterion conjunctiveCriterion, boolean isTimeseries, Map> searchableFieldTypes, - @Nullable AspectRetriever aspectRetriever) { + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { final BoolQueryBuilder andQueryBuilder = new BoolQueryBuilder(); conjunctiveCriterion .getAnd() @@ -200,11 +215,19 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( // `filter` instead of `must` (enables caching and bypasses scoring) andQueryBuilder.filter( getQueryBuilderFromCriterion( - criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); + criterion, + 
isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain)); } else { andQueryBuilder.mustNot( getQueryBuilderFromCriterion( - criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); + criterion, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain)); } } }); @@ -243,8 +266,9 @@ public static QueryBuilder getQueryBuilderFromCriterion( @Nonnull final Criterion criterion, boolean isTimeseries, final Map> searchableFieldTypes, - @Nullable AspectRetriever aspectRetriever) { - final String fieldName = toParentField(criterion.getField(), aspectRetriever); + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + final String fieldName = toParentField(criterion.getField(), opContext.getAspectRetriever()); /* * Check the field-name for a "sibling" field, or one which should ALWAYS @@ -259,11 +283,21 @@ public static QueryBuilder getQueryBuilderFromCriterion( if (maybeFieldToExpand.isPresent()) { return getQueryBuilderFromCriterionForFieldToExpand( - maybeFieldToExpand.get(), criterion, isTimeseries, searchableFieldTypes, aspectRetriever); + maybeFieldToExpand.get(), + criterion, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain); } return getQueryBuilderFromCriterionForSingleField( - criterion, isTimeseries, searchableFieldTypes, criterion.getField(), aspectRetriever); + criterion, + isTimeseries, + searchableFieldTypes, + criterion.getField(), + opContext, + queryFilterRewriteChain); } public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { @@ -501,7 +535,8 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( @Nonnull final Criterion criterion, final boolean isTimeseries, final Map> searchableFieldTypes, - @Nonnull AspectRetriever aspectRetriever) { + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { final BoolQueryBuilder orQueryBuilder 
= new BoolQueryBuilder(); for (String field : fields) { Criterion criterionToQuery = new Criterion(); @@ -513,10 +548,16 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( if (criterion.hasValue()) { criterionToQuery.setValue(criterion.getValue()); } - criterionToQuery.setField(toKeywordField(field, isTimeseries, aspectRetriever)); + criterionToQuery.setField( + toKeywordField(field, isTimeseries, opContext.getAspectRetriever())); orQueryBuilder.should( getQueryBuilderFromCriterionForSingleField( - criterionToQuery, isTimeseries, searchableFieldTypes, null, aspectRetriever) + criterionToQuery, + isTimeseries, + searchableFieldTypes, + null, + opContext, + queryFilterRewriteChain) .queryName(field)); } return orQueryBuilder; @@ -528,8 +569,10 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( boolean isTimeseries, final Map> searchableFieldTypes, @Nullable String queryName, - @Nonnull AspectRetriever aspectRetriever) { + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { final Condition condition = criterion.getCondition(); + final AspectRetriever aspectRetriever = opContext.getAspectRetriever(); final String fieldName = toParentField(criterion.getField(), aspectRetriever); if (condition == Condition.IS_NULL) { @@ -563,6 +606,18 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( } else if (condition == Condition.END_WITH) { return buildEndsWithConditionFromCriterion( fieldName, criterion, queryName, isTimeseries, aspectRetriever); + } else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) { + + return QueryFilterRewriterContext.builder() + .queryFilterRewriteChain(queryFilterRewriteChain) + .condition(condition) + .searchFlags(opContext.getSearchContext().getSearchFlags()) + .build(isTimeseries) + .rewrite( + opContext, + buildEqualsConditionFromCriterion( + fieldName, criterion, isTimeseries, searchableFieldTypes, 
aspectRetriever)) + .queryName(queryName != null ? queryName : fieldName); } } throw new UnsupportedOperationException("Unsupported condition: " + condition); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index 9b4d373d25d8f..cb364f41aa218 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.utils.ESUtils; @@ -100,18 +101,21 @@ public class ElasticSearchTimeseriesAspectService private final TimeseriesAspectIndexBuilders indexBuilders; private final RestHighLevelClient searchClient; private final ESAggregatedStatsDAO esAggregatedStatsDAO; + private final QueryFilterRewriteChain queryFilterRewriteChain; public ElasticSearchTimeseriesAspectService( @Nonnull RestHighLevelClient searchClient, @Nonnull TimeseriesAspectIndexBuilders indexBuilders, @Nonnull ESBulkProcessor bulkProcessor, - int numRetries) { + int numRetries, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { this.indexBuilders = indexBuilders; this.searchClient = searchClient; this.bulkProcessor = bulkProcessor; this.numRetries = numRetries; + this.queryFilterRewriteChain = queryFilterRewriteChain; - esAggregatedStatsDAO = new 
ESAggregatedStatsDAO(searchClient); + esAggregatedStatsDAO = new ESAggregatedStatsDAO(searchClient, queryFilterRewriteChain); } private static EnvelopedAspect parseDocument(@Nonnull SearchHit doc) { @@ -298,7 +302,8 @@ public long countByFilter( .getEntityRegistry() .getEntitySpec(entityName) .getSearchableFieldTypes(), - opContext.getAspectRetriever())); + opContext, + queryFilterRewriteChain)); CountRequest countRequest = new CountRequest(); countRequest.query(filterQueryBuilder); countRequest.indices(indexName); @@ -328,7 +333,7 @@ public List getAspectValues( QueryBuilders.boolQuery() .must( ESUtils.buildFilterQuery( - filter, true, searchableFieldTypes, opContext.getAspectRetriever())); + filter, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); filterQueryBuilder.must(QueryBuilders.matchQuery("urn", urn.toString())); // NOTE: We are interested only in the un-exploded rows as only they carry the `event` payload. filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); @@ -340,7 +345,7 @@ public List getAspectValues( .setValue(startTimeMillis.toString()); filterQueryBuilder.must( ESUtils.getQueryBuilderFromCriterion( - startTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); + startTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -350,7 +355,7 @@ public List getAspectValues( .setValue(endTimeMillis.toString()); filterQueryBuilder.must( ESUtils.getQueryBuilderFromCriterion( - endTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); + endTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQueryBuilder); @@ -435,7 +440,8 @@ public DeleteAspectValuesResult deleteAspectValues( filter, true, 
opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getAspectRetriever()); + opContext, + queryFilterRewriteChain); final Optional result = bulkProcessor @@ -471,7 +477,8 @@ public String deleteAspectValuesAsync( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getAspectRetriever()); + opContext, + queryFilterRewriteChain); final int batchSize = options.getBatchSize() > 0 ? options.getBatchSize() : DEFAULT_LIMIT; TimeValue timeout = options.getTimeoutSeconds() > 0 @@ -505,7 +512,8 @@ public String reindexAsync( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getAspectRetriever()); + opContext, + queryFilterRewriteChain); try { return this.reindexAsync(indexName, filterQueryBuilder, options); } catch (Exception e) { @@ -563,7 +571,7 @@ public TimeseriesScrollResult scrollAspects( QueryBuilders.boolQuery() .filter( ESUtils.buildFilterQuery( - filter, true, searchableFieldTypes, opContext.getAspectRetriever())); + filter, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); if (startTimeMillis != null) { Criterion startTimeCriterion = @@ -573,7 +581,7 @@ public TimeseriesScrollResult scrollAspects( .setValue(startTimeMillis.toString()); filterQueryBuilder.filter( ESUtils.getQueryBuilderFromCriterion( - startTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); + startTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -583,7 +591,7 @@ public TimeseriesScrollResult scrollAspects( .setValue(endTimeMillis.toString()); filterQueryBuilder.filter( ESUtils.getQueryBuilderFromCriterion( - endTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); + endTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } SearchResponse response = diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java index 1bf96841e5fe1..0ad8bd6f4bc54 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java @@ -11,6 +11,7 @@ import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec; import com.linkedin.metadata.models.TimeseriesFieldSpec; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.metadata.timeseries.elastic.indexbuilder.MappingsBuilder; import com.linkedin.timeseries.AggregationSpec; @@ -61,9 +62,13 @@ public class ESAggregatedStatsDAO { ES_AGGREGATION_PREFIX + ES_MAX_AGGREGATION_PREFIX + ES_FIELD_TIMESTAMP; private static final int MAX_TERM_BUCKETS = 24 * 60; // minutes in a day. 
private final RestHighLevelClient searchClient; + @Nonnull private final QueryFilterRewriteChain queryFilterRewriteChain; - public ESAggregatedStatsDAO(@Nonnull RestHighLevelClient searchClient) { + public ESAggregatedStatsDAO( + @Nonnull RestHighLevelClient searchClient, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { this.searchClient = searchClient; + this.queryFilterRewriteChain = queryFilterRewriteChain; } private static String toEsAggName(final String aggName) { @@ -375,7 +380,8 @@ public GenericTable getAggregatedStats( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getAspectRetriever()); + opContext, + queryFilterRewriteChain); AspectSpec aspectSpec = getTimeseriesAspectSpec(opContext, entityName, aspectName); // Build and attach the grouping aggregations diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index eee0e0d0f2ec6..3cb7e8bd3fb1b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -56,6 +56,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.ranker.SimpleRanker; @@ -206,9 +207,14 @@ private ElasticSearchService buildEntitySearchService() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); ESBrowseDAO 
browseDAO = - new ESBrowseDAO(searchClientSpy, getSearchConfiguration(), getCustomSearchConfiguration()); + new ESBrowseDAO( + searchClientSpy, + getSearchConfiguration(), + getCustomSearchConfiguration(), + QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(searchClientSpy, getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index 1cd0c9550a0fc..45bc8548706bb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -30,6 +30,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.ranker.SimpleRanker; @@ -131,10 +132,14 @@ private ElasticSearchService buildEntitySearchService() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); ESBrowseDAO browseDAO = new ESBrowseDAO( - getSearchClient(), getSearchConfiguration(), getCustomSearchConfiguration()); + getSearchClient(), + getSearchConfiguration(), + getCustomSearchConfiguration(), + QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(getSearchClient(), getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index b04c7d2bc60b9..56708d7ca814e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; @@ -94,10 +95,14 @@ private ElasticSearchService buildService() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); ESBrowseDAO browseDAO = new ESBrowseDAO( - getSearchClient(), getSearchConfiguration(), getCustomSearchConfiguration()); + getSearchClient(), + getSearchConfiguration(), + getCustomSearchConfiguration(), + QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(getSearchClient(), getBulkProcessor(), 1); ElasticSearchService searchService = new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java index 75da2bc62aaad..ea9658e9c585e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java @@ -15,7 +15,7 @@ import 
com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder; -import com.linkedin.metadata.search.elasticsearch.query.request.TestSearchFieldConfig; +import com.linkedin.metadata.search.query.request.TestSearchFieldConfig; import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.util.Pair; import java.io.Serializable; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index b71e4ddc54a78..9c3d515f9322f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -10,6 +10,7 @@ import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import com.linkedin.r2.RemoteInvocationException; import io.datahubproject.metadata.context.OperationContext; @@ -50,7 +51,12 @@ public void setup() throws RemoteInvocationException, URISyntaxException { .prefix("es_browse_dao_test") .hashIdAlgo("MD5") .build())); - browseDAO = new ESBrowseDAO(mockClient, searchConfiguration, customSearchConfiguration); + browseDAO = + new ESBrowseDAO( + mockClient, + searchConfiguration, + customSearchConfiguration, + QueryFilterRewriteChain.EMPTY); } public static Urn makeUrn(Object id) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java index 
4b9e5da82bcd0..e0258f0593339 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java @@ -27,6 +27,7 @@ import com.linkedin.metadata.search.SearchResultMetadata; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.opensearch.SearchDAOOpenSearchTest; import com.linkedin.metadata.utils.SearchUtil; import io.datahubproject.metadata.context.OperationContext; @@ -233,7 +234,8 @@ public void testTransformIndexIntoEntityNameSingle() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); // Empty aggregations final SearchResultMetadata searchResultMetadata = new SearchResultMetadata().setAggregations(new AggregationMetadataArray()); @@ -323,7 +325,8 @@ public void testTransformIndexIntoEntityNameNested() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); // One nested facet Map entityTypeMap = Map.of( @@ -461,7 +464,8 @@ public void testExplain() { ? 
ELASTICSEARCH_IMPLEMENTATION_OPENSEARCH : ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); ExplainResponse explainResponse = searchDAO.explain( getOperationContext() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java new file mode 100644 index 0000000000000..2c49567d49ea7 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java @@ -0,0 +1,365 @@ +package com.linkedin.metadata.search.query.filter; + +import static com.linkedin.metadata.Constants.CONTAINER_ENTITY_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; +import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.isNull; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.RelatedEntities; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.SearchFlags; +import 
com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.search.elasticsearch.query.filter.ContainerExpansionRewriter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.List; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermsQueryBuilder; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class ContainerExpansionRewriterTest { + private static final String FIELD_NAME = "container.keyword"; + private final String grandParentUrn = "urn:li:container:grand"; + private final String parentUrn = "urn:li:container:foo"; + private final String parentUrn2 = "urn:li:container:foo2"; + private final String childUrn = "urn:li:container:bar"; + private final String childUrn2 = "urn:li:container:bar2"; + + private OperationContext opContext; + private GraphRetriever mockGraphRetriever; + + @BeforeMethod + public void init() { + EntityRegistry entityRegistry = new TestEntityRegistry(); + AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + + mockGraphRetriever = spy(GraphRetriever.class); + RetrieverContext mockRetrieverContext = mock(RetrieverContext.class); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + 
when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever); + + opContext = + TestOperationContexts.systemContext( + null, + null, + null, + () -> entityRegistry, + () -> + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .graphRetriever(mockGraphRetriever) + .searchRetriever(TestOperationContexts.emptySearchRetriever) + .build(), + null, + null); + } + + @Test + public void testTermsQueryRewrite() { + ContainerExpansionRewriter test = + ContainerExpansionRewriter.builder() + .config(QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration.DEFAULT) + .build(); + + TermsQueryBuilder notTheFieldQuery = QueryBuilders.termsQuery("notTheField", childUrn); + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + notTheFieldQuery), + notTheFieldQuery, + "Expected no rewrite due to non-applicable field"); + + TermsQueryBuilder disabledRewriteQuery = QueryBuilders.termsQuery(FIELD_NAME, childUrn); + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .searchFlags(new SearchFlags().setRewriteQuery(false)) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + disabledRewriteQuery), + disabledRewriteQuery, + "Expected no rewrite due to disabled rewrite searchFlags"); + + // Setup nested + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + 
eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 1, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.OUTGOING, null)))); + + TermsQueryBuilder testQuery = QueryBuilders.termsQuery(FIELD_NAME, childUrn); + TermsQueryBuilder expectedRewrite = QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite"); + } + + @Test + public void testTermsQueryRewritePagination() { + ContainerExpansionRewriter test = + ContainerExpansionRewriter.builder() + .config( + new QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration(true, 1, 100)) + .build(); + + // Setup nested + // Page 1 + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + "page2", + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.OUTGOING, null)))); + + // Page 2 + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + 
eq(Edge.EDGE_SORT_CRITERION), + eq("page2"), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn2, RelationshipDirection.OUTGOING, null)))); + + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq( + QueryUtils.newDisjunctiveFilter( + newCriterion("urn", List.of(parentUrn2, parentUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + "page2", + List.of( + new RelatedEntities( + "IsPartOf", + parentUrn, + grandParentUrn, + RelationshipDirection.OUTGOING, + null)))); + + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq( + QueryUtils.newDisjunctiveFilter( + newCriterion("urn", List.of(parentUrn2, parentUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + eq("page2"), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", + parentUrn2, + grandParentUrn, + RelationshipDirection.OUTGOING, + null)))); + + TermsQueryBuilder testQuery = QueryBuilders.termsQuery(FIELD_NAME, childUrn); + TermsQueryBuilder expectedRewrite = + QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn, parentUrn2, grandParentUrn); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + 
.build(false), + testQuery), + expectedRewrite, + "Expected rewrite"); + } + + @Test + public void testNestedBoolQueryRewrite() { + ContainerExpansionRewriter test = + ContainerExpansionRewriter.builder() + .config( + new QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration(true, 1, 100)) + .build(); + + // Setup nested container + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 1, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.OUTGOING, null)))); + + BoolQueryBuilder testQuery = QueryBuilders.boolQuery(); + testQuery.filter( + QueryBuilders.boolQuery() + .filter( + QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + testQuery.filter(QueryBuilders.existsQuery("someField")); + testQuery.should( + QueryBuilders.boolQuery() + .should( + QueryBuilders.boolQuery().should(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + testQuery.should(QueryBuilders.existsQuery("someField")); + testQuery.must( + QueryBuilders.boolQuery() + .must(QueryBuilders.boolQuery().must(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + testQuery.must(QueryBuilders.existsQuery("someField")); + testQuery.mustNot( + QueryBuilders.boolQuery() + .mustNot( + QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + testQuery.mustNot(QueryBuilders.existsQuery("someField")); + + BoolQueryBuilder expectedRewrite = QueryBuilders.boolQuery(); + expectedRewrite.filter( + QueryBuilders.boolQuery() + .filter( + QueryBuilders.boolQuery() + 
.filter(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.filter(QueryBuilders.existsQuery("someField")); + expectedRewrite.should( + QueryBuilders.boolQuery() + .should( + QueryBuilders.boolQuery() + .should(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.should(QueryBuilders.existsQuery("someField")); + expectedRewrite.must( + QueryBuilders.boolQuery() + .must( + QueryBuilders.boolQuery() + .must(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.must(QueryBuilders.existsQuery("someField")); + expectedRewrite.mustNot( + QueryBuilders.boolQuery() + .mustNot( + QueryBuilders.boolQuery() + .mustNot(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.mustNot(QueryBuilders.existsQuery("someField")); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite of nested filters and pass through for non-container fields"); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java new file mode 100644 index 0000000000000..8ee7dd3718ca9 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java @@ -0,0 +1,370 @@ +package com.linkedin.metadata.search.query.filter; + +import static com.linkedin.metadata.Constants.DOMAIN_ENTITY_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; +import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; +import static 
org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.isNull; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.RelatedEntities; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.search.elasticsearch.query.filter.DomainExpansionRewriter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.List; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermsQueryBuilder; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DomainExpansionRewriterTest { + private static final String FIELD_NAME 
= "domains.keyword"; + private final String grandParentUrn = "urn:li:domain:grand"; + private final String parentUrn = "urn:li:domain:foo"; + private final String parentUrn2 = "urn:li:domain:foo2"; + private final String childUrn = "urn:li:domain:bar"; + private final String childUrn2 = "urn:li:domain:bar2"; + + private OperationContext opContext; + private GraphRetriever mockGraphRetriever; + + @BeforeMethod + public void init() { + EntityRegistry entityRegistry = new TestEntityRegistry(); + AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + + mockGraphRetriever = spy(GraphRetriever.class); + RetrieverContext mockRetrieverContext = mock(RetrieverContext.class); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever); + + opContext = + TestOperationContexts.systemContext( + null, + null, + null, + () -> entityRegistry, + () -> + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .graphRetriever(mockGraphRetriever) + .searchRetriever(TestOperationContexts.emptySearchRetriever) + .build(), + null, + null); + } + + @Test + public void testTermsQueryRewrite() { + DomainExpansionRewriter test = + DomainExpansionRewriter.builder() + .config(QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration.DEFAULT) + .build(); + + TermsQueryBuilder notTheFieldQuery = QueryBuilders.termsQuery("notTheField", parentUrn); + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + notTheFieldQuery), + notTheFieldQuery, + "Expected no rewrite due to non-applicable field"); + + TermsQueryBuilder disabledRewriteQuery = 
QueryBuilders.termsQuery(FIELD_NAME, parentUrn); + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .searchFlags(new SearchFlags().setRewriteQuery(false)) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + disabledRewriteQuery), + disabledRewriteQuery, + "Expected no rewrite due to disabled rewrite searchFlags"); + + // Setup nested + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(parentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 1, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.INCOMING, null)))); + + TermsQueryBuilder testQuery = QueryBuilders.termsQuery(FIELD_NAME, parentUrn); + TermsQueryBuilder expectedRewrite = QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite"); + } + + @Test + public void testTermsQueryRewritePagination() { + DomainExpansionRewriter test = + DomainExpansionRewriter.builder() + .config( + new QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration(true, 1, 100)) + .build(); + + // Setup nested + // Page 1 + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + 
eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(grandParentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + "page2", + List.of( + new RelatedEntities( + "IsPartOf", + parentUrn, + grandParentUrn, + RelationshipDirection.INCOMING, + null)))); + + // Page 2 + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(grandParentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + eq("page2"), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", + parentUrn2, + grandParentUrn, + RelationshipDirection.INCOMING, + null)))); + + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq( + QueryUtils.newDisjunctiveFilter( + newCriterion("urn", List.of(parentUrn2, parentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + "page2", + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.INCOMING, null)))); + + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq( + QueryUtils.newDisjunctiveFilter( + newCriterion("urn", List.of(parentUrn2, parentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + 
eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + eq("page2"), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn2, parentUrn2, RelationshipDirection.INCOMING, null)))); + + TermsQueryBuilder testQuery = QueryBuilders.termsQuery(FIELD_NAME, grandParentUrn); + TermsQueryBuilder expectedRewrite = + QueryBuilders.termsQuery( + FIELD_NAME, childUrn, childUrn2, parentUrn, parentUrn2, grandParentUrn); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite"); + } + + @Test + public void testNestedBoolQueryRewrite() { + DomainExpansionRewriter test = + DomainExpansionRewriter.builder() + .config( + new QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration(true, 1, 100)) + .build(); + + // Setup nested + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(parentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 1, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.INCOMING, null)))); + + BoolQueryBuilder testQuery = QueryBuilders.boolQuery(); + testQuery.filter( + QueryBuilders.boolQuery() + .filter( + QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + 
testQuery.filter(QueryBuilders.boolQuery().filter(QueryBuilders.existsQuery("someField"))); + testQuery.should( + QueryBuilders.boolQuery() + .should( + QueryBuilders.boolQuery().should(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + testQuery.should(QueryBuilders.boolQuery().should(QueryBuilders.existsQuery("someField"))); + testQuery.must( + QueryBuilders.boolQuery() + .must(QueryBuilders.boolQuery().must(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + testQuery.must(QueryBuilders.boolQuery().must(QueryBuilders.existsQuery("someField"))); + testQuery.mustNot( + QueryBuilders.boolQuery() + .mustNot( + QueryBuilders.boolQuery() + .mustNot(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + testQuery.mustNot(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("someField"))); + + BoolQueryBuilder expectedRewrite = QueryBuilders.boolQuery(); + expectedRewrite.filter( + QueryBuilders.boolQuery() + .filter( + QueryBuilders.boolQuery() + .filter(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.filter( + QueryBuilders.boolQuery().filter(QueryBuilders.existsQuery("someField"))); + expectedRewrite.should( + QueryBuilders.boolQuery() + .should( + QueryBuilders.boolQuery() + .should(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.should( + QueryBuilders.boolQuery().should(QueryBuilders.existsQuery("someField"))); + expectedRewrite.must( + QueryBuilders.boolQuery() + .must( + QueryBuilders.boolQuery() + .must(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.must(QueryBuilders.boolQuery().must(QueryBuilders.existsQuery("someField"))); + expectedRewrite.mustNot( + QueryBuilders.boolQuery() + .mustNot( + QueryBuilders.boolQuery() + .mustNot(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.mustNot( + QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("someField"))); + + assertEquals( + test.rewrite( + opContext, + 
QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite of nested and pass through of other fields."); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index 9376552f7abc5..572d79ebf2f0c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -10,6 +10,7 @@ import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; @@ -33,7 +34,9 @@ public class AutocompleteRequestHandlerTest { private AutocompleteRequestHandler handler = AutocompleteRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), CustomSearchConfiguration.builder().build()); + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder().build(), + QueryFilterRewriteChain.EMPTY); private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); @@ -170,7 +173,8 @@ public void testCustomConfigWithDefault() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build()); + .build(), + 
QueryFilterRewriteChain.EMPTY); SearchRequest autocompleteRequest = withoutDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -195,7 +199,8 @@ public void testCustomConfigWithDefault() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -237,7 +242,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); SearchRequest autocompleteRequest = withInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -275,7 +281,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); autocompleteRequest = noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -337,7 +344,8 @@ public void testCustomConfigWithFunctionScores() { Map.of( "deprecated", Map.of("value", false))))))) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); SearchRequest autocompleteRequest = explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -389,7 +397,8 @@ public void testCustomConfigWithFunctionScores() { Map.of( "deprecated", Map.of("value", false))))))) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 2f7120e1f0b5e..7da0a14f21279 100644 
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search.query.request; import static com.linkedin.metadata.utils.SearchUtil.*; +import static org.mockito.Mockito.mock; import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; @@ -17,6 +18,7 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.search.config.SearchCommonTestConfiguration; @@ -94,7 +96,8 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { public void testDatasetFieldsAndHighlights() { EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec("dataset"); SearchRequestHandler datasetHandler = - SearchRequestHandler.getBuilder(entitySpec, testQueryConfig, null); + SearchRequestHandler.getBuilder( + entitySpec, testQueryConfig, null, QueryFilterRewriteChain.EMPTY); /* Ensure efficient query performance, we do not expect upstream/downstream/fineGrained lineage @@ -114,7 +117,11 @@ public void testDatasetFieldsAndHighlights() { public void testCustomHighlights() { EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec("dataset"); SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + testQueryConfig, + null, + mock(QueryFilterRewriteChain.class)); SearchRequest searchRequest = 
requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -139,7 +146,8 @@ public void testCustomHighlights() { @Test public void testSearchRequestHandlerHighlightingTurnedOff() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); SearchRequest searchRequest = requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -179,7 +187,8 @@ public void testSearchRequestHandlerHighlightingTurnedOff() { @Test public void testSearchRequestHandler() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); SearchRequest searchRequest = requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -242,7 +251,8 @@ public void testSearchRequestHandler() { @Test public void testAggregationsInSearch() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); final String nestedAggString = String.format("_entityType%stextFieldOverride", AGGREGATION_SEPARATOR_CHAR); SearchRequest searchRequest = @@ -310,7 +320,8 @@ public void testAggregationsInSearch() { public void testFilteredSearch() { final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); final BoolQueryBuilder testQuery = constructFilterQuery(requestHandler, false); @@ -666,7 +677,8 @@ public void 
testBrowsePathQueryFilter() { SearchRequestHandler.getFilterQuery( operationContext.withSearchFlags(flags -> flags.setFulltext(false)), filter, - new HashMap<>()); + new HashMap<>(), + QueryFilterRewriteChain.EMPTY); assertEquals(test.should().size(), 1); @@ -689,7 +701,8 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); return (BoolQueryBuilder) requestHandler diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/TestSearchFieldConfig.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/TestSearchFieldConfig.java similarity index 94% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/TestSearchFieldConfig.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/query/request/TestSearchFieldConfig.java index 062298796dd7c..61dcc5a9b4975 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/TestSearchFieldConfig.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/TestSearchFieldConfig.java @@ -1,8 +1,9 @@ -package com.linkedin.metadata.search.elasticsearch.query.request; +package com.linkedin.metadata.search.query.request; import com.linkedin.metadata.models.SearchableRefFieldSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig; import java.util.Optional; import java.util.Set; import org.junit.jupiter.api.Assertions; diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index def14f9be7054..63550ca6c5dc5 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -26,7 +26,7 @@ import com.linkedin.metadata.models.SearchableRefFieldSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.search.elasticsearch.query.request.TestSearchFieldConfig; +import com.linkedin.metadata.search.query.request.TestSearchFieldConfig; import com.linkedin.r2.RemoteInvocationException; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RetrieverContext; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index bbc494159a498..94241ec5e89b0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -15,8 +15,10 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.r2.RemoteInvocationException; import com.linkedin.structured.StructuredPropertyDefinition; +import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.util.HashMap; @@ -87,7 +89,11 @@ public void 
testGetQueryBuilderFromCriterionEqualsValues() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"terms\" : {\n" @@ -108,7 +114,11 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { result = ESUtils.getQueryBuilderFromCriterion( - multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"terms\" : {\n" @@ -130,7 +140,11 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); + timeseriesField, + true, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"terms\" : {\n" @@ -152,7 +166,11 @@ public void testGetQueryBuilderFromCriterionContain() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); String expected = "{\n" @@ -175,7 +193,11 @@ public void testGetQueryBuilderFromCriterionContain() { result = ESUtils.getQueryBuilderFromCriterion( - multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); expected = "{\n" @@ -218,7 +240,11 @@ public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + 
singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); String expected = "{\n" @@ -241,7 +267,11 @@ public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { result = ESUtils.getQueryBuilderFromCriterion( - multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); expected = "{\n" @@ -281,7 +311,11 @@ public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); String expected = "{\n" @@ -303,7 +337,11 @@ public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { result = ESUtils.getQueryBuilderFromCriterion( - multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); expected = "{\n" @@ -343,7 +381,11 @@ public void testGetQueryBuilderFromCriterionExists() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -368,7 +410,11 @@ public void testGetQueryBuilderFromCriterionExists() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); + timeseriesField, + true, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" @@ -395,7 +441,11 @@ public void 
testGetQueryBuilderFromCriterionIsNull() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -420,7 +470,11 @@ public void testGetQueryBuilderFromCriterionIsNull() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); + timeseriesField, + true, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" @@ -453,7 +507,11 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { // Ensure that the query is expanded! QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -493,7 +551,11 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { // Ensure that the query is expanded without keyword. 
result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); + timeseriesField, + true, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" @@ -535,9 +597,11 @@ public void testGetQueryBuilderFromStructPropEqualsValue() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1"))); + OperationContext opContext = mock(OperationContext.class); + when(opContext.getAspectRetriever()).thenReturn(aspectRetriever); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), aspectRetriever); + singleValueCriterion, false, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"terms\" : {\n" @@ -560,9 +624,15 @@ public void testGetQueryBuilderFromStructPropEqualsValueV1() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1"))); + OperationContext opContextV1 = mock(OperationContext.class); + when(opContextV1.getAspectRetriever()).thenReturn(aspectRetrieverV1); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), aspectRetrieverV1); + singleValueCriterion, + false, + new HashMap<>(), + opContextV1, + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"terms\" : {\n" @@ -581,9 +651,11 @@ public void testGetQueryBuilderFromStructPropExists() { final Criterion singleValueCriterion = new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); + OperationContext opContext = mock(OperationContext.class); + when(opContext.getAspectRetriever()).thenReturn(aspectRetriever); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), aspectRetriever); + singleValueCriterion, false, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " 
\"bool\" : {\n" @@ -608,7 +680,7 @@ public void testGetQueryBuilderFromStructPropExists() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), aspectRetriever); + timeseriesField, true, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" @@ -633,9 +705,15 @@ public void testGetQueryBuilderFromStructPropExistsV1() { final Criterion singleValueCriterion = new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); + OperationContext opContextV1 = mock(OperationContext.class); + when(opContextV1.getAspectRetriever()).thenReturn(aspectRetrieverV1); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), aspectRetrieverV1); + singleValueCriterion, + false, + new HashMap<>(), + opContextV1, + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -660,7 +738,7 @@ public void testGetQueryBuilderFromStructPropExistsV1() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), aspectRetrieverV1); + timeseriesField, true, new HashMap<>(), opContextV1, QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 6c650e725fd5c..15597132289b2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -37,6 +37,7 @@ import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import 
com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; @@ -148,7 +149,8 @@ private ElasticSearchTimeseriesAspectService buildService() { opContext.getEntityRegistry(), opContext.getSearchContext().getIndexConvention()), getBulkProcessor(), - 1); + 1, + QueryFilterRewriteChain.EMPTY); } /* diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java index b77902d34b2d7..db9d8b450ef7a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java @@ -5,6 +5,7 @@ import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.NumericNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; @@ -39,7 +40,11 @@ public class TimeseriesAspectServiceUnitTest { private final RestClient _restClient = mock(RestClient.class); private final TimeseriesAspectService _timeseriesAspectService = new ElasticSearchTimeseriesAspectService( - _searchClient, _timeseriesAspectIndexBuilders, _bulkProcessor, 0); + _searchClient, + _timeseriesAspectIndexBuilders, + _bulkProcessor, + 0, + QueryFilterRewriteChain.EMPTY); private final OperationContext opContext = 
TestOperationContexts.systemContextNoSearchAuthorization(_indexConvention); diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 20f6084f95329..781201f3478f9 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -28,6 +28,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.ranker.SearchRanker; @@ -74,6 +75,8 @@ public class SampleDataFixtureConfiguration { @Autowired private CustomSearchConfiguration _customSearchConfiguration; + @Autowired private QueryFilterRewriteChain queryFilterRewriteChain; + @Bean(name = "sampleDataPrefix") protected String sampleDataPrefix() { return "smpldat"; @@ -197,9 +200,14 @@ protected ElasticSearchService entitySearchServiceHelper(EntityIndexBuilders ind false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, - customSearchConfiguration); + customSearchConfiguration, + queryFilterRewriteChain); ESBrowseDAO browseDAO = - new ESBrowseDAO(_searchClient, _searchConfiguration, _customSearchConfiguration); + new ESBrowseDAO( + _searchClient, + _searchConfiguration, + _customSearchConfiguration, + queryFilterRewriteChain); ESWriteDAO writeDAO = new ESWriteDAO(_searchClient, _bulkProcessor, 1); return new ElasticSearchService(indexBuilders, searchDAO, 
browseDAO, writeDAO); } diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index 71ccaa1d32501..918463ec59b36 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -30,6 +30,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.ranker.SearchRanker; @@ -132,16 +133,19 @@ protected EntityIndexBuilders entityIndexBuilders( @Bean(name = "searchLineageEntitySearchService") protected ElasticSearchService entitySearchService( - @Qualifier("searchLineageEntityIndexBuilders") EntityIndexBuilders indexBuilders) { + @Qualifier("searchLineageEntityIndexBuilders") EntityIndexBuilders indexBuilders, + final QueryFilterRewriteChain queryFilterRewriteChain) { ESSearchDAO searchDAO = new ESSearchDAO( searchClient, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, searchConfiguration, - null); + null, + queryFilterRewriteChain); ESBrowseDAO browseDAO = - new ESBrowseDAO(searchClient, searchConfiguration, customSearchConfiguration); + new ESBrowseDAO( + searchClient, searchConfiguration, customSearchConfiguration, queryFilterRewriteChain); ESWriteDAO writeDAO = new ESWriteDAO(searchClient, bulkProcessor, 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); diff --git 
a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java index 20fb8c3832504..547ab1d746dbe 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.WordGramConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import org.springframework.boot.test.context.TestConfiguration; @@ -55,4 +56,9 @@ public CustomSearchConfiguration customSearchConfiguration() throws Exception { public OperationContext queryOperationContext() { return TestOperationContexts.systemContextNoSearchAuthorization(); } + + @Bean + public QueryFilterRewriteChain queryFilterRewriteChain() { + return QueryFilterRewriteChain.EMPTY; + } } diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index 68768051eccad..b34bb5bd0e0a8 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -40,7 +40,8 @@ "com.linkedin.metadata.dao.producer", "com.linkedin.gms.factory.change", "com.datahub.event.hook", - "com.linkedin.gms.factory.notifications" + 
"com.linkedin.gms.factory.notifications", + "com.linkedin.gms.factory.search.filter" }) public class MCLSpringCommonTestConfiguration { diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl index 0561a9c6f7374..a3d2067ae5db2 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl @@ -53,4 +53,9 @@ record SearchFlags { * Include mentioned fields inside elastic highlighting query */ customHighlightingFields:optional array[string] + + /** + * invoke query rewrite chain for filters based on configured rewriters + */ + rewriteQuery: optional boolean = true } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl index 0578fd6e7c5e7..a79055ea3db54 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl @@ -59,4 +59,19 @@ enum Condition { * Represent the relation: String field starts with value, e.g. 
name starts with PageView */ START_WITH + + /** + * Represent the relation: URN field any nested children in addition to the given URN + */ + DESCENDANTS_INCL + + /** + * Represent the relation: URN field matches any nested parent in addition to the given URN + */ + ANCESTORS_INCL + + /** + * Represent the relation: URN field matches any nested child or parent in addition to the given URN + */ + RELATED_INCL } \ No newline at end of file diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java index 28cc9304bf913..cc96429c65e76 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java @@ -4,6 +4,7 @@ import com.linkedin.metadata.config.cache.CacheConfiguration; import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; +import com.linkedin.metadata.config.search.SearchServiceConfiguration; import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import lombok.Data; @@ -37,6 +38,9 @@ public class DataHubAppConfiguration { /** ElasticSearch configurations */ private ElasticSearchConfiguration elasticSearch; + /* Search Service configurations */ + private SearchServiceConfiguration searchService; + /** System Update configurations */ private SystemUpdateConfiguration systemUpdate; diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/QueryFilterRewriterConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/QueryFilterRewriterConfiguration.java new file mode 100644 index 0000000000000..f8b62d3c4bb7f --- /dev/null +++ 
b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/QueryFilterRewriterConfiguration.java @@ -0,0 +1,24 @@ +package com.linkedin.metadata.config.search; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +public class QueryFilterRewriterConfiguration { + + private ExpansionRewriterConfiguration containerExpansion; + private ExpansionRewriterConfiguration domainExpansion; + + @NoArgsConstructor + @AllArgsConstructor + @Data + public static class ExpansionRewriterConfiguration { + public static final ExpansionRewriterConfiguration DEFAULT = + new ExpansionRewriterConfiguration(false, 100, 100); + + boolean enabled; + private int pageSize; + private int limit; + } +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchServiceConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchServiceConfiguration.java new file mode 100644 index 0000000000000..6c6d397b3fec1 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchServiceConfiguration.java @@ -0,0 +1,8 @@ +package com.linkedin.metadata.config.search; + +import lombok.Data; + +@Data +public class SearchServiceConfiguration { + private QueryFilterRewriterConfiguration queryFilterRewriter; +} diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 795fa10d33c8b..8abed3dcb44cc 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -108,6 +108,15 @@ searchService: cache: hazelcast: serviceName: ${SEARCH_SERVICE_HAZELCAST_SERVICE_NAME:hazelcast-service} + queryFilterRewriter: + containerExpansion: + enabled: ${SEARCH_SERVICE_FILTER_CONTAINER_EXPANSION_ENABLED:true} + pageSize: 
${SEARCH_SERVICE_FILTER_CONTAINER_EXPANSION_PAGE_SIZE:100} + limit: ${SEARCH_SERVICE_FILTER_CONTAINER_EXPANSION_LIMIT:100} + domainExpansion: + enabled: ${SEARCH_SERVICE_FILTER_DOMAIN_EXPANSION_ENABLED:true} + pageSize: ${SEARCH_SERVICE_FILTER_DOMAIN_EXPANSION_PAGE_SIZE:100} + limit: ${SEARCH_SERVICE_FILTER_DOMAIN_EXPANSION_LIMIT:100} configEntityRegistry: path: ${ENTITY_REGISTRY_CONFIG_PATH:../../metadata-models/src/main/resources/entity-registry.yml} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java index 7a40c474ace74..d921e20f722cc 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java @@ -15,6 +15,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import java.io.IOException; import javax.annotation.Nonnull; @@ -55,7 +56,9 @@ public class ElasticSearchServiceFactory { @Bean(name = "elasticSearchService") @Nonnull - protected ElasticSearchService getInstance(final ConfigurationProvider configurationProvider) + protected ElasticSearchService getInstance( + final ConfigurationProvider configurationProvider, + final QueryFilterRewriteChain queryFilterRewriteChain) throws IOException { log.info("Search configuration: {}", configurationProvider.getElasticSearch().getSearch()); @@ -73,12 +76,16 @@ protected ElasticSearchService getInstance(final ConfigurationProvider configura 
configurationProvider.getFeatureFlags().isPointInTimeCreationEnabled(), elasticSearchConfiguration.getImplementation(), searchConfiguration, - customSearchConfiguration); + customSearchConfiguration, + queryFilterRewriteChain); return new ElasticSearchService( entityIndexBuilders, esSearchDAO, new ESBrowseDAO( - components.getSearchClient(), searchConfiguration, customSearchConfiguration), + components.getSearchClient(), + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain), new ESWriteDAO( components.getSearchClient(), components.getBulkProcessor(), diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/filter/QueryFilterRewriterChainFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/filter/QueryFilterRewriterChainFactory.java new file mode 100644 index 0000000000000..2ddb0e6af9255 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/filter/QueryFilterRewriterChainFactory.java @@ -0,0 +1,51 @@ +package com.linkedin.gms.factory.search.filter; + +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.search.elasticsearch.query.filter.ContainerExpansionRewriter; +import com.linkedin.metadata.search.elasticsearch.query.filter.DomainExpansionRewriter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriter; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class QueryFilterRewriterChainFactory { + + @Bean + @ConditionalOnProperty( + name = "searchService.queryFilterRewriter.containerExpansion.enabled", + havingValue = "true") + public 
QueryFilterRewriter containerExpansionRewriter( + final ConfigurationProvider configurationProvider) { + return ContainerExpansionRewriter.builder() + .config( + configurationProvider + .getSearchService() + .getQueryFilterRewriter() + .getContainerExpansion()) + .build(); + } + + @Bean + @ConditionalOnProperty( + name = "searchService.queryFilterRewriter.domainExpansion.enabled", + havingValue = "true") + public QueryFilterRewriter domainExpansionRewriter( + final ConfigurationProvider configurationProvider) { + return DomainExpansionRewriter.builder() + .config( + configurationProvider.getSearchService().getQueryFilterRewriter().getDomainExpansion()) + .build(); + } + + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") + @Bean + public QueryFilterRewriteChain queryFilterRewriteChain( + Optional<List<QueryFilterRewriter>> queryFilterRewriters) { + return new QueryFilterRewriteChain(queryFilterRewriters.orElse(Collections.emptyList())); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java index c68db9c3d3e5e..e26de0e730195 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java @@ -3,6 +3,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders; 
import javax.annotation.Nonnull; @@ -25,12 +26,14 @@ public class ElasticSearchTimeseriesAspectServiceFactory { @Bean(name = "elasticSearchTimeseriesAspectService") @Nonnull - protected ElasticSearchTimeseriesAspectService getInstance() { + protected ElasticSearchTimeseriesAspectService getInstance( + final QueryFilterRewriteChain queryFilterRewriteChain) { return new ElasticSearchTimeseriesAspectService( components.getSearchClient(), new TimeseriesAspectIndexBuilders( components.getIndexBuilder(), entityRegistry, components.getIndexConvention()), components.getBulkProcessor(), - components.getNumRetries()); + components.getNumRetries(), + queryFilterRewriteChain); } } diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json index c4532cba9e6be..e8cc193f3458d 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json @@ -56,9 +56,11 @@ "type" : "enum", "name" : "Condition", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { + "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. 
name contains Profile", + "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", @@ -68,6 +70,7 @@ "IS_NULL" : "Represent the relation: field is null, e.g. platform is null", "LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3", "LESS_THAN_OR_EQUAL_TO" : "Represent the relation less than or equal to, e.g. ownerCount <= 3", + "RELATED_INCL" : "Represent the relation: URN field matches any nested child or parent in addition to the given URN", "START_WITH" : "Represent the relation: String field starts with value, e.g. name starts with PageView" } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 3688311b1f234..bc4d222e316b0 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -162,9 +162,11 @@ "type" : "enum", "name" : "Condition", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { + "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the 
relation: String field contains value, e.g. name contains Profile", + "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", @@ -174,6 +176,7 @@ "IS_NULL" : "Represent the relation: field is null, e.g. platform is null", "LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3", "LESS_THAN_OR_EQUAL_TO" : "Represent the relation less than or equal to, e.g. ownerCount <= 3", + "RELATED_INCL" : "Represent the relation: URN field matches any nested child or parent in addition to the given URN", "START_WITH" : "Represent the relation: String field starts with value, e.g. name starts with PageView" } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 204793886b366..982a409ef8e4b 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -4542,7 +4542,11 @@ "name" : "description", "type" : "string", "doc" : "Documentation of the MLPrimaryKey", - "optional" : true + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT", + "hasValuesFieldName" : "hasDescription" + } }, { "name" : "dataType", "type" : "com.linkedin.common.MLFeatureDataType", @@ -6040,15 +6044,23 @@ }, "doc" : "Include mentioned fields inside elastic highlighting query", "optional" : true + }, { + "name" : "rewriteQuery", + "type" : "boolean", + "doc" : "invoke query rewrite chain for filters based on configured rewriters", + "default" : true, + "optional" : true } ] }, { 
"type" : "enum", "name" : "Condition", "namespace" : "com.linkedin.metadata.query.filter", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { + "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", + "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", @@ -6058,6 +6070,7 @@ "IS_NULL" : "Represent the relation: field is null, e.g. platform is null", "LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3", "LESS_THAN_OR_EQUAL_TO" : "Represent the relation less than or equal to, e.g. ownerCount <= 3", + "RELATED_INCL" : "Represent the relation: URN field matches any nested child or parent in addition to the given URN", "START_WITH" : "Represent the relation: String field starts with value, e.g. 
name starts with PageView" } }, { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 3d16550db1e0f..1a35b52474e4f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -4536,7 +4536,11 @@ "name" : "description", "type" : "string", "doc" : "Documentation of the MLPrimaryKey", - "optional" : true + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT", + "hasValuesFieldName" : "hasDescription" + } }, { "name" : "dataType", "type" : "com.linkedin.common.MLFeatureDataType", diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 730a2886ab2bf..f6a37f958c30d 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -32,6 +32,7 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import javax.validation.constraints.Null; import org.apache.commons.collections.CollectionUtils; public class QueryUtils { @@ -57,6 +58,26 @@ public static Criterion newCriterion( .setCondition(condition); } + // Creates new Criterion with field and value, using EQUAL condition. + @Nullable + public static Criterion newCriterion(@Nonnull String field, @Nonnull List<String> values) { + return newCriterion(field, values, Condition.EQUAL); + } + + // Creates new Criterion with field, value and condition. 
+ @Nullable + public static Criterion newCriterion( + @Nonnull String field, @Nonnull List<String> values, @Nonnull Condition condition) { + if (values.isEmpty()) { + return null; + } + return new Criterion() + .setField(field) + .setValue(values.get(0)) // Hack! This is due to bad modeling. + .setValues(new StringArray(values)) + .setCondition(condition); + } + // Creates new Filter from a map of Criteria by removing null-valued Criteria and using EQUAL // condition (default). @Nonnull diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index 07f91bb52fe10..f56cbc36e4a66 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -10,6 +10,7 @@ import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import io.datahubproject.metadata.context.OperationContext; import jakarta.servlet.http.HttpServlet; @@ -35,22 +36,27 @@ @Slf4j public class ConfigSearchExport extends HttpServlet { - private ConfigurationProvider getConfigProvider(WebApplicationContext ctx) { + private static ConfigurationProvider getConfigProvider(WebApplicationContext ctx) { return (ConfigurationProvider) ctx.getBean("configurationProvider"); } - private AspectRetriever getAspectRetriever(WebApplicationContext ctx) { + private static AspectRetriever getAspectRetriever(WebApplicationContext ctx) { return (AspectRetriever) ctx.getBean("aspectRetriever"); } - private OperationContext getOperationContext(WebApplicationContext ctx) { + private static 
OperationContext getOperationContext(WebApplicationContext ctx) { return (OperationContext) ctx.getBean("systemOperationContext"); } + private static QueryFilterRewriteChain getQueryFilterRewriteChain(WebApplicationContext ctx) { + return ctx.getBean(QueryFilterRewriteChain.class); + } + private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { SearchConfiguration searchConfiguration = getConfigProvider(ctx).getElasticSearch().getSearch(); AspectRetriever aspectRetriever = getAspectRetriever(ctx); EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); + QueryFilterRewriteChain queryFilterRewriteChain = getQueryFilterRewriteChain(ctx); CSVWriter writer = CSVWriter.builder().printWriter(pw).build(); @@ -85,7 +91,8 @@ private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { entitySpecOpt -> { EntitySpec entitySpec = entitySpecOpt.get(); SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, null) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, null, queryFilterRewriteChain) .getSearchRequest( getOperationContext(ctx) .withSearchFlags(