diff --git a/build.gradle b/build.gradle index bfeebb47f..a39c3dfd2 100644 --- a/build.gradle +++ b/build.gradle @@ -39,6 +39,8 @@ project.ext.externalDependency = [ 'ebeanAgent': 'io.ebean:ebean-agent:11.27.1', 'elasticSearchRest': 'org.elasticsearch.client:elasticsearch-rest-high-level-client:5.6.8', 'elasticSearchTransport': 'org.elasticsearch.client:transport:5.6.8', + 'elasticSearchRest7': 'org.elasticsearch.client:elasticsearch-rest-high-level-client:7.9.3', + 'elasticSearchTransport7': 'org.elasticsearch.client:transport:7.9.3', 'guava': 'com.google.guava:guava:27.0.1-jre', 'h2': 'com.h2database:h2:1.4.196', 'jacksonCore': 'com.fasterxml.jackson.core:jackson-core:2.9.7', @@ -50,6 +52,7 @@ project.ext.externalDependency = [ 'junitJupiterEngine': "org.junit.jupiter:junit-jupiter-engine:$junitJupiterVersion", 'lombok': 'org.projectlombok:lombok:1.18.12', 'mockito': 'org.mockito:mockito-core:3.0.0', + 'mockitoInline': 'org.mockito:mockito-inline:3.0.0', 'neo4jHarness': 'org.neo4j.test:neo4j-harness:3.4.11', 'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:4.0.0', 'parseqTest': 'com.linkedin.parseq:parseq:3.0.7:test', diff --git a/dao-impl/elasticsearch-dao-7/build.gradle b/dao-impl/elasticsearch-dao-7/build.gradle new file mode 100644 index 000000000..d2dcf61c4 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/build.gradle @@ -0,0 +1,21 @@ +apply plugin: 'java' + +apply from: "$rootDir/gradle/java-publishing.gradle" + +dependencies { + compile project(':dao-api') + + compile externalDependency.elasticSearchRest7 + compile externalDependency.elasticSearchTransport7 + compile externalDependency.guava + compile externalDependency.lombok + compile externalDependency.commonsIo + compile externalDependency.commonsLang + compile externalDependency.jacksonCore + compile externalDependency.jacksonDataBind + + annotationProcessor externalDependency.lombok + + testCompile project(':testing:test-models') + testCompile externalDependency.mockitoInline +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/browse/BaseBrowseConfig.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/browse/BaseBrowseConfig.java new file mode 100644 index 000000000..bead97b8b --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/browse/BaseBrowseConfig.java @@ -0,0 +1,57 @@ +package com.linkedin.metadata.dao.browse; + +import com.linkedin.data.schema.RecordDataSchema; +import com.linkedin.data.template.RecordTemplate; +import javax.annotation.Nonnull; + + +public abstract class BaseBrowseConfig { + private static RecordDataSchema searchDocumentSchema; + + @Nonnull + public String getBrowseDepthFieldName() { + return "browsePaths.length"; + } + + @Nonnull + public String getBrowsePathFieldName() { + return "browsePaths"; + } + + @Nonnull + public String getUrnFieldName() { + return "urn"; + } + + @Nonnull + public String getSortingField() { + return "urn"; + } + + @Nonnull + public String getRemovedField() { + return "removed"; + } + + public boolean hasFieldInSchema(@Nonnull String fieldName) { + return getSearchDocumentSchema().contains(fieldName); + } + + private RecordDataSchema getSearchDocumentSchema() { + if (searchDocumentSchema == null) { + try { + searchDocumentSchema = getSearchDocument().newInstance().schema(); + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException("Couldn't create an instance of the search document"); + } + } + return searchDocumentSchema; + } + + @Nonnull + 
public String getIndexName() { + return getSearchDocument().getSimpleName().toLowerCase(); + } + + public abstract Class getSearchDocument(); +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/browse/ESBrowseDAO.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/browse/ESBrowseDAO.java new file mode 100644 index 000000000..f2c6e4bd8 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/browse/ESBrowseDAO.java @@ -0,0 +1,307 @@ +package com.linkedin.metadata.dao.browse; + +import com.google.common.annotations.VisibleForTesting; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.dao.BaseBrowseDAO; +import com.linkedin.metadata.dao.exception.ESQueryException; +import com.linkedin.metadata.dao.utils.ESUtils; +import com.linkedin.metadata.dao.utils.SearchUtils; +import com.linkedin.metadata.query.BrowseResult; +import com.linkedin.metadata.query.BrowseResultEntity; +import com.linkedin.metadata.query.BrowseResultEntityArray; +import com.linkedin.metadata.query.BrowseResultGroup; +import com.linkedin.metadata.query.BrowseResultGroupArray; +import com.linkedin.metadata.query.BrowseResultMetadata; +import com.linkedin.metadata.query.Filter; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang.StringUtils; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.sort.SortOrder; + + +/** + * A browse DAO for Elasticsearch backend. + */ +@Slf4j +public class ESBrowseDAO extends BaseBrowseDAO { + private final RestHighLevelClient _client; + private final BaseBrowseConfig _config; + + public ESBrowseDAO(@Nonnull RestHighLevelClient esClient, @Nonnull BaseBrowseConfig config) { + this._client = esClient; + this._config = config; + } + + /** + * Gets a list of groups/entities that match given browse request. 
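+ * <p>Illustrative usage, as a sketch only: the endpoint and the concrete config class below are assumptions and not
+ * part of this patch.
+ * <pre>{@code
+ * RestHighLevelClient client = new RestHighLevelClient(
+ *     RestClient.builder(new HttpHost("localhost", 9200, "http")));
+ * ESBrowseDAO browseDao = new ESBrowseDAO(client, new MyBrowseConfig());
+ * BrowseResult page = browseDao.browse("/certified/lls", null, 0, 10);
+ * }</pre>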
+ * + * @param path the path to be browsed + * @param requestParams the request map with fields and values as filters + * @param from index of the first entity located in path + * @param size the max number of entities contained in the response + * @return a {@link BrowseResult} that contains a list of groups/entities + */ + @Override + @Nonnull + public BrowseResult browse(@Nonnull String path, @Nullable Filter requestParams, int from, int size) { + final Map requestMap = SearchUtils.getRequestMap(requestParams); + + try { + final SearchResponse groupsResponse = + _client.search(constructGroupsSearchRequest(path, requestMap), RequestOptions.DEFAULT); + final SearchResponse entitiesResponse = + _client.search(constructEntitiesSearchRequest(path, requestMap, from, size), RequestOptions.DEFAULT); + final BrowseResult result = extractQueryResult(groupsResponse, entitiesResponse, path, from); + result.getMetadata().setPath(path); + return result; + } catch (Exception e) { + log.error("Browse query failed: " + e.getMessage()); + throw new ESQueryException("Browse query failed: ", e); + } + } + + /** + * Builds aggregations for search request. + * + * @param path the path which is being browsed + * @return {@link AggregationBuilder} + */ + @Nonnull + private AggregationBuilder buildAggregations(@Nonnull String path) { + final String includeFilter = ESUtils.escapeReservedCharacters(path) + "/.*"; + final String excludeFilter = ESUtils.escapeReservedCharacters(path) + "/.*/.*"; + + return AggregationBuilders.terms("groups") + .field(_config.getBrowsePathFieldName()) + .size(Integer.MAX_VALUE) + .order(BucketOrder.count(true)) // Ascending order + .includeExclude(new IncludeExclude(includeFilter, excludeFilter)); + } + + /** + * Constructs group search request. + * + * @param path the path which is being browsed + * @return {@link SearchRequest} + */ + @Nonnull + protected SearchRequest constructGroupsSearchRequest(@Nonnull String path, @Nonnull Map requestMap) { + final SearchRequest searchRequest = new SearchRequest(_config.getIndexName()); + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.query(buildQueryString(path, requestMap, true)); + searchSourceBuilder.aggregation(buildAggregations(path)); + searchRequest.source(searchSourceBuilder); + return searchRequest; + } + + /** + * Builds query string. + * + * @param path the path which is being browsed + * @param requestMap entity filters e.g. 
status=PUBLISHED for features + * @param isGroupQuery true if it's group query false otherwise + * @return {@link QueryBuilder} + */ + @Nonnull + private QueryBuilder buildQueryString(@Nonnull String path, @Nonnull Map requestMap, + boolean isGroupQuery) { + final String browsePathFieldName = _config.getBrowsePathFieldName(); + final String browseDepthFieldName = _config.getBrowseDepthFieldName(); + final String removedFieldName = _config.getRemovedField(); + final int browseDepthVal = getPathDepth(path) + 1; + + final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); + + queryBuilder.mustNot(QueryBuilders.termQuery(removedFieldName, "true")); + + if (!path.isEmpty()) { + queryBuilder.filter(QueryBuilders.termQuery(browsePathFieldName, path)); + } + + if (isGroupQuery) { + queryBuilder.filter(QueryBuilders.rangeQuery(browseDepthFieldName).gt(browseDepthVal)); + } else { + queryBuilder.filter(QueryBuilders.termQuery(browseDepthFieldName, browseDepthVal)); + } + + requestMap.forEach((field, val) -> { + if (_config.hasFieldInSchema(field)) { + queryBuilder.filter(QueryBuilders.termQuery(field, val)); + } + }); + + return queryBuilder; + } + + /** + * Constructs search request for entity search. + * + * @param path the path which is being browsed + * @param from index of first entity + * @param size count of entities + * @return {@link SearchRequest} + */ + @VisibleForTesting + @Nonnull + SearchRequest constructEntitiesSearchRequest(@Nonnull String path, @Nonnull Map requestMap, int from, + int size) { + final SearchRequest searchRequest = new SearchRequest(_config.getIndexName()); + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.from(from); + searchSourceBuilder.size(size); + searchSourceBuilder.fetchSource(new String[]{_config.getBrowsePathFieldName(), _config.getUrnFieldName()}, null); + searchSourceBuilder.sort(_config.getSortingField(), SortOrder.ASC); + searchSourceBuilder.query(buildQueryString(path, requestMap, false)); + searchRequest.source(searchSourceBuilder); + return searchRequest; + } + + /** + * Extracts search responses into browse result. + * + * @param groupsResponse groups search response + * @param entitiesResponse entity search response + * @param path the path which is being browsed + * @param from index of first entity + * @return {@link BrowseResult} + */ + @Nonnull + private BrowseResult extractQueryResult(@Nonnull SearchResponse groupsResponse, + @Nonnull SearchResponse entitiesResponse, @Nonnull String path, int from) { + final List browseResultEntityList = extractEntitiesResponse(entitiesResponse, path); + final BrowseResultMetadata browseResultMetadata = extractGroupsResponse(groupsResponse, path); + browseResultMetadata.setTotalNumEntities( + browseResultMetadata.getTotalNumEntities() + entitiesResponse.getHits().getTotalHits().value); + return new BrowseResult().setEntities(new BrowseResultEntityArray(browseResultEntityList)) + .setMetadata(browseResultMetadata) + .setFrom(from) + .setPageSize(browseResultEntityList.size()) + .setNumEntities((int) entitiesResponse.getHits().getTotalHits().value); + } + + /** + * Extracts group search response into browse result metadata. 
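+ * <p>For example (key and count assumed), a "groups" bucket with key "/certified/lls/lex" and doc count 42 is
+ * mapped to:
+ * <pre>{@code
+ * new BrowseResultGroup().setName("lex").setCount(42L)
+ * }</pre>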
+ * + * @param groupsResponse groups search response + * @param path the path which is being browsed + * @return {@link BrowseResultMetadata} + */ + @Nonnull + private BrowseResultMetadata extractGroupsResponse(@Nonnull SearchResponse groupsResponse, @Nonnull String path) { + final ParsedTerms groups = (ParsedTerms) groupsResponse.getAggregations().getAsMap().get("groups"); + final BrowseResultGroupArray groupsAgg = new BrowseResultGroupArray(); + for (Terms.Bucket group : groups.getBuckets()) { + groupsAgg.add( + new BrowseResultGroup().setName(getSimpleName(group.getKeyAsString())).setCount(group.getDocCount())); + } + return new BrowseResultMetadata() + .setGroups(groupsAgg) + .setTotalNumEntities(groupsResponse.getHits().getTotalHits().value) + .setPath(path); + } + + /** + * Extracts entity search response into list of browse result entities. + * + * @param entitiesResponse entity search response + * @return list of {@link BrowseResultEntity} + */ + @VisibleForTesting + @Nonnull + List extractEntitiesResponse(@Nonnull SearchResponse entitiesResponse, + @Nonnull String currentPath) { + final List entityMetadataArray = new ArrayList<>(); + Arrays.stream(entitiesResponse.getHits().getHits()).forEach(hit -> { + try { + final List allPaths = (List) hit.getSourceAsMap().get(_config.getBrowsePathFieldName()); + final String nextLevelPath = getNextLevelPath(allPaths, currentPath); + if (nextLevelPath != null) { + entityMetadataArray.add(new BrowseResultEntity().setName(getSimpleName(nextLevelPath)) + .setUrn(Urn.createFromString((String) hit.getSourceAsMap().get(_config.getUrnFieldName())))); + } + } catch (URISyntaxException e) { + log.error("URN is not valid: " + e.toString()); + } + }); + return entityMetadataArray; + } + + /** + * Extracts the name of group/entity from path. + * + *

Example: /foo/bar/baz => baz + * + * @param path path of the group/entity + * @return String + */ + @Nonnull + private String getSimpleName(@Nonnull String path) { + return path.substring(path.lastIndexOf('/') + 1); + } + + @VisibleForTesting + @Nullable + static String getNextLevelPath(@Nonnull List paths, @Nonnull String currentPath) { + final String normalizedCurrentPath = currentPath.toLowerCase(); + final int pathDepth = getPathDepth(currentPath); + return paths.stream() + .filter(x -> x.toLowerCase().startsWith(normalizedCurrentPath) && getPathDepth(x) == (pathDepth + 1)) + .findFirst() + .orElse(null); + } + + private static int getPathDepth(@Nonnull String path) { + return StringUtils.countMatches(path, "/"); + } + + /** + * Gets a list of paths for a given urn. + * + * @param urn urn of the entity + * @return all paths related to a given urn + */ + @Nonnull + public List getBrowsePaths(@Nonnull Urn urn) { + final SearchRequest searchRequest = new SearchRequest(_config.getIndexName()); + searchRequest.source( + new SearchSourceBuilder().query(QueryBuilders.termQuery(_config.getUrnFieldName(), urn.toString()))); + final SearchHit[] searchHits; + try { + searchHits = _client.search(searchRequest, RequestOptions.DEFAULT).getHits().getHits(); + } catch (Exception e) { + log.error("Get paths from urn query failed: " + e.getMessage()); + throw new ESQueryException("Get paths from urn query failed: ", e); + } + + if (searchHits.length == 0) { + return Collections.emptyList(); + } + final Map sourceMap = searchHits[0].getSourceAsMap(); + if (!sourceMap.containsKey(_config.getBrowsePathFieldName())) { + return Collections.emptyList(); + } + return (List) sourceMap.get(_config.getBrowsePathFieldName()); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/exception/ESQueryException.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/exception/ESQueryException.java new file mode 100644 index 000000000..f6f1ba761 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/exception/ESQueryException.java @@ -0,0 +1,15 @@ +package com.linkedin.metadata.dao.exception; + +/** + * An exception to be thrown when elastic search query fails. + */ +public class ESQueryException extends RuntimeException { + + public ESQueryException(String message) { + super(message); + } + + public ESQueryException(String message, Throwable throwable) { + super(message, throwable); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/BaseESAutoCompleteQuery.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/BaseESAutoCompleteQuery.java new file mode 100644 index 000000000..efcdb6b6e --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/BaseESAutoCompleteQuery.java @@ -0,0 +1,60 @@ +package com.linkedin.metadata.dao.search; + +import com.google.common.annotations.VisibleForTesting; +import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.query.AutoCompleteResult; +import com.linkedin.metadata.query.Filter; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; + + +@Slf4j +public abstract class BaseESAutoCompleteQuery { + + public BaseESAutoCompleteQuery() { + } + + /** + * Constructs the search query for auto complete request. + * + *
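+ * <p>Illustrative flow, as a sketch: a concrete subclass builds the request, the caller executes it against an
+ * assumed {@code RestHighLevelClient}, and the response is turned into an {@link AutoCompleteResult}; the field and
+ * input values are assumptions.
+ * <pre>{@code
+ * SearchRequest request = autoCompleteQuery.constructAutoCompleteQuery("ban", "name", null);
+ * SearchResponse response = client.search(request, RequestOptions.DEFAULT);
+ * AutoCompleteResult result = autoCompleteQuery.extractAutoCompleteResult(response, "ban", "name", 10);
+ * }</pre>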

TODO: merge this with regular search query construction to take filters as context for suggestions + * + * @param field the field name for the auto complete + * @param input the type ahead query text + * @param requestParams the request map as filters + * @return a valid search request + */ + @Nonnull + abstract SearchRequest constructAutoCompleteQuery(@Nonnull String input, @Nonnull String field, + @Nullable Filter requestParams); + + /** + * Gets a list of suggestions out of raw search hits. + * + * @param searchResponse the raw search response from search engine + * @param field the field name for the auto complete + * @param input the string that needs to be completed + * @param limit number of suggestions to return + * @return A list of suggestion strings + */ + @Nonnull + abstract StringArray getSuggestionList(@Nonnull SearchResponse searchResponse, @Nonnull String field, + @Nonnull String input, int limit); + + @VisibleForTesting + @Nonnull + AutoCompleteResult extractAutoCompleteResult(@Nonnull SearchResponse searchResponse, @Nonnull String input, + @Nonnull String field, int limit) { + + return new AutoCompleteResult().setQuery(input) + .setSuggestions(getSuggestionList(searchResponse, field, input, limit)); + } + + @Nonnull + String getAutocompleteQueryTemplate() { + return ""; + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/BaseSearchConfig.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/BaseSearchConfig.java new file mode 100644 index 000000000..67e7ea1c4 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/BaseSearchConfig.java @@ -0,0 +1,35 @@ +package com.linkedin.metadata.dao.search; + +import com.linkedin.data.template.RecordTemplate; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + + +public abstract class BaseSearchConfig { + + @Nonnull + public abstract Set getFacetFields(); + + @Nullable + public Set getLowCardinalityFields() { + return null; + } + + @Nonnull + public String getIndexName() { + return getSearchDocument().getSimpleName().toLowerCase(); + } + + @Nonnull + public abstract Class getSearchDocument(); + + @Nonnull + public abstract String getDefaultAutocompleteField(); + + @Nonnull + public abstract String getSearchQueryTemplate(); + + @Nonnull + public abstract String getAutocompleteQueryTemplate(); +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESAutoCompleteQueryForHighCardinalityFields.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESAutoCompleteQueryForHighCardinalityFields.java new file mode 100644 index 000000000..b22563f9f --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESAutoCompleteQueryForHighCardinalityFields.java @@ -0,0 +1,106 @@ +package com.linkedin.metadata.dao.search; + +import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.dao.utils.ESUtils; +import com.linkedin.metadata.query.Filter; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.index.query.QueryBuilder; +import 
org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.SearchSourceBuilder; + +@Slf4j +public class ESAutoCompleteQueryForHighCardinalityFields extends BaseESAutoCompleteQuery { + private static final Integer DEFAULT_AUTOCOMPLETE_QUERY_SIZE = 100; + private BaseSearchConfig _config; + + ESAutoCompleteQueryForHighCardinalityFields(BaseSearchConfig config) { + this._config = config; + } + + @Nonnull + SearchRequest constructAutoCompleteQuery(@Nonnull String input, @Nonnull String field, + @Nullable Filter filter) { + + SearchRequest searchRequest = new SearchRequest(_config.getIndexName()); + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + + searchSourceBuilder.size(DEFAULT_AUTOCOMPLETE_QUERY_SIZE); + searchSourceBuilder.query(buildAutoCompleteQueryString(input, field)); + searchSourceBuilder.postFilter(ESUtils.buildFilterQuery(filter)); + searchRequest.source(searchSourceBuilder); + log.debug("Auto complete request is: " + searchRequest.toString()); + return searchRequest; + } + + /** + * Constructs auto complete query given request. + * + * @param input the type ahead query text + * @param field the field name for the auto complete + * @return built autocomplete query + */ + @Nonnull + QueryBuilder buildAutoCompleteQueryString(@Nonnull String input, @Nonnull String field) { + String query = _config.getAutocompleteQueryTemplate(); + query = query.replace("$INPUT", input).replace("$FIELD", field); + return QueryBuilders.wrapperQuery(query); + } + + + @Nonnull + StringArray getSuggestionList(@Nonnull SearchResponse searchResponse, @Nonnull String field, + @Nonnull String input, int limit) { + Set autoCompletionList = new LinkedHashSet<>(); + SearchHit[] hits = searchResponse.getHits().getHits(); + Integer count = 0; + for (SearchHit hit : hits) { + Map source = hit.getSourceAsMap(); + if (count >= limit) { + break; + } + if (source.containsKey(field)) { + autoCompletionList.addAll(decoupleArrayToGetSubstringMatch(source.get(field), input)); + count = autoCompletionList.size(); + } + } + return new StringArray(autoCompletionList); + } + + /** + * Obtains relevant string from an object of which the input string is a prefix of. + * + *

+ * <p>This is useful for autocomplete queries where the field is indexed as an array of strings but the returned
+ * value should be a string from this array that completes the input string.

+ * + *

+ * <p>If the field is instead stored as a string, the function returns the field's value as is.

+ * + *

+ * <p>If the field contains multiple elements but none of them matches the input string, an empty list is
+ * returned.
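+ * <p>For example (values assumed for illustration):
+ * <pre>{@code
+ * decoupleArrayToGetSubstringMatch(Arrays.asList("subs_new_bookings", "daily_metrics"), "sub");
+ * // returns ["subs_new_bookings"], since only that element contains the input
+ * }</pre>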

+ * + * @param fieldVal value of the field stored in ES. This could be a string, list of strings, etc + * @param input the string that needs to be completed + * @return String obtained from fieldVal that completes the input string + */ + @Nonnull + static List decoupleArrayToGetSubstringMatch(@Nonnull Object fieldVal, @Nonnull String input) { + if (!(fieldVal instanceof List)) { + return Collections.singletonList(fieldVal.toString()); + } + List stringVals = (List) fieldVal; + return stringVals.stream() + .map(Object::toString) + .filter(x -> x.toLowerCase().contains(input.toLowerCase())) + .collect(Collectors.toList()); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESAutoCompleteQueryForLowCardinalityFields.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESAutoCompleteQueryForLowCardinalityFields.java new file mode 100644 index 000000000..917b6012f --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESAutoCompleteQueryForLowCardinalityFields.java @@ -0,0 +1,80 @@ +package com.linkedin.metadata.dao.search; + +import com.google.common.collect.ImmutableMap; +import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.dao.utils.ESUtils; +import com.linkedin.metadata.query.Filter; +import java.util.LinkedHashSet; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.aggregations.Aggregation; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; +import org.elasticsearch.search.builder.SearchSourceBuilder; + +@Slf4j +public class ESAutoCompleteQueryForLowCardinalityFields extends BaseESAutoCompleteQuery { + + private static final String DEFAULT_QUERY_ANALYZER = "lowercase_keyword"; + private BaseSearchConfig _config; + + ESAutoCompleteQueryForLowCardinalityFields(BaseSearchConfig config) { + this._config = config; + } + + @Nonnull + SearchRequest constructAutoCompleteQuery(@Nonnull String input, @Nonnull String field, + @Nullable Filter filter) { + + SearchRequest searchRequest = new SearchRequest(_config.getIndexName()); + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + + searchSourceBuilder.query(buildAutoCompleteQueryString(input, field, filter)); + searchSourceBuilder.aggregation(AggregationBuilders.terms(field).field(field)); + searchRequest.source(searchSourceBuilder); + log.debug("Auto complete request is: " + searchRequest.toString()); + return searchRequest; + } + + /** + * Constructs auto complete query given request. 
+ * + * @param input the type ahead query text + * @param field the field name for the auto complete + * @param filter the search filters + * @return built autocomplete query + */ + @Nonnull + QueryBuilder buildAutoCompleteQueryString(@Nonnull String input, @Nonnull String field, + @Nullable Filter filter) { + String subFieldDelimitEdgeNgram = field + ".delimited_edgengram"; + String subFieldEdgeNgram = field + ".edgengram"; + BoolQueryBuilder query = ESUtils.buildFilterQuery(filter); + if (input.length() > 0) { + query.must(QueryBuilders + .queryStringQuery(input) + .fields(ImmutableMap.of(field, 1f, subFieldDelimitEdgeNgram, 1f, subFieldEdgeNgram, 1f)) + .analyzer(DEFAULT_QUERY_ANALYZER)); + } + return query; + } + + @Nonnull + StringArray getSuggestionList(@Nonnull SearchResponse searchResponse, @Nonnull String field, + @Nonnull String input, int limit) { + Set autoCompletionList = new LinkedHashSet<>(); + Aggregation aggregation = searchResponse.getAggregations().get(field); + ((ParsedTerms) aggregation) + .getBuckets() + .stream() + .forEach(b -> autoCompletionList.add(b.getKeyAsString())); + return new StringArray(autoCompletionList); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESBulkWriterDAO.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESBulkWriterDAO.java new file mode 100644 index 000000000..f6f29c518 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESBulkWriterDAO.java @@ -0,0 +1,75 @@ +package com.linkedin.metadata.dao.search; + +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.dao.BaseSearchWriterDAO; +import com.linkedin.metadata.dao.utils.RecordUtils; +import javax.annotation.Nonnull; +import org.elasticsearch.action.bulk.BulkProcessor; +import org.elasticsearch.action.delete.DeleteRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.update.UpdateRequest; +import org.elasticsearch.common.xcontent.XContentType; + + +/** + * A {@link BaseSearchWriterDAO} that uses ElasticSearch's bulk update API. + */ +public final class ESBulkWriterDAO extends BaseSearchWriterDAO { + private static final String DEFAULT_DOCUMENT_TYPE = "doc"; + private static final int MAX_RETRIES = 3; + + private final BulkProcessor _bulkProcessor; + private final String _indexName; + private final String _documentType; + + /** + * Constructor. + * + * @param documentClass schema of the class to index + * @param bulkProcessor the bulk process to use to write to ES + * @param indexName the name of the index to write updates to + */ + public ESBulkWriterDAO(@Nonnull Class documentClass, @Nonnull BulkProcessor bulkProcessor, + @Nonnull String indexName) { + this(documentClass, bulkProcessor, indexName, DEFAULT_DOCUMENT_TYPE); + } + + /** + * Constructor. 
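+ * <p>Illustrative wiring, as a sketch: {@code client} is an existing {@code RestHighLevelClient} and
+ * {@code MyDocument} is a placeholder document class; both are assumptions, not part of this patch.
+ * <pre>{@code
+ * BulkProcessor bulkProcessor = BulkProcessor.builder(
+ *     (request, bulkListener) -> client.bulkAsync(request, RequestOptions.DEFAULT, bulkListener),
+ *     new BulkProcessor.Listener() {
+ *       public void beforeBulk(long executionId, BulkRequest request) { }
+ *       public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { }
+ *       public void afterBulk(long executionId, BulkRequest request, Throwable failure) { }
+ *     }).build();
+ * ESBulkWriterDAO<MyDocument> writerDao =
+ *     new ESBulkWriterDAO<>(MyDocument.class, bulkProcessor, "mydocument", "doc");
+ * }</pre>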
+ * + * @param documentClass schema of the class to index + * @param bulkProcessor the bulk process to use to write to ES + * @param indexName the name of the index to write updates to + * @param documentType the type of document + */ + public ESBulkWriterDAO(@Nonnull Class documentClass, @Nonnull BulkProcessor bulkProcessor, + @Nonnull String indexName, @Nonnull String documentType) { + super(documentClass); + _bulkProcessor = bulkProcessor; + _indexName = indexName; + _documentType = documentType; + } + + @Override + public void upsertDocument(@Nonnull DOCUMENT document, @Nonnull String docId) { + final String documentJson = RecordUtils.toJsonString(document); + final IndexRequest indexRequest = + new IndexRequest(_indexName, _documentType, docId).source(documentJson, XContentType.JSON); + final UpdateRequest updateRequest = + new UpdateRequest(_indexName, _documentType, docId).doc(documentJson, XContentType.JSON) + .detectNoop(false) + .upsert(indexRequest) + .retryOnConflict(MAX_RETRIES); + _bulkProcessor.add(updateRequest); + } + + @Override + public void deleteDocument(@Nonnull String docId) { + _bulkProcessor.add(new DeleteRequest(_indexName, _documentType, docId)); + } + + @Override + public void close() { + _bulkProcessor.close(); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESSearchDAO.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESSearchDAO.java new file mode 100644 index 000000000..9bc83db07 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/search/ESSearchDAO.java @@ -0,0 +1,450 @@ +package com.linkedin.metadata.dao.search; + +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.DataList; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.LongMap; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.dao.BaseSearchDAO; +import com.linkedin.metadata.dao.SearchResult; +import com.linkedin.metadata.dao.exception.ESQueryException; +import com.linkedin.metadata.dao.utils.ESUtils; +import com.linkedin.metadata.dao.utils.QueryUtils; +import com.linkedin.metadata.query.AggregationMetadata; +import com.linkedin.metadata.query.AggregationMetadataArray; +import com.linkedin.metadata.query.AutoCompleteResult; +import com.linkedin.metadata.query.Criterion; +import com.linkedin.metadata.query.Filter; +import com.linkedin.metadata.query.SearchResultMetadata; +import com.linkedin.metadata.query.SortCriterion; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.aggregations.Aggregation; +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import 
org.elasticsearch.search.aggregations.Aggregations; +import org.elasticsearch.search.aggregations.bucket.filter.ParsedFilter; +import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.builder.SearchSourceBuilder; + +import static com.linkedin.metadata.dao.utils.SearchUtils.*; + + +/** + * A search DAO for Elasticsearch backend. + */ +@Slf4j +public class ESSearchDAO extends BaseSearchDAO { + + private static final Integer DEFAULT_TERM_BUCKETS_SIZE_100 = 100; + private static final String URN_FIELD = "urn"; + + private RestHighLevelClient _client; + private BaseSearchConfig _config; + private BaseESAutoCompleteQuery _autoCompleteQueryForLowCardFields; + private BaseESAutoCompleteQuery _autoCompleteQueryForHighCardFields; + + // TODO: Currently takes elastic search client, in future, can take other clients such as galene + // TODO: take params and settings needed to create the client + public ESSearchDAO(@Nonnull RestHighLevelClient esClient, @Nonnull Class documentClass, + @Nonnull BaseSearchConfig config) { + super(documentClass); + _client = esClient; + _config = config; + _autoCompleteQueryForLowCardFields = new ESAutoCompleteQueryForLowCardinalityFields(_config); + _autoCompleteQueryForHighCardFields = new ESAutoCompleteQueryForHighCardinalityFields(_config); + } + + @Nonnull + protected BaseESAutoCompleteQuery getAutocompleteQueryGenerator(@Nonnull String field) { + if (_config.getLowCardinalityFields() != null && _config.getLowCardinalityFields().contains(field)) { + return _autoCompleteQueryForLowCardFields; + } + return _autoCompleteQueryForHighCardFields; + } + + /** + * Constructs the base query string given input. + * + * @param input the search input text + * @return built query + */ + @Nonnull + QueryBuilder buildQueryString(@Nonnull String input) { + final String query = _config.getSearchQueryTemplate().replace("$INPUT", input); + return QueryBuilders.wrapperQuery(query); + } + + @Nonnull + private SearchResult executeAndExtract(@Nonnull SearchRequest searchRequest, int from, int size) { + try { + final SearchResponse searchResponse = _client.search(searchRequest, RequestOptions.DEFAULT); + // extract results, validated against document model as well + return extractQueryResult(searchResponse, from, size); + } catch (Exception e) { + log.error("Search query failed:" + e.getMessage()); + throw new ESQueryException("Search query failed:", e); + } + } + + /** + * TODO: This part will be replaced by searchTemplateAPI when the elastic is upgraded to 6.4 or later + */ + @Override + @Nonnull + public SearchResult search(@Nonnull String input, @Nullable Filter postFilters, + @Nullable SortCriterion sortCriterion, int from, int size) { + return search(input, postFilters, sortCriterion, null, from, size); + } + + /** + * Gets a list of documents that match given search request. The results are aggregated and filters are applied to the + * search hits and not the aggregation results. + * + *
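+ * <p>Illustrative call, as a sketch (the DAO instance, query text, and preference value are assumptions); the
+ * preference argument ("member-123" below) is described in the following paragraphs.
+ * <pre>{@code
+ * SearchResult result = searchDao.search("profile views", null, null, "member-123", 0, 10);
+ * }</pre>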

+ * <p>This method uses the preference parameter to control the shard copy on which to execute the search operation.
+ * The string used as the preference can be a user ID or session ID, for instance. This ensures that all queries of a
+ * given user always hit the same shards, so scores remain more consistent across queries. Using a
+ * preference value that identifies the current user or session can help optimize usage of the caches.
+ *
+ *

WARNING: using a preference parameter that is same for all queries will lead to hot spots that could + * potentially impact latency, hence choose this parameter judiciously. + * + * @param input the search input text + * @param postFilters the request map with fields and values as filters to be applied to search hits + * @param sortCriterion {@link SortCriterion} to be applied to search results + * @param preference controls a preference of the shard copy on which to execute the search + * @param from index to start the search from + * @param size the number of search hits to return + * @return a {@link SearchResult} that contains a list of matched documents and related search result metadata + */ + @Nonnull + public SearchResult search(@Nonnull String input, @Nullable Filter postFilters, + @Nullable SortCriterion sortCriterion, @Nullable String preference, int from, int size) { + + // Step 0: TODO: Add type casting if needed and add request params validation against the model + // Step 1: construct the query + final SearchRequest req = constructSearchQuery(input, postFilters, sortCriterion, preference, from, size); + // Step 2: execute the query and extract results, validated against document model as well + return executeAndExtract(req, from, size); + } + + @Override + @Nonnull + public SearchResult filter(@Nullable Filter filters, @Nullable SortCriterion sortCriterion, + int from, int size) { + + final SearchRequest searchRequest = getFilteredSearchQuery(filters, sortCriterion, from, size); + return executeAndExtract(searchRequest, from, size); + } + + /** + * Returns a {@link SearchRequest} given filters to be applied to search query and sort criterion to be applied to + * search results. + * + * @param filters {@link Filter} list of conditions with fields and values + * @param sortCriterion {@link SortCriterion} to be applied to the search results + * @param from index to start the search from + * @param size the number of search hits to return + * @return {@link SearchRequest} that contains the filtered query + */ + @Nonnull + SearchRequest getFilteredSearchQuery(@Nullable Filter filters, @Nullable SortCriterion sortCriterion, + int from, int size) { + + final BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + if (filters != null) { + filters.getCriteria().forEach(criterion -> { + if (!criterion.getValue().trim().isEmpty()) { + boolQueryBuilder.filter(getQueryBuilderFromCriterion(criterion)); + } + }); + } + final SearchRequest searchRequest = new SearchRequest(_config.getIndexName()); + final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + searchSourceBuilder.query(boolQueryBuilder); + searchSourceBuilder.from(from).size(size); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + searchRequest.source(searchSourceBuilder); + + return searchRequest; + } + + /** + * Constructs the search query based on the query request. + * + *

TODO: This part will be replaced by searchTemplateAPI when the elastic is upgraded to 6.4 or later + * + * @param input the search input text + * @param filter the search filter + * @param from index to start the search from + * @param size the number of search hits to return + * @return a valid search request + * @deprecated please use {@link #constructSearchQuery(String, Filter, SortCriterion, String, int, int)} instead + */ + @Nonnull + public SearchRequest constructSearchQuery(@Nonnull String input, @Nullable Filter filter, + @Nullable SortCriterion sortCriterion, int from, int size) { + + return constructSearchQuery(input, filter, sortCriterion, null, from, size); + } + + /** + * Constructs the search query based on the query request. + * + *

TODO: This part will be replaced by searchTemplateAPI when the elastic is upgraded to 6.4 or later + * + * @param input the search input text + * @param filter the search filter + * @param preference controls a preference of the shard copy on which to execute the search + * @param from index to start the search from + * @param size the number of search hits to return + * @return a valid search request + */ + @Nonnull + SearchRequest constructSearchQuery(@Nonnull String input, @Nullable Filter filter, + @Nullable SortCriterion sortCriterion, @Nullable String preference, int from, int size) { + + SearchRequest searchRequest = new SearchRequest(_config.getIndexName()); + if (preference != null) { + searchRequest.preference(preference); + } + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + + searchSourceBuilder.from(from); + searchSourceBuilder.size(size); + + searchSourceBuilder.query(buildQueryString(input)); + searchSourceBuilder.postFilter(ESUtils.buildFilterQuery(filter)); + buildAggregations(searchSourceBuilder, filter); + ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion); + + searchRequest.source(searchSourceBuilder); + log.debug("Search request is: " + searchRequest.toString()); + return searchRequest; + } + + /** + * Constructs the aggregations and sub-aggregations by adding other facets' filters if they are set in request. + * + *

Retrieves dynamic aggregation bucket values when the selections change on the fly + * + * @param searchSourceBuilder the builder to build search source for search request + * @param filter the search filters + */ + private void buildAggregations(@Nonnull SearchSourceBuilder searchSourceBuilder, + @Nullable Filter filter) { + Set facetFields = _config.getFacetFields(); + for (String facet : facetFields) { + AggregationBuilder aggBuilder = AggregationBuilders.terms(facet).field(facet).size(DEFAULT_TERM_BUCKETS_SIZE_100); + Optional.ofNullable(filter).map(Filter::getCriteria).ifPresent(criteria -> { + for (Criterion criterion : criteria) { + if (!facetFields.contains(criterion.getField()) || criterion.getField().equals(facet)) { + continue; + } + QueryBuilder filterQueryBuilder = ESUtils.getQueryBuilderFromCriterionForSearch(criterion); + aggBuilder.subAggregation(AggregationBuilders.filter(criterion.getField(), filterQueryBuilder)); + } + }); + searchSourceBuilder.aggregation(aggBuilder); + } + } + + /** + * Extracts a list of documents from the raw search response. + * + * @param searchResponse the raw search response from search engine + * @param from offset from the first result you want to fetch + * @param size page size + * @return collection of a list of documents and related search result metadata + */ + @Nonnull + public SearchResult extractQueryResult(@Nonnull SearchResponse searchResponse, int from, int size) { + + int totalCount = (int) searchResponse.getHits().getTotalHits().value; + int totalPageCount = QueryUtils.getTotalPageCount(totalCount, size); + + return SearchResult.builder() + // format + .documentList(getDocuments(searchResponse)) + .searchResultMetadata(extractSearchResultMetadata(searchResponse)) + .from(from) + .pageSize(size) + .havingMore(QueryUtils.hasMore(from, size, totalPageCount)) + .totalCount(totalCount) + .totalPageCount(totalPageCount) + .build(); + } + + /** + * Gets list of documents from search hits. + * + * @param searchResponse the raw search response from search engine + * @return List of documents + */ + @Nonnull + List getDocuments(@Nonnull SearchResponse searchResponse) { + return (Arrays.stream(searchResponse.getHits().getHits())).map(hit -> + newDocument(buildDocumentsDataMap(hit.getSourceAsMap()))).collect(Collectors.toList()); + } + + /** + * Builds data map for documents. 
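+ * <p>For example (keys and values assumed):
+ * <pre>{@code
+ * Map<String, Object> source = new HashMap<>();
+ * source.put("urn", "urn:li:testEntity:1");
+ * source.put("tags", new ArrayList<>(Arrays.asList("a", "b")));
+ * DataMap dataMap = buildDocumentsDataMap(source);  // "tags" is wrapped in a DataList, "urn" is copied as-is
+ * }</pre>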
+ * + * @param objectMap an object map represents one raw search hit + * @return a data map + */ + @Nonnull + DataMap buildDocumentsDataMap(@Nonnull Map objectMap) { + + final DataMap dataMap = new DataMap(); + for (Map.Entry entry : objectMap.entrySet()) { + if (entry.getValue() instanceof ArrayList) { + dataMap.put(entry.getKey(), new DataList((ArrayList) entry.getValue())); + } else if (entry.getValue() != null) { + dataMap.put(entry.getKey(), entry.getValue()); + } + } + return dataMap; + } + + @Override + @Nonnull + public AutoCompleteResult autoComplete(@Nonnull String query, @Nullable String field, @Nullable Filter requestParams, + int limit) { + if (field == null) { + field = _config.getDefaultAutocompleteField(); + } + try { + SearchRequest req = constructAutoCompleteQuery(query, field, requestParams); + SearchResponse searchResponse = _client.search(req, RequestOptions.DEFAULT); + return extractAutoCompleteResult(searchResponse, query, field, limit); + } catch (Exception e) { + log.error("Auto complete query failed:" + e.getMessage()); + throw new ESQueryException("Auto complete query failed:", e); + } + } + + @Nonnull + public AutoCompleteResult extractAutoCompleteResult(@Nonnull SearchResponse searchResponse, @Nonnull String input, + @Nonnull String field, int limit) { + return getAutocompleteQueryGenerator(field).extractAutoCompleteResult(searchResponse, input, field, limit); + } + + @Nonnull + public SearchRequest constructAutoCompleteQuery(@Nonnull String input, @Nonnull String field, + @Nullable Filter requestParams) { + return getAutocompleteQueryGenerator(field).constructAutoCompleteQuery(input, field, requestParams); + } + + /** + * Extracts SearchResultMetadata section. + * + * @param searchResponse the raw {@link SearchResponse} as obtained from the search engine + * @return {@link SearchResultMetadata} with aggregation and list of urns obtained from {@link SearchResponse} + */ + @Nonnull + SearchResultMetadata extractSearchResultMetadata(@Nonnull SearchResponse searchResponse) { + final SearchResultMetadata searchResultMetadata = + new SearchResultMetadata().setSearchResultMetadatas(new AggregationMetadataArray()).setUrns(new UrnArray()); + + try { + // populate the urns from search response + if (searchResponse.getHits() != null && searchResponse.getHits().getHits() != null) { + searchResultMetadata.setUrns(Arrays.stream(searchResponse.getHits().getHits()) + .map(this::getUrnFromSearchHit) + .collect(Collectors.toCollection(UrnArray::new))); + } + } catch (NullPointerException e) { + throw new RuntimeException("Missing urn field in search document " + e); + } + + final Aggregations aggregations = searchResponse.getAggregations(); + if (aggregations == null) { + return searchResultMetadata; + } + + final AggregationMetadataArray aggregationMetadataArray = new AggregationMetadataArray(); + + for (Map.Entry entry : aggregations.getAsMap().entrySet()) { + final Map oneTermAggResult = extractTermAggregations((ParsedTerms) entry.getValue()); + final AggregationMetadata aggregationMetadata = + new AggregationMetadata().setName(entry.getKey()).setAggregations(new LongMap(oneTermAggResult)); + aggregationMetadataArray.add(aggregationMetadata); + } + + return searchResultMetadata.setSearchResultMetadatas(aggregationMetadataArray); + } + + /** + * Extracts term aggregations give a parsed term. 
+ * + * @param terms an abstract parse term, input can be either ParsedStringTerms ParsedLongTerms + * @return a map with aggregation key and corresponding doc counts + */ + @Nonnull + private Map extractTermAggregations(@Nonnull ParsedTerms terms) { + + final Map aggResult = new HashMap<>(); + List bucketList = terms.getBuckets(); + + for (Terms.Bucket bucket : bucketList) { + String key = bucket.getKeyAsString(); + ParsedFilter parsedFilter = extractBucketAggregations(bucket); + // Gets filtered sub aggregation doc count if exist + Long docCount = parsedFilter != null ? parsedFilter.getDocCount() : bucket.getDocCount(); + if (docCount > 0) { + aggResult.put(key, docCount); + } + } + + return aggResult; + } + + /** + * Extracts sub aggregations from one term bucket. + * + * @param bucket a term bucket + * @return a parsed filter if exist + */ + @Nullable + private ParsedFilter extractBucketAggregations(@Nonnull Terms.Bucket bucket) { + + ParsedFilter parsedFilter = null; + Map bucketAggregations = bucket.getAggregations().getAsMap(); + for (Map.Entry entry : bucketAggregations.entrySet()) { + parsedFilter = (ParsedFilter) entry.getValue(); + // TODO: implement and test multi parsed filters + } + + return parsedFilter; + } + + @Nonnull + private Urn getUrnFromSearchHit(@Nonnull SearchHit hit) { + try { + return Urn.createFromString(hit.getSourceAsMap().get(URN_FIELD).toString()); + } catch (URISyntaxException e) { + throw new RuntimeException("Invalid urn in search document " + e); + } + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/utils/ESUtils.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/utils/ESUtils.java new file mode 100644 index 000000000..954b23e36 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/utils/ESUtils.java @@ -0,0 +1,113 @@ +package com.linkedin.metadata.dao.utils; + +import com.linkedin.metadata.query.Condition; +import com.linkedin.metadata.query.Criterion; +import com.linkedin.metadata.query.Filter; +import com.linkedin.metadata.query.SortCriterion; +import java.util.Arrays; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.sort.FieldSortBuilder; +import org.elasticsearch.search.sort.ScoreSortBuilder; +import org.elasticsearch.search.sort.SortOrder; + + +public class ESUtils { + + private static final String DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD = "urn"; + + /* + * Refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html for list of reserved + * characters in an Elasticsearch regular expression. + */ + private static final String ELASTICSEARCH_REGEXP_RESERVED_CHARACTERS = "?+*|{}[]()"; + + private ESUtils() { + + } + + /** + * Constructs the filter query given filter map. + * + *
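+ * <p>For example (field name and values assumed), a single EQUAL criterion whose value is a comma-separated string
+ * <pre>{@code
+ * Filter filter = new Filter().setCriteria(new CriterionArray(Collections.singletonList(
+ *     new Criterion().setField("status").setValue("PUBLISHED,DRAFT").setCondition(Condition.EQUAL))));
+ * BoolQueryBuilder query = ESUtils.buildFilterQuery(filter);
+ * }</pre>
+ * yields a bool query whose must clause matches either value of "status".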

Multiple values can be selected for a filter, and it is currently modeled as string separated by comma + * + * @param filter the search filter + * @return built filter query + */ + @Nonnull + public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter) { + BoolQueryBuilder boolFilter = new BoolQueryBuilder(); + if (filter == null) { + return boolFilter; + } + for (Criterion criterion : filter.getCriteria()) { + boolFilter.must(getQueryBuilderFromCriterionForSearch(criterion)); + } + return boolFilter; + } + + /** + * Builds search query using criterion. + * This method is similar to SearchUtils.getQueryBuilderFromCriterion(). + * The only difference is this method use match query instead of term query for EQUAL. + * + * @param criterion {@link Criterion} single criterion which contains field, value and a comparison operator + * @return QueryBuilder + */ + @Nonnull + public static QueryBuilder getQueryBuilderFromCriterionForSearch(@Nonnull Criterion criterion) { + final Condition condition = criterion.getCondition(); + if (condition == Condition.EQUAL) { + BoolQueryBuilder filters = new BoolQueryBuilder(); + Arrays.stream(criterion.getValue().trim().split("\\s*,\\s*")) + .forEach(elem -> filters.should(QueryBuilders.matchQuery(criterion.getField(), elem))); + return filters; + } else { + return SearchUtils.getQueryBuilderFromCriterion(criterion); + } + } + + /** + * Populates source field of search query with the sort order as per the criterion provided. + * + *

+ * If no sort criterion is provided, the default sort is by descending relevance score.
+ * To resolve ties, the results are further sorted by urn in ascending order.
+ * If the input sort criterion is urn itself, no additional sort criterion is applied, since there can be no ties.
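+ * <p>For example, a minimal sketch:
+ * <pre>{@code
+ * SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
+ * ESUtils.buildSortOrder(sourceBuilder, null);  // sorts by descending _score, then by ascending urn
+ * }</pre>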

+ * + * @param searchSourceBuilder {@link SearchSourceBuilder} that needs to be populated with sort order + * @param sortCriterion {@link SortCriterion} to be applied to the search results + */ + public static void buildSortOrder(@Nonnull SearchSourceBuilder searchSourceBuilder, @Nullable SortCriterion sortCriterion) { + if (sortCriterion == null) { + searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); + } else { + final SortOrder esSortOrder = (sortCriterion.getOrder() == com.linkedin.metadata.query.SortOrder.ASCENDING) + ? SortOrder.ASC + : SortOrder.DESC; + searchSourceBuilder.sort(new FieldSortBuilder(sortCriterion.getField()).order(esSortOrder)); + } + if (sortCriterion == null || !sortCriterion.getField().equals(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD)) { + searchSourceBuilder.sort(new FieldSortBuilder(DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD).order(SortOrder.ASC)); + } + } + + /** + * Escapes the Elasticsearch reserved characters in the given input string. + * + * @param input input string + * @return input string in which reserved characters are escaped + */ + @Nonnull + public static String escapeReservedCharacters(@Nonnull String input) { + for (char reservedChar : ELASTICSEARCH_REGEXP_RESERVED_CHARACTERS.toCharArray()) { + input = input.replace(String.valueOf(reservedChar), "\\" + reservedChar); + } + return input; + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/utils/SearchUtils.java b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/utils/SearchUtils.java new file mode 100644 index 000000000..4f5c01344 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/main/java/com/linkedin/metadata/dao/utils/SearchUtils.java @@ -0,0 +1,89 @@ +package com.linkedin.metadata.dao.utils; + +import com.linkedin.metadata.query.Condition; +import com.linkedin.metadata.query.Criterion; +import com.linkedin.metadata.query.Filter; +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.IOUtils; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; + + +@Slf4j +public class SearchUtils { + + private SearchUtils() { + + } + + /** + * Validates the request params and create a request map out of it. + * + * @param requestParams the search request with fields and values + * @return a request map + */ + @Nonnull + public static Map getRequestMap(@Nullable Filter requestParams) { + if (requestParams == null) { + return Collections.emptyMap(); + } + + requestParams.getCriteria().forEach(criterion -> { + if (criterion.getCondition() != com.linkedin.metadata.query.Condition.EQUAL) { + throw new UnsupportedOperationException("Unsupported condition: " + criterion.getCondition()); + } + }); + + return requestParams.getCriteria().stream().collect(Collectors.toMap(Criterion::getField, Criterion::getValue)); + } + + /** + * Builds search query using criterion. 
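+ * <p>For example (field and value assumed):
+ * <pre>{@code
+ * Criterion criterion = new Criterion().setField("createdAt").setValue("100").setCondition(Condition.GREATER_THAN);
+ * QueryBuilder queryBuilder = SearchUtils.getQueryBuilderFromCriterion(criterion);  // range query: createdAt > 100
+ * }</pre>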
+ * + * @param criterion {@link Criterion} single criterion which contains field, value and a comparison operator + * @return QueryBuilder + */ + @Nonnull + public static QueryBuilder getQueryBuilderFromCriterion(@Nonnull Criterion criterion) { + final Condition condition = criterion.getCondition(); + if (condition == Condition.EQUAL) { + return QueryBuilders.termsQuery(criterion.getField(), criterion.getValue().trim().split("\\s*,\\s*")); + } else if (condition == Condition.GREATER_THAN) { + return QueryBuilders.rangeQuery(criterion.getField()).gt(criterion.getValue().trim()); + } else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) { + return QueryBuilders.rangeQuery(criterion.getField()).gte(criterion.getValue().trim()); + } else if (condition == Condition.LESS_THAN) { + return QueryBuilders.rangeQuery(criterion.getField()).lt(criterion.getValue().trim()); + } else if (condition == Condition.LESS_THAN_OR_EQUAL_TO) { + return QueryBuilders.rangeQuery(criterion.getField()).lte(criterion.getValue().trim()); + } + + throw new UnsupportedOperationException("Unsupported condition: " + condition); + } + + @Nonnull + public static String toEntityType(@Nonnull Class c) { + String result = c.getSimpleName().toLowerCase(); + if (result.endsWith("entity")) { + result = result.substring(0, result.length() - 6); + } + return result; + } + + @Nonnull + public static String readResourceFile(@Nonnull Class clazz, @Nonnull String filePath) { + try (InputStream inputStream = clazz.getClassLoader().getResourceAsStream(filePath)) { + return IOUtils.toString(inputStream); + } catch (IOException e) { + log.error("Can't read file: " + filePath); + throw new RuntimeException("Can't read file: " + filePath); + } + } +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/ESUtilsTest.java b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/ESUtilsTest.java new file mode 100644 index 000000000..4d31c505e --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/ESUtilsTest.java @@ -0,0 +1,83 @@ +package com.linkedin.metadata.dao; + +import com.linkedin.metadata.query.Condition; +import com.linkedin.metadata.query.Criterion; +import com.linkedin.metadata.query.CriterionArray; +import com.linkedin.metadata.query.Filter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import org.apache.commons.io.IOUtils; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.testng.annotations.Test; + +import static com.linkedin.metadata.dao.utils.ESUtils.*; +import static org.testng.Assert.*; + + +public class ESUtilsTest { + + private static String loadJsonFromResource(String resourceName) throws IOException { + return IOUtils.toString(ClassLoader.getSystemResourceAsStream(resourceName), StandardCharsets.UTF_8); + } + + @Test + public void testBuildFilterQueryWithEmptyFilter() throws Exception { + // Test null filter + BoolQueryBuilder queryBuilder = buildFilterQuery(null); + assertEquals(queryBuilder.toString(), loadJsonFromResource("filterQuery/EmptyFilterQuery.json")); + + // Test empty filter + Filter filter = new Filter().setCriteria(new CriterionArray()); + queryBuilder = buildFilterQuery(filter); + assertEquals(queryBuilder.toString(), loadJsonFromResource("filterQuery/EmptyFilterQuery.json")); + } + + @Test + public void testBuildFilterQueryWithAndFilter() throws 
IOException { + Filter filter = new Filter().setCriteria(new CriterionArray( + Arrays.asList(new Criterion().setField("key1").setValue("value1").setCondition(Condition.EQUAL), + new Criterion().setField("key2").setValue("value2").setCondition(Condition.EQUAL)))); + QueryBuilder queryBuilder = buildFilterQuery(filter); + assertEquals(queryBuilder.toString(), loadJsonFromResource("filterQuery/AndFilterQuery.json")); + } + + @Test + public void testBuildFilterQueryWithOrFilter() throws IOException { + Filter filter = new Filter().setCriteria(new CriterionArray(Collections.singletonList( + new Criterion().setField("key1").setValue("value1,value2").setCondition(Condition.EQUAL)))); + QueryBuilder queryBuilder = buildFilterQuery(filter); + assertEquals(queryBuilder.toString(), loadJsonFromResource("filterQuery/OrFilterQuery.json")); + } + + @Test + public void testBuildFilterQueryWithComplexFilter() throws IOException { + Filter filter = new Filter().setCriteria(new CriterionArray( + Arrays.asList(new Criterion().setField("key1").setValue("value1,value2").setCondition(Condition.EQUAL), + new Criterion().setField("key2").setValue("value2").setCondition(Condition.EQUAL)))); + QueryBuilder queryBuilder = buildFilterQuery(filter); + assertEquals(queryBuilder.toString(), loadJsonFromResource("filterQuery/ComplexFilterQuery.json")); + } + + @Test + public void testBuildFilterQueryWithRangeFilter() throws IOException { + Filter filter = new Filter().setCriteria(new CriterionArray( + Arrays.asList(new Criterion().setField("key1").setValue("value1").setCondition(Condition.GREATER_THAN), + new Criterion().setField("key1").setValue("value2").setCondition(Condition.LESS_THAN), + new Criterion().setField("key2").setValue("value3").setCondition(Condition.GREATER_THAN_OR_EQUAL_TO), + new Criterion().setField("key3").setValue("value4").setCondition(Condition.LESS_THAN_OR_EQUAL_TO) + ))); + QueryBuilder queryBuilder = buildFilterQuery(filter); + assertEquals(queryBuilder.toString(), loadJsonFromResource("filterQuery/RangeFilterQuery.json")); + } + + @Test + public void testEscapeReservedCharacters() { + assertEquals(escapeReservedCharacters("foobar"), "foobar"); + assertEquals(escapeReservedCharacters("**"), "\\*\\*"); + assertEquals(escapeReservedCharacters("()"), "\\(\\)"); + assertEquals(escapeReservedCharacters("{}"), "\\{\\}"); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/browse/BrowseDAOTest.java b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/browse/BrowseDAOTest.java new file mode 100644 index 000000000..433124b23 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/browse/BrowseDAOTest.java @@ -0,0 +1,103 @@ +package com.linkedin.metadata.dao.browse; + +import com.linkedin.common.urn.Urn; +import com.linkedin.testing.TestUtils; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchHits; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + + +public class BrowseDAOTest { + private BaseBrowseConfig _browseConfig; + private RestHighLevelClient _mockClient; + private ESBrowseDAO 
_browseDAO; + + @BeforeMethod + public void setup() { + _browseConfig = new TestBrowseConfig(); + _mockClient = mock(RestHighLevelClient.class); + _browseDAO = new ESBrowseDAO(_mockClient, _browseConfig); + } + + @Test + public void testMatchingPaths() { + List browsePaths = Arrays.asList("/all/subscriptions/premium_new_signups_v2/subs_new_bookings", + "/certified/lls/subs_new_bookings", + "/certified/lls/lex/subs_new_bookings", + "/certified/lls/consumer/subs_new_bookings", + "/subs_new_bookings", + "/School/Characteristics/General/Embedding/network_standardized_school_embeddings_v3" + ); + + // Scenario 1: inside /Certified/LLS + String path1 = "/certified/lls"; + assertEquals(ESBrowseDAO.getNextLevelPath(browsePaths, path1), "/certified/lls/subs_new_bookings"); + + // Scenario 2: inside /Certified/LLS/Consumer + String path2 = "/certified/lls/consumer"; + assertEquals(ESBrowseDAO.getNextLevelPath(browsePaths, path2), "/certified/lls/consumer/subs_new_bookings"); + + // Scenario 3: inside root directory + String path3 = ""; + assertEquals(ESBrowseDAO.getNextLevelPath(browsePaths, path3), "/subs_new_bookings"); + + // Scenario 4: inside an incorrect path /foo + // this situation should ideally not arise for entity browse queries + String path4 = "/foo"; + assertNull(ESBrowseDAO.getNextLevelPath(browsePaths, path4)); + + // Scenario 5: one of the browse paths isn't normalized + String path5 = "/school/characteristics/general/embedding"; + assertEquals(ESBrowseDAO.getNextLevelPath(browsePaths, path5), + "/School/Characteristics/General/Embedding/network_standardized_school_embeddings_v3"); + + // Scenario 6: current path isn't normalized, which ideally should not be the case + String path6 = "/School/Characteristics/General/Embedding"; + assertEquals(ESBrowseDAO.getNextLevelPath(browsePaths, path6), + "/School/Characteristics/General/Embedding/network_standardized_school_embeddings_v3"); + } + + @Test + public void testGetBrowsePath() throws Exception { + SearchResponse mockSearchResponse = mock(SearchResponse.class); + SearchHits mockSearchHits = mock(SearchHits.class); + SearchHit mockSearchHit = mock(SearchHit.class); + Urn dummyUrn = TestUtils.makeUrn(0); + Map sourceMap = new HashMap<>(); + + // Test when there is no search hit for getBrowsePaths + when(mockSearchHits.getHits()).thenReturn(new SearchHit[0]); + when(mockSearchResponse.getHits()).thenReturn(mockSearchHits); + when(_mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); + assertEquals(_browseDAO.getBrowsePaths(dummyUrn).size(), 0); + + // Test the case of single search hit & browsePaths field doesn't exist + sourceMap.remove(_browseConfig.getBrowsePathFieldName()); + when(mockSearchHit.getSourceAsMap()).thenReturn(sourceMap); + when(mockSearchHits.getHits()).thenReturn(new SearchHit[]{mockSearchHit}); + when(mockSearchResponse.getHits()).thenReturn(mockSearchHits); + when(_mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); + assertEquals(_browseDAO.getBrowsePaths(dummyUrn).size(), 0); + + // Test the case of single search hit & browsePaths field exists + sourceMap.put(_browseConfig.getBrowsePathFieldName(), Collections.singletonList("foo")); + when(mockSearchHit.getSourceAsMap()).thenReturn(sourceMap); + when(mockSearchHits.getHits()).thenReturn(new SearchHit[]{mockSearchHit}); + when(mockSearchResponse.getHits()).thenReturn(mockSearchHits); + when(_mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); + 
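+ // With browsePaths populated in the hit's source, exactly one path ("foo") should be returned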
assertEquals(_browseDAO.getBrowsePaths(dummyUrn).size(), 1); + assertEquals(_browseDAO.getBrowsePaths(dummyUrn).get(0), "foo"); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/browse/TestBrowseConfig.java b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/browse/TestBrowseConfig.java new file mode 100644 index 000000000..fcf823c44 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/browse/TestBrowseConfig.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.dao.browse; + +import com.linkedin.testing.EntityDocument; + + +public class TestBrowseConfig extends BaseBrowseConfig { + + @Override + public Class getSearchDocument() { + return EntityDocument.class; + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/search/ESSearchDAOTest.java b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/search/ESSearchDAOTest.java new file mode 100644 index 000000000..9d2399bd7 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/search/ESSearchDAOTest.java @@ -0,0 +1,230 @@ +package com.linkedin.metadata.dao.search; + +import com.google.common.collect.ImmutableMap; +import com.linkedin.common.UrnArray; +import com.linkedin.data.DataList; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.dao.utils.QueryUtils; +import com.linkedin.metadata.query.AggregationMetadataArray; +import com.linkedin.metadata.query.Condition; +import com.linkedin.metadata.query.Criterion; +import com.linkedin.metadata.query.CriterionArray; +import com.linkedin.metadata.query.Filter; +import com.linkedin.metadata.query.SearchResultMetadata; +import com.linkedin.metadata.query.SortCriterion; +import com.linkedin.metadata.query.SortOrder; +import com.linkedin.testing.EntityDocument; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.commons.io.IOUtils; +import org.apache.lucene.search.TotalHits; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchHits; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static com.linkedin.metadata.dao.utils.QueryUtils.*; +import static com.linkedin.testing.TestUtils.*; +import static org.testng.Assert.*; +import static org.mockito.Mockito.*; + + +public class ESSearchDAOTest { + + private ESSearchDAO _searchDAO; + private ESAutoCompleteQueryForHighCardinalityFields _esAutoCompleteQuery; + private TestSearchConfig _testSearchConfig; + + private static String loadJsonFromResource(String resourceName) throws IOException { + String jsonStr = IOUtils.toString(ClassLoader.getSystemResourceAsStream(resourceName), StandardCharsets.UTF_8); + return jsonStr.replaceAll("\\s+", ""); + } + + @BeforeMethod + public void setup() throws Exception { + _testSearchConfig = new TestSearchConfig(); + _searchDAO = new ESSearchDAO(null, EntityDocument.class, _testSearchConfig); + _esAutoCompleteQuery = new ESAutoCompleteQueryForHighCardinalityFields(_testSearchConfig); + } + + @Test + public void testDecoupleArrayToGetSubstringMatch() throws Exception { + // Test empty fieldVal + List fieldValList = 
Collections.emptyList(); + String searchInput = "searchInput"; + List searchResult = ESAutoCompleteQueryForHighCardinalityFields + .decoupleArrayToGetSubstringMatch(fieldValList, searchInput); + assertEquals(searchResult.size(), 0); + + // Test non-list fieldVal + String fieldValString = "fieldVal"; + searchInput = "searchInput"; + searchResult = ESAutoCompleteQueryForHighCardinalityFields + .decoupleArrayToGetSubstringMatch(fieldValString, searchInput); + assertEquals(searchResult.size(), 1); + + // Test list fieldVal with no match + fieldValList = Arrays.asList("fieldVal1", "fieldVal2", "fieldVal3"); + searchInput = "searchInput"; + searchResult = ESAutoCompleteQueryForHighCardinalityFields + .decoupleArrayToGetSubstringMatch(fieldValList, searchInput); + assertEquals(searchResult.size(), 0); + + // Test list fieldVal with single match + searchInput = "val1"; + searchResult = ESAutoCompleteQueryForHighCardinalityFields + .decoupleArrayToGetSubstringMatch(fieldValList, searchInput); + assertEquals(searchResult.size(), 1); + assertEquals(searchResult.get(0), "fieldVal1"); + + // Test list fieldVal with multiple match + searchInput = "val"; + searchResult = ESAutoCompleteQueryForHighCardinalityFields + .decoupleArrayToGetSubstringMatch(fieldValList, searchInput); + assertEquals(searchResult.size(), 3); + assertTrue(searchResult.equals(fieldValList)); + } + + @Test + public void testGetSuggestionList() throws Exception { + SearchHits searchHits = mock(SearchHits.class); + SearchHit hit1 = makeSearchHit(1); + SearchHit hit2 = makeSearchHit(2); + SearchHit hit3 = makeSearchHit(3); + when(searchHits.getHits()).thenReturn(new SearchHit[]{hit1, hit2, hit3}); + when(searchHits.getTotalHits()).thenReturn(new TotalHits(10L, TotalHits.Relation.EQUAL_TO)); + SearchResponse searchResponse = mock(SearchResponse.class); + when(searchResponse.getHits()).thenReturn(searchHits); + + StringArray res = _esAutoCompleteQuery.getSuggestionList(searchResponse, "name", "test", 2); + + assertEquals(res.size(), 2); + } + + @Test + public void testExtractSearchResultMetadata() throws Exception { + // Test: no aggregations in search response + SearchHits searchHits1 = mock(SearchHits.class); + when(searchHits1.getTotalHits()).thenReturn(new TotalHits(10L, TotalHits.Relation.EQUAL_TO)); + SearchResponse searchResponse1 = mock(SearchResponse.class); + when(searchResponse1.getHits()).thenReturn(searchHits1); + assertEquals(_searchDAO.extractSearchResultMetadata(searchResponse1), getDefaultSearchResultMetadata()); + + // Test: urn field exists in search document + SearchHits searchHits2 = mock(SearchHits.class); + SearchHit hit1 = makeSearchHit(1); + SearchHit hit2 = makeSearchHit(2); + when(searchHits2.getHits()).thenReturn(new SearchHit[]{hit1, hit2}); + SearchResponse searchResponse2 = mock(SearchResponse.class); + when(searchResponse2.getHits()).thenReturn(searchHits2); + UrnArray urns = new UrnArray(Arrays.asList(makeUrn(1), makeUrn(2))); + assertEquals(_searchDAO.extractSearchResultMetadata(searchResponse2), getDefaultSearchResultMetadata().setUrns(urns)); + + // Test: urn field does not exist in one search document, exists in another + SearchHits searchHits3 = mock(SearchHits.class); + SearchHit hit3 = mock(SearchHit.class); + when(hit3.getFields().get("urn")).thenReturn(null); + SearchHit hit4 = makeSearchHit(1); + when(searchHits3.getHits()).thenReturn(new SearchHit[]{hit3, hit4}); + SearchResponse searchResponse3 = mock(SearchResponse.class); + when(searchResponse3.getHits()).thenReturn(searchHits3); + 
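+ // hit3 exposes no urn in its source, so metadata extraction is expected to fail with a RuntimeException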
assertThrows(RuntimeException.class, () -> _searchDAO.extractSearchResultMetadata(searchResponse3)); + } + + @Test + public void testBuildDocumentsDataMap() { + Map sourceData = new HashMap<>(); + sourceData.put("field1", "val1"); + sourceData.put("field2", null); + ArrayList arrayList = new ArrayList<>(Arrays.asList("foo", "bar")); + sourceData.put("field3", arrayList); + DataMap dataMap = new DataMap(); + dataMap.put("field1", "val1"); + dataMap.put("field3", new DataList(arrayList)); + assertEquals(_searchDAO.buildDocumentsDataMap(sourceData), dataMap); + } + + @Test + public void testFilteredQueryWithTermsFilter() throws IOException { + int from = 0; + int size = 10; + Filter filter = newFilter(ImmutableMap.of("key1", "value1, value2 ", "key2", "value3", "key3", " ")); + SortCriterion sortCriterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField("urn"); + + // Test 1: sort order provided + SearchRequest searchRequest = _searchDAO.getFilteredSearchQuery(filter, sortCriterion, from, size); + assertEquals(searchRequest.source().toString(), loadJsonFromResource("SortByUrnTermsFilterQuery.json")); + assertEquals(searchRequest.indices(), new String[] {_testSearchConfig.getIndexName()}); + + // Test 2: no sort order provided, default is used. + searchRequest = _searchDAO.getFilteredSearchQuery(filter, null, from, size); + assertEquals(searchRequest.source().toString(), loadJsonFromResource("DefaultSortTermsFilterQuery.json")); + assertEquals(searchRequest.indices(), new String[] {_testSearchConfig.getIndexName()}); + + // Test 3: empty request map provided + searchRequest = _searchDAO.getFilteredSearchQuery(EMPTY_FILTER, sortCriterion, from, size); + assertEquals(searchRequest.source().toString(), loadJsonFromResource("EmptyFilterQuery.json")); + assertEquals(searchRequest.indices(), new String[] {_testSearchConfig.getIndexName()}); + } + + @Test + public void testFilteredQueryWithRangeFilter() throws IOException { + int from = 0; + int size = 10; + final Filter filter1 = new Filter().setCriteria(new CriterionArray(Arrays.asList( + new Criterion().setField("field_gt").setValue("100").setCondition(Condition.GREATER_THAN), + new Criterion().setField("field_gte").setValue("200").setCondition(Condition.GREATER_THAN_OR_EQUAL_TO), + new Criterion().setField("field_lt").setValue("300").setCondition(Condition.LESS_THAN), + new Criterion().setField("field_lte").setValue("400").setCondition(Condition.LESS_THAN_OR_EQUAL_TO) + ))); + SortCriterion sortCriterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField("urn"); + + SearchRequest searchRequest = _searchDAO.getFilteredSearchQuery(filter1, sortCriterion, from, size); + assertEquals(searchRequest.source().toString(), loadJsonFromResource("RangeFilterQuery.json")); + assertEquals(searchRequest.indices(), new String[] {_testSearchConfig.getIndexName()}); + } + + @Test + public void testFilteredQueryUnsupportedCondition() { + int from = 0; + int size = 10; + final Filter filter2 = new Filter().setCriteria(new CriterionArray(Arrays.asList( + new Criterion().setField("field_contain").setValue("value_contain").setCondition(Condition.CONTAIN) + ))); + SortCriterion sortCriterion = new SortCriterion().setOrder(SortOrder.ASCENDING).setField("urn"); + assertThrows(UnsupportedOperationException.class, () -> _searchDAO.getFilteredSearchQuery(filter2, sortCriterion, from, size)); + } + + @Test + public void testPreferenceInSearchQuery() { + String input = "test"; + Map requestMap = Collections.singletonMap("key", "value"); + Filter filter = 
QueryUtils.newFilter(requestMap); + String preference = "urn:li:servicePrincipal:appName"; + SearchRequest searchRequest = _searchDAO.constructSearchQuery(input, filter, null, preference, 0, 10); + assertEquals(searchRequest.preference(), preference); + } + + private static SearchHit makeSearchHit(int id) { + SearchHit hit = mock(SearchHit.class); + Map sourceMap = new HashMap<>(); + sourceMap.put("urn", makeUrn(id).toString()); + sourceMap.put("name", "test" + id); + when(hit.getSourceAsMap()).thenReturn(sourceMap); + when(hit.getSourceAsMap()).thenReturn(sourceMap); + return hit; + } + + private static SearchResultMetadata getDefaultSearchResultMetadata() { + return new SearchResultMetadata().setSearchResultMetadatas(new AggregationMetadataArray()).setUrns(new UrnArray()); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/search/TestSearchConfig.java b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/search/TestSearchConfig.java new file mode 100644 index 000000000..c9558b91e --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/search/TestSearchConfig.java @@ -0,0 +1,39 @@ +package com.linkedin.metadata.dao.search; + +import com.linkedin.testing.EntityDocument; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import javax.annotation.Nonnull; + +public class TestSearchConfig extends BaseSearchConfig { + @Override + @Nonnull + public Set getFacetFields() { + return Collections.unmodifiableSet(new HashSet<>()); + } + + @Override + @Nonnull + public Class getSearchDocument() { + return EntityDocument.class; + } + + @Override + @Nonnull + public String getDefaultAutocompleteField() { + return "urn"; + } + + @Override + @Nonnull + public String getSearchQueryTemplate() { + return "{}"; + } + + @Override + @Nonnull + public String getAutocompleteQueryTemplate() { + return ""; + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/utils/SearchUtilsTest.java b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/utils/SearchUtilsTest.java new file mode 100644 index 000000000..79f1788dc --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/java/com/linkedin/metadata/dao/utils/SearchUtilsTest.java @@ -0,0 +1,40 @@ +package com.linkedin.metadata.dao.utils; + +import com.linkedin.metadata.query.Condition; +import com.linkedin.metadata.query.Criterion; +import com.linkedin.metadata.query.CriterionArray; +import com.linkedin.metadata.query.Filter; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.testng.annotations.Test; + +import static com.linkedin.metadata.dao.utils.SearchUtils.*; +import static org.testng.Assert.*; + +public class SearchUtilsTest { + @Test + public void testGetRequestMap() { + // Empty filter + final Filter filter1 = QueryUtils.newFilter(null); + final Map actual1 = getRequestMap(filter1); + assertTrue(actual1.isEmpty()); + + // Filter with criteria with default condition + final Map requestParams = Collections.unmodifiableMap(new HashMap() { + { + put("key1", "value1"); + put("key2", "value2"); + } + }); + final Filter filter2 = QueryUtils.newFilter(requestParams); + final Map actual2 = getRequestMap(filter2); + assertEquals(actual2, requestParams); + + // Filter with unsupported condition + final Filter filter3 = new Filter().setCriteria(new CriterionArray( + new Criterion().setField("key").setValue("value").setCondition(Condition.CONTAIN) + )); + 
assertThrows(UnsupportedOperationException.class, () -> getRequestMap(filter3)); + } +} diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/DefaultSortTermsFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/DefaultSortTermsFilterQuery.json new file mode 100644 index 000000000..47230a87c --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/DefaultSortTermsFilterQuery.json @@ -0,0 +1,41 @@ +{ + "from" : 0, + "size" : 10, + "query" : { + "bool" : { + "filter" : [ + { + "terms" : { + "key1" : [ + "value1", + "value2" + ], + "boost" : 1.0 + } + }, + { + "terms" : { + "key2" : [ + "value3" + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + "sort" : [ + { + "_score" : { + "order" : "desc" + } + }, + { + "urn" : { + "order" : "asc" + } + } + ] +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/EmptyFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/EmptyFilterQuery.json new file mode 100644 index 000000000..da27feb01 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/EmptyFilterQuery.json @@ -0,0 +1,17 @@ +{ + "from" : 0, + "size" : 10, + "query" : { + "bool" : { + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + "sort" : [ + { + "urn" : { + "order" : "asc" + } + } + ] +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/RangeFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/RangeFilterQuery.json new file mode 100644 index 000000000..4869a11a6 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/RangeFilterQuery.json @@ -0,0 +1,63 @@ +{ + "from" : 0, + "size" : 10, + "query" : { + "bool" : { + "filter" : [ + { + "range" : { + "field_gt" : { + "from" : "100", + "to" : null, + "include_lower" : false, + "include_upper" : true, + "boost" : 1.0 + } + } + }, + { + "range" : { + "field_gte" : { + "from" : "200", + "to" : null, + "include_lower" : true, + "include_upper" : true, + "boost" : 1.0 + } + } + }, + { + "range" : { + "field_lt" : { + "from" : null, + "to" : "300", + "include_lower" : true, + "include_upper" : false, + "boost" : 1.0 + } + } + }, + { + "range" : { + "field_lte" : { + "from" : null, + "to" : "400", + "include_lower" : true, + "include_upper" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + "sort" : [ + { + "urn" : { + "order" : "asc" + } + } + ] +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/SortByUrnTermsFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/SortByUrnTermsFilterQuery.json new file mode 100644 index 000000000..b71e0289d --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/SortByUrnTermsFilterQuery.json @@ -0,0 +1,36 @@ +{ + "from" : 0, + "size" : 10, + "query" : { + "bool" : { + "filter" : [ + { + "terms" : { + "key1" : [ + "value1", + "value2" + ], + "boost" : 1.0 + } + }, + { + "terms" : { + "key2" : [ + "value3" + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + "sort" : [ + { + "urn" : { + "order" : "asc" + } + } + ] +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/AndFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/AndFilterQuery.json new file mode 100644 index 000000000..50a94a8a5 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/AndFilterQuery.json 
@@ -0,0 +1,54 @@ +{ + "bool" : { + "must" : [ + { + "bool" : { + "should" : [ + { + "match" : { + "key1" : { + "query" : "value1", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "bool" : { + "should" : [ + { + "match" : { + "key2" : { + "query" : "value2", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/ComplexFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/ComplexFilterQuery.json new file mode 100644 index 000000000..d1280c5cb --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/ComplexFilterQuery.json @@ -0,0 +1,69 @@ +{ + "bool" : { + "must" : [ + { + "bool" : { + "should" : [ + { + "match" : { + "key1" : { + "query" : "value1", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + }, + { + "match" : { + "key1" : { + "query" : "value2", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "bool" : { + "should" : [ + { + "match" : { + "key2" : { + "query" : "value2", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/EmptyFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/EmptyFilterQuery.json new file mode 100644 index 000000000..9f2caa384 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/EmptyFilterQuery.json @@ -0,0 +1,6 @@ +{ + "bool" : { + "adjust_pure_negative" : true, + "boost" : 1.0 + } +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/OrFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/OrFilterQuery.json new file mode 100644 index 000000000..9dae64d53 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/OrFilterQuery.json @@ -0,0 +1,46 @@ +{ + "bool" : { + "must" : [ + { + "bool" : { + "should" : [ + { + "match" : { + "key1" : { + "query" : "value1", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + }, + { + "match" : { + "key1" : 
{ + "query" : "value2", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } +} \ No newline at end of file diff --git a/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/RangeFilterQuery.json b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/RangeFilterQuery.json new file mode 100644 index 000000000..13e4a4819 --- /dev/null +++ b/dao-impl/elasticsearch-dao-7/src/test/resources/filterQuery/RangeFilterQuery.json @@ -0,0 +1,52 @@ +{ + "bool" : { + "must" : [ + { + "range" : { + "key1" : { + "from" : "value1", + "to" : null, + "include_lower" : false, + "include_upper" : true, + "boost" : 1.0 + } + } + }, + { + "range" : { + "key1" : { + "from" : null, + "to" : "value2", + "include_lower" : true, + "include_upper" : false, + "boost" : 1.0 + } + } + }, + { + "range" : { + "key2" : { + "from" : "value3", + "to" : null, + "include_lower" : true, + "include_upper" : true, + "boost" : 1.0 + } + } + }, + { + "range" : { + "key3" : { + "from" : null, + "to" : "value4", + "include_lower" : true, + "include_upper" : true, + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } +} \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index 2e134f8fd..ab4429e01 100644 --- a/settings.gradle +++ b/settings.gradle @@ -3,6 +3,7 @@ include 'core-models-utils' include 'dao-api' include 'dao-impl:ebean-dao' include 'dao-impl:elasticsearch-dao' +include 'dao-impl:elasticsearch-dao-7' include 'dao-impl:neo4j-dao' include 'restli-resources' include 'testing:core-models-testing'
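
As a final orientation note for reviewers, the sketch below shows one way the new dao-impl:elasticsearch-dao-7 module could be wired up, mirroring the constructor calls exercised by the tests in this change (new ESSearchDAO(client, documentClass, searchConfig) and new ESBrowseDAO(client, browseConfig)). It is a minimal, assumption-laden sketch rather than part of the change itself: the first ESSearchDAO argument is passed as null in the unit tests and is presumed here to be the RestHighLevelClient, the host and port are placeholders, and the test-scope EntityDocument, TestSearchConfig, and TestBrowseConfig classes are reused purely as stand-ins for a real document model and production BaseSearchConfig/BaseBrowseConfig subclasses.

import com.linkedin.metadata.dao.browse.ESBrowseDAO;
import com.linkedin.metadata.dao.browse.TestBrowseConfig;
import com.linkedin.metadata.dao.search.ESSearchDAO;
import com.linkedin.metadata.dao.search.TestSearchConfig;
import com.linkedin.testing.EntityDocument;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;

public class Es7DaoWiringSketch {
  public static void main(String[] args) {
    // Standard Elasticsearch 7.x high-level REST client; "localhost:9200" is a placeholder endpoint.
    RestHighLevelClient client =
        new RestHighLevelClient(RestClient.builder(new HttpHost("localhost", 9200, "http")));

    // Search DAO: client, the search document class backing the index, and a BaseSearchConfig subclass.
    // (The tests construct it as new ESSearchDAO(null, EntityDocument.class, new TestSearchConfig());
    // the null slot is assumed here to be the client.)
    ESSearchDAO searchDao = new ESSearchDAO(client, EntityDocument.class, new TestSearchConfig());

    // Browse DAO: client plus a BaseBrowseConfig subclass pointing at the same document class.
    ESBrowseDAO browseDao = new ESBrowseDAO(client, new TestBrowseConfig());

    // From here callers would use the methods exercised in the tests above,
    // e.g. browseDao.getBrowsePaths(urn) to list the browse paths indexed for a given entity urn.
  }
}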