From c75de83baadfdd75ddccc45ea7f136d9b34415df Mon Sep 17 00:00:00 2001
From: zzzxl
Date: Tue, 17 Dec 2024 11:04:37 +0800
Subject: [PATCH] [fix](inverted index) Fix Null Pointer Exception in function match (#45456)

Problem Summary:

Using select match_phrase('', '') in the old optimizer causes issues.

analyse_query_str_token() now returns early when inverted_index_ctx is
null. The check runs before the debug log, because that log statement
itself reads inverted_index_ctx->parser_type.
---
 be/src/vec/functions/match.cpp               |  4 ++
 be/test/vec/function/function_match_test.cpp | 68 ++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 be/test/vec/function/function_match_test.cpp

diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index 33500b61b3237d3..80e5b55a1d16852 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -129,6 +129,10 @@ void FunctionMatchBase::analyse_query_str_token(std::vector<std::string>* query_
                                                 const std::string& column_name) {
+    // Bail out before the debug log below, which dereferences inverted_index_ctx.
+    if (inverted_index_ctx == nullptr) {
+        return;
+    }
     VLOG_DEBUG << "begin to run " << get_name() << ", parser_type: "
                << inverted_index_parser_type_to_string(inverted_index_ctx->parser_type);
     if (inverted_index_ctx->parser_type == InvertedIndexParserType::PARSER_NONE) {
         query_tokens->emplace_back(match_query_str);
         return;
diff --git a/be/test/vec/function/function_match_test.cpp b/be/test/vec/function/function_match_test.cpp
new file mode 100644
index 000000000000000..6d95a4632884f83
--- /dev/null
+++ b/be/test/vec/function/function_match_test.cpp
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "function_test_util.h"
+#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h"
+#include "vec/functions/match.h"
+
+namespace doris::vectorized {
+
+// Covers FunctionMatchBase::analyse_query_str_token() for a null inverted index
+// context, PARSER_NONE and PARSER_ENGLISH.
+TEST(FunctionMatchTest, analyse_query_str) {
+    FunctionMatchPhrase func_match_phrase;
+
+    // Null context: the call must return without touching query_tokens.
+    {
+        InvertedIndexCtx* inverted_index_ctx = nullptr;
+        std::vector<std::string> query_tokens;
+        func_match_phrase.analyse_query_str_token(&query_tokens, inverted_index_ctx, "a b c",
+                                                  "name");
+        ASSERT_EQ(query_tokens.size(), 0);
+    }
+
+    // PARSER_NONE: the query string is emitted as one token.
+    {
+        auto inverted_index_ctx = std::make_unique<InvertedIndexCtx>();
+        inverted_index_ctx->parser_type = InvertedIndexParserType::PARSER_NONE;
+        std::vector<std::string> query_tokens;
+        func_match_phrase.analyse_query_str_token(&query_tokens, inverted_index_ctx.get(),
+                                                  "a b c", "name");
+        ASSERT_EQ(query_tokens.size(), 1);
+    }
+
+    // PARSER_ENGLISH: "a b c" is expected to yield three tokens.
+    {
+        auto inverted_index_ctx = std::make_unique<InvertedIndexCtx>();
+        inverted_index_ctx->parser_type = InvertedIndexParserType::PARSER_ENGLISH;
+        auto analyzer = doris::segment_v2::inverted_index::InvertedIndexAnalyzer::create_analyzer(
+                inverted_index_ctx.get());
+        inverted_index_ctx->analyzer = analyzer.get();
+        std::vector<std::string> query_tokens;
+        func_match_phrase.analyse_query_str_token(&query_tokens, inverted_index_ctx.get(),
+                                                  "a b c", "name");
+        ASSERT_EQ(query_tokens.size(), 3);
+    }
+}
+
+} // namespace doris::vectorized