From 9f80414c7e45a023451ee20685eafcb4fa0d77bb Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 01/16] Kusto-phase1: Add Support to Kusto Query Language This is the initial implementation of Kusto Query Language. In this commit, we support the following features as MVP: Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count, min, max, sum Aggregate by time intervals --- src/Client/ClientBase.cpp | 15 +- src/Core/Settings.h | 1 + src/Interpreters/executeQuery.cpp | 19 +- src/Parsers/CMakeLists.txt | 1 + src/Parsers/Kusto/ParserKQLFilter.cpp | 39 ++++ src/Parsers/Kusto/ParserKQLFilter.h | 16 ++ src/Parsers/Kusto/ParserKQLLimit.cpp | 58 ++++++ src/Parsers/Kusto/ParserKQLLimit.h | 17 ++ src/Parsers/Kusto/ParserKQLOperators.cpp | 239 +++++++++++++++++++++++ src/Parsers/Kusto/ParserKQLOperators.h | 103 ++++++++++ src/Parsers/Kusto/ParserKQLProject.cpp | 47 +++++ src/Parsers/Kusto/ParserKQLProject.h | 22 +++ src/Parsers/Kusto/ParserKQLQuery.cpp | 123 ++++++++++++ src/Parsers/Kusto/ParserKQLQuery.h | 25 +++ src/Parsers/Kusto/ParserKQLSort.cpp | 71 +++++++ src/Parsers/Kusto/ParserKQLSort.h | 16 ++ src/Parsers/Kusto/ParserKQLStatement.cpp | 61 ++++++ src/Parsers/Kusto/ParserKQLStatement.h | 45 +++++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 162 +++++++++++++++ src/Parsers/Kusto/ParserKQLSummarize.h | 19 ++ src/Parsers/Kusto/ParserKQLTable.cpp | 68 +++++++ src/Parsers/Kusto/ParserKQLTable.h | 18 ++ src/Parsers/Lexer.cpp | 2 +- src/Parsers/Lexer.h | 1 + src/Parsers/tests/gtest_Parser.cpp | 179 +++++++++++++++++ 25 files changed, 1359 insertions(+), 8 deletions(-) create mode 100644 src/Parsers/Kusto/ParserKQLFilter.cpp create mode 100644 src/Parsers/Kusto/ParserKQLFilter.h create mode 100644
src/Parsers/Kusto/ParserKQLLimit.cpp create mode 100644 src/Parsers/Kusto/ParserKQLLimit.h create mode 100644 src/Parsers/Kusto/ParserKQLOperators.cpp create mode 100644 src/Parsers/Kusto/ParserKQLOperators.h create mode 100644 src/Parsers/Kusto/ParserKQLProject.cpp create mode 100644 src/Parsers/Kusto/ParserKQLProject.h create mode 100644 src/Parsers/Kusto/ParserKQLQuery.cpp create mode 100644 src/Parsers/Kusto/ParserKQLQuery.h create mode 100644 src/Parsers/Kusto/ParserKQLSort.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSort.h create mode 100644 src/Parsers/Kusto/ParserKQLStatement.cpp create mode 100644 src/Parsers/Kusto/ParserKQLStatement.h create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.cpp create mode 100644 src/Parsers/Kusto/ParserKQLSummarize.h create mode 100644 src/Parsers/Kusto/ParserKQLTable.cpp create mode 100644 src/Parsers/Kusto/ParserKQLTable.h diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 396fd97368e9..656acc3db144 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -69,7 +69,7 @@ #include #include #include - +#include namespace fs = std::filesystem; using namespace std::literals; @@ -299,7 +299,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); + std::shared_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -308,10 +308,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; + const String & sql_dialect = settings.sql_dialect; + + if (sql_dialect == "kusto") + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); + else + parser = std::make_shared(end, global_context->getSettings().allow_settings_after_format_in_insert); 
+ if (is_interactive || ignore_error) { String message; - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -321,7 +328,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f1fd9d20f004..4c784aa0f7ca 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -38,6 +38,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(String, sql_dialect, "clickhouse", "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 24649128cee5..cd257567cd51 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -70,6 +70,7 @@ #include +#include namespace ProfileEvents { @@ -406,10 +407,22 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == 
"clickhouse" || sql_dialect == "kusto"); - /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + if (sql_dialect == "kusto" && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6e..73d46593e042 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp new file mode 100644 index 000000000000..ad7ad807d03a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; + + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) + return false; + + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h new file mode 100644 index 000000000000..19bb38a7fdae --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLFilter : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL where"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp new file mode 100644 index 000000000000..7811ebba9abb --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) // parses the smallest numeric operand among the recorded operator positions +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + Int64 minLimit = -1; // -1 means "no limit seen yet"; operands are digit-only, so never negative + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(static_cast<unsigned char>(*ch))) // cast: isdigit is undefined for negative char values + return false; + } + return true; + }; + + if (!isNumber()) + return false; + + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + + if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) + return false; + + pos = begin; // the caller's position is left untouched on success + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h new file mode 100644 index 000000000000..d425659499d0 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLLimit : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL limit"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp new file mode 100644 index 000000000000..1db05d3c07a3 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -0,0 +1,239 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +{ + String new_expr, leftWildcards= "", rightWildcards=""; + + switch (wildcardsPos) + {
+ case WildcardsPos::none: + break; + + case WildcardsPos::left: + leftWildcards ="%"; + break; + + case WildcardsPos::right: + rightWildcards = "%"; + break; + + case WildcardsPos::both: + leftWildcards ="%"; + rightWildcards = "%"; + break; + } + + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + else + throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::getExprFromToken(IParser::Pos pos) +{ + String res; + std::vector tokens; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + KQLOperatorValue opValue = KQLOperatorValue::none; + + auto token = String(pos->begin,pos->end); + + String op = token; + if ( token == "!" ) + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op ="!"+String(pos->begin,pos->end); + } + else if (token == "matches") + { + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "regex") + op +=" regex"; + else + --pos; + } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "~") + op +="~"; + else + --pos; + } + + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; + + String new_expr; + if (opValue == KQLOperatorValue::none) + tokens.push_back(op); + else + { + switch (opValue) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); + break; + + case 
KQLOperatorValue::not_contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + break; + + case KQLOperatorValue::not_equal: + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + break; + + case KQLOperatorValue::has_any: + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + break; + + case KQLOperatorValue::not_hasprefix: + break; + + case KQLOperatorValue::hasprefix_cs: + break; + + case KQLOperatorValue::not_hasprefix_cs: + break; + + case 
KQLOperatorValue::hassuffix: + break; + + case KQLOperatorValue::not_hassuffix: + break; + + case KQLOperatorValue::hassuffix_cs: + break; + + case KQLOperatorValue::not_hassuffix_cs: + break; + + case KQLOperatorValue::in_cs: + new_expr = "in"; + break; + + case KQLOperatorValue::not_in_cs: + new_expr = "not in"; + break; + + case KQLOperatorValue::in: + break; + + case KQLOperatorValue::not_in: + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + } + ++pos; + } + + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h new file mode 100644 index 000000000000..9beeeda55efc --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class KQLOperators { +public: + String getExprFromToken(IParser::Pos pos); +protected: + + enum class WildcardsPos:uint8_t + { + none, + left, + right, + both + }; + + enum class KQLOperatorValue : uint16_t + { + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal,//!~ + equal_cs, //= + not_equal_cs,//!= + has, + not_has, + has_all, + has_any, + has_cs, 
+ not_has_cs, + hasprefix, + not_hasprefix, + hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in ,//!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, + }; + + std::unordered_map KQLOperator = // KQL operator spelling -> KQLOperatorValue; keys must be unique + { + {"contains" , KQLOperatorValue::contains}, + {"!contains" , KQLOperatorValue::not_contains}, + {"contains_cs" , KQLOperatorValue::contains_cs}, + {"!contains_cs" , KQLOperatorValue::not_contains_cs}, + {"endswith" , KQLOperatorValue::endswith}, + {"!endswith" , KQLOperatorValue::not_endswith}, + {"endswith_cs" , KQLOperatorValue::endswith_cs}, + {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, + {"=~" , KQLOperatorValue::equal}, + {"!~" , KQLOperatorValue::not_equal}, + {"==" , KQLOperatorValue::equal_cs}, + {"!=" , KQLOperatorValue::not_equal_cs}, + {"has" , KQLOperatorValue::has}, + {"!has" , KQLOperatorValue::not_has}, + {"has_all" , KQLOperatorValue::has_all}, + {"has_any" , KQLOperatorValue::has_any}, + {"has_cs" , KQLOperatorValue::has_cs}, + {"!has_cs" , KQLOperatorValue::not_has_cs}, + {"hasprefix" , KQLOperatorValue::hasprefix}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, + {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix" , KQLOperatorValue::hassuffix}, + {"!hassuffix" , KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, + {"in" , KQLOperatorValue::in_cs}, + {"!in" , KQLOperatorValue::not_in_cs}, + {"in~" , KQLOperatorValue::in}, + {"!in~" , KQLOperatorValue::not_in}, + {"matches regex" , KQLOperatorValue::matches_regex}, + {"startswith" , KQLOperatorValue::startswith}, + {"!startswith" , KQLOperatorValue::not_startswith}, + {"startswith_cs" , KQLOperatorValue::startswith_cs}, + {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, + }; +
String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp new file mode 100644 index 000000000000..fee8cdb612b6 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); + + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } + + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + pos = begin; + + return true; +} + + + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h new file mode 100644 index 000000000000..3ab3c82f1be3 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProject : public ParserKQLBase +{ +public: + void addColumn(String column) {columns.insert(column);} + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 000000000000..0a9fa1fc4df0 --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLBase :: parsePrepare(Pos & pos) +{ + op_pos.push_back(pos); + return true; +} + +String ParserKQLBase :: getExprFromToken(Pos pos) +{ + String res; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + res = res + String(pos->begin,pos->end) +" "; + ++pos; + } + return res; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto select_query = std::make_shared(); + node = select_query; + + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + { "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { "order",&KQLsort_p}, + { "summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} + }; + + std::vector> operation_pos; + + operation_pos.push_back(std::make_pair("table",pos)); + + while (!pos->isEnd()) + { + ++pos; + if (pos->type == TokenType::PipeMark) + { + ++pos; + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(KQLoperator,pos)); + } + } + + for (auto &op_pos : operation_pos) + { + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) + return false; + + if (!KQLParser[KQLoperator]->parsePrepare(npos)) + return false; + } + + if (!KQLtable_p.parse(pos, tables, expected)) + return false; + + if 
(!KQLproject_p.parse(pos, select_expression_list, expected)) + return false; + + if (!KQLlimit_p.parse(pos, limit_length, expected)) + return false; + + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; + + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; + + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; + else + group_expression_list = KQLsummarize_p.group_expression_list; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 000000000000..25aa4e6b83c2 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + virtual bool parsePrepare(Pos & pos) ; + +protected: + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); +}; + +class ParserKQLQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 000000000000..9f226c2fc824 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include 
+#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + ParserKeyword by("by"); + + pos = op_pos.back(); // sort only affected by last one + + if (!by.ignore(pos, expected)) + return false; + + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + return false; + + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + String tmp(pos->begin,pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + + ++pos; + } + has_directions.push_back(has_dir); + + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) + { + auto order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
-1 : 1; + + } + } + + node = order_expression_list; + + pos =begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 000000000000..d9afefc196c8 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 000000000000..7dea87eef25d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery KQL_p; + + ASTPtr query; + bool parsed = KQL_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + // will support union next phase + ASTPtr KQLQuery; + + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + return false; + + if (KQLQuery->as()) + { + node = std::move(KQLQuery); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(KQLQuery); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + 
select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 000000000000..1eed2d008451 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 000000000000..f7422c02bca1 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,162 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +namespace DB +{ +std::pair removeLastWord(String input) // splits on ' '; returns {all words but the last, joined without separators; the last word} +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t i = 0; i + 1 < temp.size(); i++) // "i + 1 <" cannot wrap around when temp is empty + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp.empty() ? String() : temp.back()); // empty input yields two empty strings +} + + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; + + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...] [by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns + + auto begin = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + groupby = true; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); +
exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } + } + ++pos; + } + + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; + + if (exprGroupby.empty()) + exprColumns = exprAggregation; + else + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + return false; + + if (groupby) + { + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 000000000000..426ac29fe6a9 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 000000000000..8d450799785d --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ( { + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + "DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); + + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) + return false; + pos = begin; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 000000000000..1266b6e732d5 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f71..892c0ad47189 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return Token(TokenType::ErrorSinglePipeMark, 
token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a360..0c439ca06771 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. */ \ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e27413..8ffc5f77f90c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,181 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" + }, + { + "Customers | 
project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "throws Syntax error" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY Age DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + 
"SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "throws Syntax error" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE 
hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" + } +}))); From c12d8c39169cc1adb309ba35fdbab15e97cd853c Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 02/16] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 102 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 83 insertions(+), 24 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca1..24473118dc04 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String 
bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s + if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == 
"bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + exprGroupby += exprBin; + bin_function = false; + } } + else exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; - } - - last_string = String(pos->begin, pos->end); } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a9..1420d5ce5198 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From 608d977f372eedea4cf02f0574fc4ce1ba6e0ae3 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 
2022 11:18:49 -0700 Subject: [PATCH 03/16] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 8ffc5f77f90c..6d33ed20f333 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -430,6 +430,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From 7f6054ff5fde4dbd63042d606bacdfafcfed6a22 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 04/16] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc04..0260902c9379 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] - //Aggregation -> the Aggregation function on column - //GroupExpression - > columns auto begin = pos; From d256101c9b0e25a84f33e8026411e999c75bac27 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 12:06:15 -0700 Subject: [PATCH 05/16] corrected unit test --- src/Parsers/tests/gtest_Parser.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6d33ed20f333..1ce82cab3bd8 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -432,8 +432,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" - } + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + }, { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" @@ -469,9 +469,5 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers|summarize count() by bin(Age, 10) ", - "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From 15eeb5519952c9f9e0ed73c5fd8892abbf3a973f Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 18:49:22 -0700 Subject: [PATCH 06/16] Kusto-phase1 : Add new test cases --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1ce82cab3bd8..ee1e5fa6d8c3 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -469,5 +469,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE 
match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); From 18beb801c318f7ecb7fd7ab28e30317047b71a5e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 22:17:58 -0700 Subject: [PATCH 07/16] Kusto-phase1: Fixed the bug for KQL filter with multiple operations --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++ src/Parsers/tests/gtest_Parser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1db05d3c07a3..726f28308eef 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -84,6 +84,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; } + else + --pos; if (KQLOperator.find(op) != KQLOperator.end()) opValue = KQLOperator[op]; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index ee1e5fa6d8c3..cb0b49aecbbf 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -408,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "throws Syntax error" + "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" }, { "Customers |summarize count() by Occupation", From 1c0a9b04fbef4152eee824f0d590e872b014ea18 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 08/16] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 34 +++++------ src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++---------
src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 111 insertions(+), 115 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03a..466370f5d803 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9abb..4f7eddd96625 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) 
return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 726f28308eef..90b37ba8aea5 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,33 +10,33 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; - switch (wildcardsPos) + switch (wildcards_pos) { case WildcardsPos::none: break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; + if (!tokens.empty() && ((++token_pos)->type == 
TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else - throw Exception("Syntax error near " + KQLOp, ErrorCodes::SYNTAX_ERROR); + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } @@ -48,7 +48,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -88,14 +88,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -192,7 +192,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -232,8 +232,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9beeeda55efc..4a9a13cf14fa 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ class KQLOperators { {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , 
KQLOperatorValue::not_startswith_cs}, }; - String genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df0..55aade6b2b92 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - 
String KQLoperator(pos->begin,pos->end); - if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 9f226c2fc824..70e3283ee3e0 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ 
b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -48,11 +48,11 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25d..2afbad221314 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git 
a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 0260902c9379..48544a311041 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; + String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = 
false; String bin_column; @@ -133,45 +131,45 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby 
+ "," + expr_aggregation; } - Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce5198..b243f74d08f6 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ class ParserKQLSummarize : public ParserKQLBase protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From 92dd9e64ee6cf70ef9e90687c941766de4e0c593 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 09/16] Kusto-phase1: Fixed moy style issues.
--- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 90b37ba8aea5..260c9070d513 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" 
) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -134,7 +134,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -142,7 +142,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 4a9a13cf14fa..a780e18d3339 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b6..0e25c9c4a6c3 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b92..1a850e77f483 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 25aa4e6b83c2..0545cd00cd9e 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 
@@ class ParserKQLBase : public IParserBase protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785d..a7ae7fef5795 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From 47b7a1c24da9a5dcce8c86c410cebd09bce11354 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 10/16] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 260c9070d513..60fa022f9bb1 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -34,7 +34,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards +
"')"; else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); @@ -53,7 +53,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef5795..f1fc13d2c488 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From 4187a03b780c7f164777a4985e2646be383994f2 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 14 Jun 2022 07:40:06 -0700 Subject: [PATCH 11/16] Kusto-phase2: Add support for multiple summarize --- src/Parsers/Kusto/ParserKQLQuery.cpp | 6 +++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 49 +++++++++++++++++++++++- src/Parsers/Kusto/ParserKQLSummarize.h | 5 ++- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 1a850e77f483..d925f66b321b 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -63,6 +63,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) std::vector> operation_pos; operation_pos.push_back(std::make_pair("table",pos)); + String table_name(pos->begin,pos->end); while (!pos->isEnd()) { @@ -104,10 +105,15 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; + kql_summarize_p.setTableName(table_name); if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else + { group_expression_list =
kql_summarize_p.group_expression_list; + if (kql_summarize_p.tables) + tables = kql_summarize_p.tables; + } select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 48544a311041..7a88fec1988b 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -106,10 +106,57 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (op_pos.empty()) return true; - if (op_pos.size() != 1) // now only support one summarize + if (op_pos.size() > 2) // now only support max 2 summarize return false; auto begin = pos; + ASTPtr sub_qurery_table; + +// rewrite this part, make it resusable (may contains bin etc, and please inmplement summarize age= avg(Age) for sub query too): + if (op_pos.size() == 2) + { + bool groupby = false; + auto sub_pos = op_pos.front(); + String sub_aggregation; + String sub_groupby; + String sub_columns; + while (!sub_pos->isEnd() && sub_pos->type != TokenType::PipeMark && sub_pos->type != TokenType::Semicolon) + { + if (String(sub_pos->begin,sub_pos->end) == "by") + groupby = true; + else + { + if (groupby) + sub_groupby = sub_groupby + String(sub_pos->begin,sub_pos->end) +" "; + else + sub_aggregation = sub_aggregation + String(sub_pos->begin,sub_pos->end) +" "; + } + ++sub_pos; + } + + String sub_query; + if (sub_groupby.empty()) + { + sub_columns =sub_aggregation; + sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name+")"; + } + else + { + if (sub_aggregation.empty()) + sub_columns = sub_groupby; + else + sub_columns = sub_groupby + "," + sub_aggregation; + sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+")"; + } + + Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); + IParser::Pos 
pos_subquery(token_subquery, pos.max_depth); + + if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) + return false; + tables = sub_qurery_table; + } + pos = op_pos.back(); String expr_aggregation; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index b243f74d08f6..b71af138e7e6 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -9,12 +9,15 @@ class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; - + ASTPtr tables; + void setTableName(String table_name_) {table_name = table_name_;} protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; static std::pair removeLastWord(String input); static String getBinGroupbyString(String expr_bin); +private: + String table_name; }; } From dc74cde842f973c4baf2001f0e19e584277236ff Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 17 Jun 2022 08:47:08 -0700 Subject: [PATCH 12/16] Kusto-phase2 : Added KQL functions interface. 
changed the summarize class for new aggregation functions --- src/Parsers/CMakeLists.txt | 1 + .../KustoFunctions/IParserKQLFunction.cpp | 33 + .../Kusto/KustoFunctions/IParserKQLFunction.h | 39 + .../KQLAggregationFunctions.cpp | 24 + .../KustoFunctions/KQLAggregationFunctions.h | 9 + .../KustoFunctions/KQLBinaryFunctions.cpp | 24 + .../Kusto/KustoFunctions/KQLBinaryFunctions.h | 9 + .../KustoFunctions/KQLCastingFunctions.cpp | 51 ++ .../KustoFunctions/KQLCastingFunctions.h | 50 ++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 24 + .../KustoFunctions/KQLDateTimeFunctions.h | 9 + .../KustoFunctions/KQLDynamicFunctions.cpp | 24 + .../KustoFunctions/KQLDynamicFunctions.h | 9 + .../KustoFunctions/KQLFunctionFactory.cpp | 742 ++++++++++++++++++ .../Kusto/KustoFunctions/KQLFunctionFactory.h | 386 +++++++++ .../KustoFunctions/KQLGeneralFunctions.cpp | 24 + .../KustoFunctions/KQLGeneralFunctions.h | 9 + .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 24 + .../Kusto/KustoFunctions/KQLIPFunctions.h | 9 + .../KustoFunctions/KQLStringFunctions.cpp | 365 +++++++++ .../Kusto/KustoFunctions/KQLStringFunctions.h | 267 +++++++ .../KustoFunctions/KQLTimeSeriesFunctions.cpp | 24 + .../KustoFunctions/KQLTimeSeriesFunctions.h | 9 + src/Parsers/Kusto/ParserKQLOperators.cpp | 5 +- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 24 +- src/Parsers/Kusto/ParserKQLQuery.h | 7 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 13 +- src/Parsers/Kusto/ParserKQLSummarize.h | 4 + 29 files changed, 2210 insertions(+), 10 deletions(-) create mode 100644 src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp create mode 100644 
src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73d46593e042..1648abdbf55d 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,6 +4,7 @@ add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) +add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp new file mode 100644 index 000000000000..5455f41a0c22 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -0,0 +1,33 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h new file mode 100644 index 000000000000..81bf97f390ba --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +namespace DB +{ +class IParserKQLFunction //: public IParser +{ +public: + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, const F & func) + { + IParser::Pos begin = pos; + bool res = func(); + if (!res) + pos = begin; + return res; + } + struct IncreaseDepthTag {}; + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, IncreaseDepthTag, const F & func) + { + IParser::Pos begin = pos; + pos.increaseDepth(); + bool res = func(); + pos.decreaseDepth(); + if (!res) + pos = begin; + return res; + } + bool convert(String &out,IParser::Pos &pos); + 
virtual const char * getName() const = 0; + virtual ~IParserKQLFunction() = default; +protected: + virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp new file mode 100644 index 000000000000..5f43aa16d8e2 --- /dev/null +++ 
b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -0,0 +1,51 @@ + +#include +#include +#include +#include + +namespace DB +{ +bool Tobool::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToDatetime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToDouble::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToInt::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToTimespan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h new file mode 100644 index 000000000000..ab73fb3fc218 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +namespace DB +{ +class Tobool : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tobool()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToDatetime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todatetime()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToDouble : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todouble()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToInt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toint()";} + bool 
convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tostring()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToTimespan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "totimespan()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h @@ -0,0 +1,9 @@ +#pragma once 
+ +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp new file mode 100644 index 000000000000..528f906e51e7 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -0,0 +1,742 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + std::unordered_map KQLFunctionFactory::kql_functions = + { + {"datetime", KQLFunctionValue::datetime}, + {"ago", KQLFunctionValue::ago}, + {"datetime_add", KQLFunctionValue::datetime_add}, + {"datetime_part", KQLFunctionValue::datetime_part}, + {"datetime_diff", KQLFunctionValue::datetime_diff}, + {"dayofmonth", KQLFunctionValue::dayofmonth}, + {"dayofweek", KQLFunctionValue::dayofweek}, + {"dayofyear", KQLFunctionValue::dayofyear}, + {"endofday", KQLFunctionValue::endofday}, + {"endofweek", KQLFunctionValue::endofweek}, + {"endofyear", KQLFunctionValue::endofyear}, + {"format_datetime", KQLFunctionValue::format_datetime}, + {"format_timespan", KQLFunctionValue::format_timespan}, + {"getmonth", KQLFunctionValue::getmonth}, + {"getyear", KQLFunctionValue::getyear}, + {"hoursofday", KQLFunctionValue::hoursofday}, + {"make_timespan", KQLFunctionValue::make_timespan}, + {"make_datetime", KQLFunctionValue::make_datetime}, + {"now", KQLFunctionValue::now}, + {"startofday", KQLFunctionValue::startofday}, + {"startofmonth", KQLFunctionValue::startofmonth}, + {"startofweek", KQLFunctionValue::startofweek}, + {"startofyear", KQLFunctionValue::startofyear}, + {"todatetime", KQLFunctionValue::todatetime}, + {"totimespan", KQLFunctionValue::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", 
KQLFunctionValue::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, + {"weekofyear", KQLFunctionValue::weekofyear}, + + {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, + {"base64_decode_toarray", KQLFunctionValue::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, + {"countof", KQLFunctionValue::countof}, + {"extract", KQLFunctionValue::extract}, + {"extract_all", KQLFunctionValue::extract_all}, + {"extractjson", KQLFunctionValue::extractjson}, + {"has_any_index", KQLFunctionValue::has_any_index}, + {"indexof", KQLFunctionValue::indexof}, + {"isempty", KQLFunctionValue::isempty}, + {"isnotempty", KQLFunctionValue::isnotempty}, + {"isnotnull", KQLFunctionValue::isnotnull}, + {"isnull", KQLFunctionValue::isnull}, + {"parse_command_line", KQLFunctionValue::parse_command_line}, + {"parse_csv", KQLFunctionValue::parse_csv}, + {"parse_json", KQLFunctionValue::parse_json}, + {"parse_url", KQLFunctionValue::parse_url}, + {"parse_urlquery", KQLFunctionValue::parse_urlquery}, + {"parse_version", KQLFunctionValue::parse_version}, + {"replace_regex", KQLFunctionValue::replace_regex}, + {"reverse", KQLFunctionValue::reverse}, + {"split", KQLFunctionValue::split}, + {"strcat", KQLFunctionValue::strcat}, + {"strcat_delim", KQLFunctionValue::strcat_delim}, + {"strcmp", KQLFunctionValue::strcmp}, + {"strlen", KQLFunctionValue::strlen}, + {"strrep", KQLFunctionValue::strrep}, + {"substring", KQLFunctionValue::substring}, + {"toupper", KQLFunctionValue::toupper}, + {"translate", KQLFunctionValue::translate}, + {"trim", KQLFunctionValue::trim}, + {"trim_end", KQLFunctionValue::trim_end}, + {"trim_start", KQLFunctionValue::trim_start}, + {"url_decode", KQLFunctionValue::url_decode}, + {"url_encode", 
KQLFunctionValue::url_encode}, + + {"array_concat", KQLFunctionValue::array_concat}, + {"array_iif", KQLFunctionValue::array_iif}, + {"array_index_of", KQLFunctionValue::array_index_of}, + {"array_length", KQLFunctionValue::array_length}, + {"array_reverse", KQLFunctionValue::array_reverse}, + {"array_rotate_left", KQLFunctionValue::array_rotate_left}, + {"array_rotate_right", KQLFunctionValue::array_rotate_right}, + {"array_shift_left", KQLFunctionValue::array_shift_left}, + {"array_shift_right", KQLFunctionValue::array_shift_right}, + {"array_slice", KQLFunctionValue::array_slice}, + {"array_sort_asc", KQLFunctionValue::array_sort_asc}, + {"array_sort_desc", KQLFunctionValue::array_sort_desc}, + {"array_split", KQLFunctionValue::array_split}, + {"array_sum", KQLFunctionValue::array_sum}, + {"bag_keys", KQLFunctionValue::bag_keys}, + {"bag_merge", KQLFunctionValue::bag_merge}, + {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, + {"jaccard_index", KQLFunctionValue::jaccard_index}, + {"pack", KQLFunctionValue::pack}, + {"pack_all", KQLFunctionValue::pack_all}, + {"pack_array", KQLFunctionValue::pack_array}, + {"repeat", KQLFunctionValue::repeat}, + {"set_difference", KQLFunctionValue::set_difference}, + {"set_has_element", KQLFunctionValue::set_has_element}, + {"set_intersect", KQLFunctionValue::set_intersect}, + {"set_union", KQLFunctionValue::set_union}, + {"treepath", KQLFunctionValue::treepath}, + {"zip", KQLFunctionValue::zip}, + + {"tobool", KQLFunctionValue::tobool}, + {"toboolean", KQLFunctionValue::tobool}, + {"todouble", KQLFunctionValue::todouble}, + {"toint", KQLFunctionValue::toint}, + {"toreal", KQLFunctionValue::todouble}, + {"tostring", KQLFunctionValue::tostring}, + {"totimespan", KQLFunctionValue::totimespan}, + + {"arg_max", KQLFunctionValue::arg_max}, + {"arg_min", KQLFunctionValue::arg_min}, + {"avg", KQLFunctionValue::avg}, + {"avgif", KQLFunctionValue::avgif}, + {"binary_all_and", KQLFunctionValue::binary_all_and}, + {"binary_all_or", 
KQLFunctionValue::binary_all_or}, + {"binary_all_xor", KQLFunctionValue::binary_all_xor}, + {"buildschema", KQLFunctionValue::buildschema}, + {"count", KQLFunctionValue::count}, + {"countif", KQLFunctionValue::countif}, + {"dcount", KQLFunctionValue::dcount}, + {"dcountif", KQLFunctionValue::dcountif}, + {"make_bag", KQLFunctionValue::make_bag}, + {"make_bag_if", KQLFunctionValue::make_bag_if}, + {"make_list", KQLFunctionValue::make_list}, + {"make_list_if", KQLFunctionValue::make_list_if}, + {"make_list_with_nulls", KQLFunctionValue::make_list_with_nulls}, + {"make_set", KQLFunctionValue::make_set}, + {"make_set_if", KQLFunctionValue::make_set_if}, + {"max", KQLFunctionValue::max}, + {"maxif", KQLFunctionValue::maxif}, + {"min", KQLFunctionValue::min}, + {"minif", KQLFunctionValue::minif}, + {"percentiles", KQLFunctionValue::percentiles}, + {"percentiles_array", KQLFunctionValue::percentiles_array}, + {"percentilesw", KQLFunctionValue::percentilesw}, + {"percentilesw_array", KQLFunctionValue::percentilesw_array}, + {"stdev", KQLFunctionValue::stdev}, + {"stdevif", KQLFunctionValue::stdevif}, + {"sum", KQLFunctionValue::sum}, + {"sumif", KQLFunctionValue::sumif}, + {"take_any", KQLFunctionValue::take_any}, + {"take_anyif", KQLFunctionValue::take_anyif}, + {"variance", KQLFunctionValue::variance}, + {"varianceif", KQLFunctionValue::varianceif}, + + {"series_fir", KQLFunctionValue::series_fir}, + {"series_iir", KQLFunctionValue::series_iir}, + {"series_fit_line", KQLFunctionValue::series_fit_line}, + {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, + {"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, + {"series_outliers", KQLFunctionValue::series_outliers}, + {"series_periods_detect", KQLFunctionValue::series_periods_detect}, + {"series_periods_validate", KQLFunctionValue::series_periods_validate}, + {"series_stats_dynamic", 
KQLFunctionValue::series_stats_dynamic}, + {"series_stats", KQLFunctionValue::series_stats}, + {"series_fill_backward", KQLFunctionValue::series_fill_backward}, + {"series_fill_const", KQLFunctionValue::series_fill_const}, + {"series_fill_forward", KQLFunctionValue::series_fill_forward}, + {"series_fill_linear", KQLFunctionValue::series_fill_linear}, + + {"ipv4_compare", KQLFunctionValue::ipv4_compare}, + {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, + {"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, + {"ipv4_is_private", KQLFunctionValue::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunctionValue::parse_ipv4}, + {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, + {"ipv6_compare", KQLFunctionValue::ipv6_compare}, + {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, + {"parse_ipv6", KQLFunctionValue::parse_ipv6}, + {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, + {"format_ipv4", KQLFunctionValue::format_ipv4}, + {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, + + {"binary_and", KQLFunctionValue::binary_and}, + {"binary_not", KQLFunctionValue::binary_not}, + {"binary_or", KQLFunctionValue::binary_or}, + {"binary_shift_left", KQLFunctionValue::binary_shift_left}, + {"binary_shift_right", KQLFunctionValue::binary_shift_right}, + {"binary_xor", KQLFunctionValue::binary_xor}, + {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, + {"bin", KQLFunctionValue::bin} + }; + + +std::unique_ptr KQLFunctionFactory::get(String &kql_function) +{ +/* if (kql_function=="strrep") + return std::make_unique(); + else if (kql_function=="strcat") + return std::make_unique(); + else + return nullptr;*/ + if (kql_functions.find(kql_function) == kql_functions.end()) + return nullptr; + + auto kql_function_id = kql_functions[kql_function]; + switch (kql_function_id) + { + case KQLFunctionValue::none: + return nullptr; + + case KQLFunctionValue::timespan: + return nullptr; + + case 
KQLFunctionValue::datetime: + return nullptr; + + case KQLFunctionValue::ago: + return nullptr; + + case KQLFunctionValue::datetime_add: + return nullptr; + + case KQLFunctionValue::datetime_part: + return nullptr; + + case KQLFunctionValue::datetime_diff: + return nullptr; + + case KQLFunctionValue::dayofmonth: + return nullptr; + + case KQLFunctionValue::dayofweek: + return nullptr; + + case KQLFunctionValue::dayofyear: + return nullptr; + + case KQLFunctionValue::endofday: + return nullptr; + + case KQLFunctionValue::endofweek: + return nullptr; + + case KQLFunctionValue::endofyear: + return nullptr; + + case KQLFunctionValue::format_datetime: + return nullptr; + + case KQLFunctionValue::format_timespan: + return nullptr; + + case KQLFunctionValue::getmonth: + return nullptr; + + case KQLFunctionValue::getyear: + return nullptr; + + case KQLFunctionValue::hoursofday: + return nullptr; + + case KQLFunctionValue::make_timespan: + return nullptr; + + case KQLFunctionValue::make_datetime: + return nullptr; + + case KQLFunctionValue::now: + return nullptr; + + case KQLFunctionValue::startofday: + return nullptr; + + case KQLFunctionValue::startofmonth: + return nullptr; + + case KQLFunctionValue::startofweek: + return nullptr; + + case KQLFunctionValue::startofyear: + return nullptr; + + case KQLFunctionValue::unixtime_microseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_milliseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_nanoseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_seconds_todatetime: + return nullptr; + + case KQLFunctionValue::weekofyear: + return nullptr; + + + case KQLFunctionValue::base64_encode_tostring: + return nullptr; + + case KQLFunctionValue::base64_encode_fromguid: + return nullptr; + + case KQLFunctionValue::base64_decode_tostring: + return nullptr; + + case KQLFunctionValue::base64_decode_toarray: + return nullptr; + + case KQLFunctionValue::base64_decode_toguid: + 
return nullptr; + + case KQLFunctionValue::countof: + return nullptr; + + case KQLFunctionValue::extract: + return nullptr; + + case KQLFunctionValue::extract_all: + return nullptr; + + case KQLFunctionValue::extractjson: + return nullptr; + + case KQLFunctionValue::has_any_index: + return nullptr; + + case KQLFunctionValue::indexof: + return nullptr; + + case KQLFunctionValue::isempty: + return nullptr; + + case KQLFunctionValue::isnotempty: + return nullptr; + + case KQLFunctionValue::isnotnull: + return nullptr; + + case KQLFunctionValue::isnull: + return nullptr; + + case KQLFunctionValue::parse_command_line: + return nullptr; + + case KQLFunctionValue::parse_csv: + return nullptr; + + case KQLFunctionValue::parse_json: + return nullptr; + + case KQLFunctionValue::parse_url: + return nullptr; + + case KQLFunctionValue::parse_urlquery: + return nullptr; + + case KQLFunctionValue::parse_version: + return nullptr; + + case KQLFunctionValue::replace_regex: + return nullptr; + + case KQLFunctionValue::reverse: + return nullptr; + + case KQLFunctionValue::split: + return nullptr; + + case KQLFunctionValue::strcat: + return std::make_unique(); + + case KQLFunctionValue::strcat_delim: + return nullptr; + + case KQLFunctionValue::strcmp: + return nullptr; + + case KQLFunctionValue::strlen: + return nullptr; + + case KQLFunctionValue::strrep: + return std::make_unique(); + + case KQLFunctionValue::substring: + return nullptr; + + case KQLFunctionValue::toupper: + return nullptr; + + case KQLFunctionValue::translate: + return nullptr; + + case KQLFunctionValue::trim: + return nullptr; + + case KQLFunctionValue::trim_end: + return nullptr; + + case KQLFunctionValue::trim_start: + return nullptr; + + case KQLFunctionValue::url_decode: + return nullptr; + + case KQLFunctionValue::url_encode: + return nullptr; + + case KQLFunctionValue::array_concat: + return nullptr; + + case KQLFunctionValue::array_iif: + return nullptr; + + case KQLFunctionValue::array_index_of: + return 
nullptr; + + case KQLFunctionValue::array_length: + return nullptr; + + case KQLFunctionValue::array_reverse: + return nullptr; + + case KQLFunctionValue::array_rotate_left: + return nullptr; + + case KQLFunctionValue::array_rotate_right: + return nullptr; + + case KQLFunctionValue::array_shift_left: + return nullptr; + + case KQLFunctionValue::array_shift_right: + return nullptr; + + case KQLFunctionValue::array_slice: + return nullptr; + + case KQLFunctionValue::array_sort_asc: + return nullptr; + + case KQLFunctionValue::array_sort_desc: + return nullptr; + + case KQLFunctionValue::array_split: + return nullptr; + + case KQLFunctionValue::array_sum: + return nullptr; + + case KQLFunctionValue::bag_keys: + return nullptr; + + case KQLFunctionValue::bag_merge: + return nullptr; + + case KQLFunctionValue::bag_remove_keys: + return nullptr; + + case KQLFunctionValue::jaccard_index: + return nullptr; + + case KQLFunctionValue::pack: + return nullptr; + + case KQLFunctionValue::pack_all: + return nullptr; + + case KQLFunctionValue::pack_array: + return nullptr; + + case KQLFunctionValue::repeat: + return nullptr; + + case KQLFunctionValue::set_difference: + return nullptr; + + case KQLFunctionValue::set_has_element: + return nullptr; + + case KQLFunctionValue::set_intersect: + return nullptr; + + case KQLFunctionValue::set_union: + return nullptr; + + case KQLFunctionValue::treepath: + return nullptr; + + case KQLFunctionValue::zip: + return nullptr; + + case KQLFunctionValue::tobool: + return std::make_unique(); + + case KQLFunctionValue::todatetime: + return std::make_unique(); + + case KQLFunctionValue::todouble: + return std::make_unique(); + + case KQLFunctionValue::toint: + return std::make_unique(); + + case KQLFunctionValue::tostring: + return std::make_unique(); + + case KQLFunctionValue::totimespan: + return std::make_unique(); + + case KQLFunctionValue::arg_max: + return nullptr; + + case KQLFunctionValue::arg_min: + return nullptr; + + case 
KQLFunctionValue::avg: + return nullptr; + + case KQLFunctionValue::avgif: + return nullptr; + + case KQLFunctionValue::binary_all_and: + return nullptr; + + case KQLFunctionValue::binary_all_or: + return nullptr; + + case KQLFunctionValue::binary_all_xor: + return nullptr; + case KQLFunctionValue::buildschema: + return nullptr; + + case KQLFunctionValue::count: + return nullptr; + + case KQLFunctionValue::countif: + return nullptr; + + case KQLFunctionValue::dcount: + return nullptr; + + case KQLFunctionValue::dcountif: + return nullptr; + + case KQLFunctionValue::make_bag: + return nullptr; + + case KQLFunctionValue::make_bag_if: + return nullptr; + + case KQLFunctionValue::make_list: + return nullptr; + + case KQLFunctionValue::make_list_if: + return nullptr; + + case KQLFunctionValue::make_list_with_nulls: + return nullptr; + + case KQLFunctionValue::make_set: + return nullptr; + + case KQLFunctionValue::make_set_if: + return nullptr; + + case KQLFunctionValue::max: + return nullptr; + + case KQLFunctionValue::maxif: + return nullptr; + + case KQLFunctionValue::min: + return nullptr; + + case KQLFunctionValue::minif: + return nullptr; + + case KQLFunctionValue::percentiles: + return nullptr; + + case KQLFunctionValue::percentiles_array: + return nullptr; + + case KQLFunctionValue::percentilesw: + return nullptr; + + case KQLFunctionValue::percentilesw_array: + return nullptr; + + case KQLFunctionValue::stdev: + return nullptr; + + case KQLFunctionValue::stdevif: + return nullptr; + + case KQLFunctionValue::sum: + return nullptr; + + case KQLFunctionValue::sumif: + return nullptr; + + case KQLFunctionValue::take_any: + return nullptr; + + case KQLFunctionValue::take_anyif: + return nullptr; + + case KQLFunctionValue::variance: + return nullptr; + + case KQLFunctionValue::varianceif: + return nullptr; + + + case KQLFunctionValue::series_fir: + return nullptr; + + case KQLFunctionValue::series_iir: + return nullptr; + + case KQLFunctionValue::series_fit_line: + 
return nullptr; + + case KQLFunctionValue::series_fit_line_dynamic: + return nullptr; + + case KQLFunctionValue::series_fit_2lines: + return nullptr; + + case KQLFunctionValue::series_fit_2lines_dynamic: + return nullptr; + + case KQLFunctionValue::series_outliers: + return nullptr; + + case KQLFunctionValue::series_periods_detect: + return nullptr; + + case KQLFunctionValue::series_periods_validate: + return nullptr; + + case KQLFunctionValue::series_stats_dynamic: + return nullptr; + + case KQLFunctionValue::series_stats: + return nullptr; + + case KQLFunctionValue::series_fill_backward: + return nullptr; + + case KQLFunctionValue::series_fill_const: + return nullptr; + + case KQLFunctionValue::series_fill_forward: + return nullptr; + + case KQLFunctionValue::series_fill_linear: + return nullptr; + + + case KQLFunctionValue::ipv4_compare: + return nullptr; + + case KQLFunctionValue::ipv4_is_in_range: + return nullptr; + + case KQLFunctionValue::ipv4_is_match: + return nullptr; + + case KQLFunctionValue::ipv4_is_private: + return nullptr; + + case KQLFunctionValue::ipv4_netmask_suffix: + return nullptr; + + case KQLFunctionValue::parse_ipv4: + return nullptr; + + case KQLFunctionValue::parse_ipv4_mask: + return nullptr; + + case KQLFunctionValue::ipv6_compare: + return nullptr; + + case KQLFunctionValue::ipv6_is_match: + return nullptr; + + case KQLFunctionValue::parse_ipv6: + return nullptr; + + case KQLFunctionValue::parse_ipv6_mask: + return nullptr; + + case KQLFunctionValue::format_ipv4: + return nullptr; + + case KQLFunctionValue::format_ipv4_mask: + return nullptr; + + + case KQLFunctionValue::binary_and: + return nullptr; + + case KQLFunctionValue::binary_not: + return nullptr; + + case KQLFunctionValue::binary_or: + return nullptr; + + case KQLFunctionValue::binary_shift_left: + return nullptr; + + case KQLFunctionValue::binary_shift_right: + return nullptr; + + case KQLFunctionValue::binary_xor: + return nullptr; + + case 
KQLFunctionValue::bitset_count_ones: + return nullptr; + + case KQLFunctionValue::bin: + return nullptr; + } +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h new file mode 100644 index 000000000000..86e879b4668e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -0,0 +1,386 @@ +#pragma once + +#include +#include +#include +namespace DB +{ + enum class KQLFunctionValue : uint16_t + { none, + timespan, + datetime, + ago, + datetime_add, + datetime_part, + datetime_diff, + dayofmonth, + dayofweek, + dayofyear, + endofday, + endofweek, + endofyear, + format_datetime, + format_timespan, + getmonth, + getyear, + hoursofday, + make_timespan, + make_datetime, + now, + startofday, + startofmonth, + startofweek, + startofyear, + todatetime, + totimespan, + unixtime_microseconds_todatetime, + unixtime_milliseconds_todatetime, + unixtime_nanoseconds_todatetime, + unixtime_seconds_todatetime, + weekofyear, + + base64_encode_tostring, + base64_encode_fromguid, + base64_decode_tostring, + base64_decode_toarray, + base64_decode_toguid, + countof, + extract, + extract_all, + extractjson, + has_any_index, + indexof, + isempty, + isnotempty, + isnotnull, + isnull, + parse_command_line, + parse_csv, + parse_json, + parse_url, + parse_urlquery, + parse_version, + replace_regex, + reverse, + split, + strcat, + strcat_delim, + strcmp, + strlen, + strrep, + substring, + toupper, + translate, + trim, + trim_end, + trim_start, + url_decode, + url_encode, + + array_concat, + array_iif, + array_index_of, + array_length, + array_reverse, + array_rotate_left, + array_rotate_right, + array_shift_left, + array_shift_right, + array_slice, + array_sort_asc, + array_sort_desc, + array_split, + array_sum, + bag_keys, + bag_merge, + bag_remove_keys, + jaccard_index, + pack, + pack_all, + pack_array, + repeat, + set_difference, + set_has_element, + set_intersect, + set_union, + treepath, + zip, + 
+ tobool, + todouble, + toint, + tostring, + + arg_max, + arg_min, + avg, + avgif, + binary_all_and, + binary_all_or, + binary_all_xor, + buildschema, + count, + countif, + dcount, + dcountif, + make_bag, + make_bag_if, + make_list, + make_list_if, + make_list_with_nulls, + make_set, + make_set_if, + max, + maxif, + min, + minif, + percentiles, + percentiles_array, + percentilesw, + percentilesw_array, + stdev, + stdevif, + sum, + sumif, + take_any, + take_anyif, + variance, + varianceif, + + series_fir, + series_iir, + series_fit_line, + series_fit_line_dynamic, + series_fit_2lines, + series_fit_2lines_dynamic, + series_outliers, + series_periods_detect, + series_periods_validate, + series_stats_dynamic, + series_stats, + series_fill_backward, + series_fill_const, + series_fill_forward, + series_fill_linear, + + ipv4_compare, + ipv4_is_in_range, + ipv4_is_match, + ipv4_is_private, + ipv4_netmask_suffix, + parse_ipv4, + parse_ipv4_mask, + ipv6_compare, + ipv6_is_match, + parse_ipv6, + parse_ipv6_mask, + format_ipv4, + format_ipv4_mask, + + binary_and, + binary_not, + binary_or, + binary_shift_left, + binary_shift_right, + binary_xor, + bitset_count_ones, + + bin + }; + +class KQLFunctionFactory +{ +public: + static std::unique_ptr get(String &kql_function); + +protected: + + + static std::unordered_map kql_functions;/* = + { + {"datetime", KQLFunctionValue::datetime}, + {"ago", KQLFunctionValue::ago}, + {"datetime_add", KQLFunctionValue::datetime_add}, + {"datetime_part", KQLFunctionValue::datetime_part}, + {"datetime_diff", KQLFunctionValue::datetime_diff}, + {"dayofmonth", KQLFunctionValue::dayofmonth}, + {"dayofweek", KQLFunctionValue::dayofweek}, + {"dayofyear", KQLFunctionValue::dayofyear}, + {"endofday", KQLFunctionValue::endofday}, + {"endofweek", KQLFunctionValue::endofweek}, + {"endofyear", KQLFunctionValue::endofyear}, + {"format_datetime", KQLFunctionValue::format_datetime}, + {"format_timespan", KQLFunctionValue::format_timespan}, + {"getmonth", 
KQLFunctionValue::getmonth}, + {"getyear", KQLFunctionValue::getyear}, + {"hoursofday", KQLFunctionValue::hoursofday}, + {"make_timespan", KQLFunctionValue::make_timespan}, + {"make_datetime", KQLFunctionValue::make_datetime}, + {"now", KQLFunctionValue::now}, + {"startofday", KQLFunctionValue::startofday}, + {"startofmonth", KQLFunctionValue::startofmonth}, + {"startofweek", KQLFunctionValue::startofweek}, + {"startofyear", KQLFunctionValue::startofyear}, + {"todatetime", KQLFunctionValue::todatetime}, + {"totimespan", KQLFunctionValue::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, + {"weekofyear", KQLFunctionValue::weekofyear}, + + {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, + {"base64_decode_toarray", KQLFunctionValue::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, + {"countof", KQLFunctionValue::countof}, + {"extract", KQLFunctionValue::extract}, + {"extract_all", KQLFunctionValue::extract_all}, + {"extractjson", KQLFunctionValue::extractjson}, + {"has_any_index", KQLFunctionValue::has_any_index}, + {"indexof", KQLFunctionValue::indexof}, + {"isempty", KQLFunctionValue::isempty}, + {"isnotempty", KQLFunctionValue::isnotempty}, + {"isnotnull", KQLFunctionValue::isnotnull}, + {"isnull", KQLFunctionValue::isnull}, + {"parse_command_line", KQLFunctionValue::parse_command_line}, + {"parse_csv", KQLFunctionValue::parse_csv}, + {"parse_json", KQLFunctionValue::parse_json}, + {"parse_url", KQLFunctionValue::parse_url}, + {"parse_urlquery", 
KQLFunctionValue::parse_urlquery}, + {"parse_version", KQLFunctionValue::parse_version}, + {"replace_regex", KQLFunctionValue::replace_regex}, + {"reverse", KQLFunctionValue::reverse}, + {"split", KQLFunctionValue::split}, + {"strcat", KQLFunctionValue::strcat}, + {"strcat_delim", KQLFunctionValue::strcat_delim}, + {"strcmp", KQLFunctionValue::strcmp}, + {"strlen", KQLFunctionValue::strlen}, + {"strrep", KQLFunctionValue::strrep}, + {"substring", KQLFunctionValue::substring}, + {"toupper", KQLFunctionValue::toupper}, + {"translate", KQLFunctionValue::translate}, + {"trim", KQLFunctionValue::trim}, + {"trim_end", KQLFunctionValue::trim_end}, + {"trim_start", KQLFunctionValue::trim_start}, + {"url_decode", KQLFunctionValue::url_decode}, + {"url_encode", KQLFunctionValue::url_encode}, + + {"array_concat", KQLFunctionValue::array_concat}, + {"array_iif", KQLFunctionValue::array_iif}, + {"array_index_of", KQLFunctionValue::array_index_of}, + {"array_length", KQLFunctionValue::array_length}, + {"array_reverse", KQLFunctionValue::array_reverse}, + {"array_rotate_left", KQLFunctionValue::array_rotate_left}, + {"array_rotate_right", KQLFunctionValue::array_rotate_right}, + {"array_shift_left", KQLFunctionValue::array_shift_left}, + {"array_shift_right", KQLFunctionValue::array_shift_right}, + {"array_slice", KQLFunctionValue::array_slice}, + {"array_sort_asc", KQLFunctionValue::array_sort_asc}, + {"array_sort_desc", KQLFunctionValue::array_sort_desc}, + {"array_split", KQLFunctionValue::array_split}, + {"array_sum", KQLFunctionValue::array_sum}, + {"bag_keys", KQLFunctionValue::bag_keys}, + {"bag_merge", KQLFunctionValue::bag_merge}, + {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, + {"jaccard_index", KQLFunctionValue::jaccard_index}, + {"pack", KQLFunctionValue::pack}, + {"pack_all", KQLFunctionValue::pack_all}, + {"pack_array", KQLFunctionValue::pack_array}, + {"repeat", KQLFunctionValue::repeat}, + {"set_difference", KQLFunctionValue::set_difference}, + 
{"set_has_element", KQLFunctionValue::set_has_element}, + {"set_intersect", KQLFunctionValue::set_intersect}, + {"set_union", KQLFunctionValue::set_union}, + {"treepath", KQLFunctionValue::treepath}, + {"zip", KQLFunctionValue::zip}, + + {"tobool", KQLFunctionValue::tobool}, + {"toboolean", KQLFunctionValue::tobool}, + {"todouble", KQLFunctionValue::todouble}, + {"toint", KQLFunctionValue::toint}, + {"toreal", KQLFunctionValue::todouble}, + {"tostring", KQLFunctionValue::tostring}, + {"totimespan", KQLFunctionValue::totimespan}, + + {"arg_max", KQLFunctionValue::arg_max}, + {"arg_min", KQLFunctionValue::arg_min}, + {"avg", KQLFunctionValue::avg}, + {"avgif", KQLFunctionValue::avgif}, + {"binary_all_and", KQLFunctionValue::binary_all_and}, + {"binary_all_or", KQLFunctionValue::binary_all_or}, + {"binary_all_xor", KQLFunctionValue::binary_all_xor}, + {"buildschema", KQLFunctionValue::buildschema}, + {"count", KQLFunctionValue::count}, + {"countif", KQLFunctionValue::countif}, + {"dcount", KQLFunctionValue::dcount}, + {"dcountif", KQLFunctionValue::dcountif}, + {"make_bag", KQLFunctionValue::make_bag}, + {"make_bag_if", KQLFunctionValue::make_bag_if}, + {"make_list", KQLFunctionValue::make_list}, + {"make_list_if", KQLFunctionValue::make_list_if}, + {"make_list_with_nulls", KQLFunctionValue::make_list_with_nulls}, + {"make_set", KQLFunctionValue::make_set}, + {"make_set_if", KQLFunctionValue::make_set_if}, + {"max", KQLFunctionValue::max}, + {"maxif", KQLFunctionValue::maxif}, + {"min", KQLFunctionValue::min}, + {"minif", KQLFunctionValue::minif}, + {"percentiles", KQLFunctionValue::percentiles}, + {"percentiles_array", KQLFunctionValue::percentiles_array}, + {"percentilesw", KQLFunctionValue::percentilesw}, + {"percentilesw_array", KQLFunctionValue::percentilesw_array}, + {"stdev", KQLFunctionValue::stdev}, + {"stdevif", KQLFunctionValue::stdevif}, + {"sum", KQLFunctionValue::sum}, + {"sumif", KQLFunctionValue::sumif}, + {"take_any", KQLFunctionValue::take_any}, + 
{"take_anyif", KQLFunctionValue::take_anyif}, + {"variance", KQLFunctionValue::variance}, + {"varianceif", KQLFunctionValue::varianceif}, + + {"series_fir", KQLFunctionValue::series_fir}, + {"series_iir", KQLFunctionValue::series_iir}, + {"series_fit_line", KQLFunctionValue::series_fit_line}, + {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, + {"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, + {"series_outliers", KQLFunctionValue::series_outliers}, + {"series_periods_detect", KQLFunctionValue::series_periods_detect}, + {"series_periods_validate", KQLFunctionValue::series_periods_validate}, + {"series_stats_dynamic", KQLFunctionValue::series_stats_dynamic}, + {"series_stats", KQLFunctionValue::series_stats}, + {"series_fill_backward", KQLFunctionValue::series_fill_backward}, + {"series_fill_const", KQLFunctionValue::series_fill_const}, + {"series_fill_forward", KQLFunctionValue::series_fill_forward}, + {"series_fill_linear", KQLFunctionValue::series_fill_linear}, + + {"ipv4_compare", KQLFunctionValue::ipv4_compare}, + {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, + {"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, + {"ipv4_is_private", KQLFunctionValue::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunctionValue::parse_ipv4}, + {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, + {"ipv6_compare", KQLFunctionValue::ipv6_compare}, + {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, + {"parse_ipv6", KQLFunctionValue::parse_ipv6}, + {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, + {"format_ipv4", KQLFunctionValue::format_ipv4}, + {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, + + {"binary_and", KQLFunctionValue::binary_and}, + {"binary_not", KQLFunctionValue::binary_not}, + {"binary_or", KQLFunctionValue::binary_or}, + {"binary_shift_left", 
KQLFunctionValue::binary_shift_left}, + {"binary_shift_right", KQLFunctionValue::binary_shift_right}, + {"binary_xor", KQLFunctionValue::binary_xor}, + {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, + {"bin", KQLFunctionValue::bin} + };*/ + +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp new file mode 100644 index 000000000000..20b4b880a83f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h new file mode 100644 index 000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp new file mode 100644 index 
000000000000..851c631d1ceb
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp
@@ -0,0 +1,386 @@
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+// Unless a comment says otherwise, the converters in this file are placeholders:
+// they copy the text of the current token into out and return false, which is also
+// what the implemented converters (StrCat, StrRep) return when they fail.
+
+bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Base64DecodeToArray::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Base64DecodeToGuid::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool CountOf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Extract::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ExtractAll::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ExtractJson::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool HasAnyIndex::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool IndexOf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool IsEmpty::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool IsNotEmpty::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool IsNotNull::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool IsNull::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ParseCsv::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ParseJson::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ParseUrl::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ParseUrlQuery::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ParseVersion::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ReplaceRegex::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Reverse::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Split::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+/// Rewrites KQL `strcat(a, b, ...)` as `concat(a, b, ...)`.
+///
+/// Expects `pos` on the function name. Every token up to the first `)` is appended
+/// to the output as-is (argument separators are ordinary tokens and are copied too);
+/// a bare word for which the factory returns a converter that succeeds is replaced by
+/// the converted text. `out` is written only on success.
+/// Returns false if `(` does not follow the name (`pos` is then restored) or if the
+/// scan stops at `|`, `;` or the end of input before a `)` is seen.
+/// Limitation: the first `)` reached by this loop ends the argument list.
+bool StrCat::convertImpl(String &out,IParser::Pos &pos)
+{
+    ++pos;
+    if (pos->type != TokenType::OpeningRoundBracket)
+    {
+        --pos;
+        return false;
+    }
+
+    String res = "concat(";
+    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        ++pos;
+        if (pos->type == TokenType::ClosingRoundBracket)
+        {
+            res += ")";
+            out = res;
+            return true;
+        }
+
+        String arg = String(pos->begin,pos->end);
+        if (pos->type == TokenType::BareWord)
+        {
+            String converted;
+            auto fun = KQLFunctionFactory::get(arg);
+            if (fun && fun->convert(converted,pos))
+                arg = converted;
+        }
+        res += arg;
+    }
+    return false;
+}
+
+bool StrCatDelim::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool StrCmp::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool StrLen::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+/// Rewrites KQL `strrep(value, multiplier [, delimiter])`.
+///
+/// Without a delimiter the output is `repeat(value, multiplier)`. With one, value and
+/// delimiter are concatenated, repeated, and the trailing delimiter is cut off again
+/// with substr(). Bare-word arguments are first offered to KQLFunctionFactory and are
+/// replaced by the converted text when a converter exists and succeeds.
+/// Returns false if `(` does not follow the name (`pos` is then restored), if no `,`
+/// follows the value, or if the argument list is not closed by `)`.
+bool StrRep::convertImpl(String &out,IParser::Pos &pos)
+{
+    ++pos;
+    if (pos->type != TokenType::OpeningRoundBracket)
+    {
+        --pos;
+        return false;
+    }
+
+    ++pos;
+    String value = String(pos->begin,pos->end);
+    if (pos->type == TokenType::BareWord)
+    {
+        String func_value;
+        auto fun = KQLFunctionFactory::get(value);
+        if (fun && fun->convert(func_value,pos))
+            value = func_value;
+    }
+
+    ++pos;
+    if (pos->type != TokenType::Comma)
+        return false;
+
+    // The multiplier may span several tokens: gather them up to the next `,` or `)`.
+    ++pos;
+    String multiplier = String(pos->begin,pos->end);
+    String new_multiplier;
+    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
+            break;
+
+        // Look up the token under the cursor (not the first multiplier token), and keep
+        // bare words that are not converted, e.g. plain identifiers, in the expression.
+        String token = String(pos->begin,pos->end);
+        if (pos->type == TokenType::BareWord)
+        {
+            String fun_multiplier;
+            auto fun = KQLFunctionFactory::get(token);
+            if (fun && fun->convert(fun_multiplier,pos))
+                token = fun_multiplier;
+        }
+        new_multiplier += token;
+        ++pos;
+    }
+
+    if (!new_multiplier.empty())
+        multiplier = new_multiplier;
+
+    String delimiter;
+    if (pos->type == TokenType::Comma)
+    {
+        ++pos;
+        delimiter = String(pos->begin,pos->end);
+        if (pos->type == TokenType::BareWord)
+        {
+            String func_delimiter;
+            auto fun = KQLFunctionFactory::get(delimiter);
+            if (fun && fun->convert(func_delimiter,pos))
+                delimiter = func_delimiter;
+        }
+        ++pos;
+    }
+
+    if (pos->type != TokenType::ClosingRoundBracket)
+        return false;
+
+    if (!delimiter.empty())
+    {
+        // repeat() leaves one delimiter too many at the end; substr() trims it.
+        String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")";
+        out = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))";
+    }
+    else
+        out = "repeat("+ value + ", " + multiplier + ")";
+    return true;
+}
+
+bool SubString::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool ToUpper::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Translate::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Trim::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool TrimEnd::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool TrimStart::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool UrlDecode::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool UrlEncode::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h
new file mode 100644
index 000000000000..db7ab5077502
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h
@@ -0,0 +1,267 @@
+#pragma once
+// Declarations of the KQL string-function converters: every class derives from IParserKQLFunction, reports the KQL name it handles through getName() and overrides convertImpl(out, pos).
+#include
+#include
+namespace DB
+{
+class Base64EncodeToString : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_encode_tostring()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Base64EncodeFromGuid : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_encode_fromguid()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Base64DecodeToString : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_decode_tostring()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Base64DecodeToArray : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_decode_toarray()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Base64DecodeToGuid : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_decode_toguid()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class CountOf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "countof()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Extract : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "extract()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ExtractAll : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "extract_all()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ExtractJson : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "extractjson()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class HasAnyIndex : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "has_any_index()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class IndexOf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "indexof()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class IsEmpty : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isempty()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class IsNotEmpty : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isnotempty()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class IsNotNull : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isnotnull()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class IsNull : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isnull()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ParseCommandLine : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_command_line()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ParseCsv : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_csv()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ParseJson : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_json()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ParseUrl : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_url()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ParseUrlQuery : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_urlquery()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ParseVersion : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_version()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ReplaceRegex : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "replace_regex()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Reverse : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "reverse()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Split : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "split()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class StrCat : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strcat()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class StrCatDelim : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strcat_delim()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class StrCmp : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strcmp()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class StrLen : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strlen()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class StrRep : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strrep()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SubString : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "substring()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class ToUpper : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "toupper()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Translate : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "translate()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Trim : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "trim()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class TrimEnd : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "trim_end()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class TrimStart : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "trim_start()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class UrlDecode : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "url_decode()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class UrlEncode : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "url_encode()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+}
+
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp
new file mode 100644
index 000000000000..20b4b880a83f
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp
@@ -0,0 +1,24 @@
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+
+
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h
new file mode 100644
index
000000000000..457590328262 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 60fa022f9bb1..c3d0843b1f08 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -41,11 +41,13 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos pos) +String KQLOperators::getExprFromToken(IParser::Pos &pos) { String res; std::vector tokens; + auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -235,6 +237,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) for (auto & token : tokens) res = res + token + " "; + pos = begin; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index a780e18d3339..9920593c7aec 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 +9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos pos); + String getExprFromToken(IParser::Pos &pos) ; protected: enum class WildcardsPos:uint8_t diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index d925f66b321b..f1348c4b3c68 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -8,7 +8,9 @@ #include #include #include - +#include +#include +#include namespace DB { @@ -18,12 +20,22 @@ bool ParserKQLBase :: parsePrepare(Pos & pos) return true; } -String ParserKQLBase :: getExprFromToken(Pos pos) +String ParserKQLBase :: getExprFromToken(Pos &pos) { String res; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + std::unique_ptr 
kql_function; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - res = res + String(pos->begin,pos->end) +" "; + String token = String(pos->begin,pos->end); + String new_token; + if (pos->type == TokenType::BareWord ) + { + kql_function = KQLFunctionFactory::get(token); + if (kql_function && kql_function->convert(new_token,pos)) + token = new_token; + } + res = res + token +" "; ++pos; } return res; @@ -106,6 +118,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; kql_summarize_p.setTableName(table_name); + kql_summarize_p.setFilterPos(kql_filter_p.op_pos); if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else @@ -113,6 +126,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) group_expression_list = kql_summarize_p.group_expression_list; if (kql_summarize_p.tables) tables = kql_summarize_p.tables; + + if (kql_summarize_p.where_expression) + where_expression = kql_summarize_p.where_expression; } select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 0545cd00cd9e..42122fb6e00a 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -7,12 +7,13 @@ namespace DB class ParserKQLBase : public IParserBase { public: - virtual bool parsePrepare(Pos & pos) ; + virtual bool parsePrepare(Pos & pos); + std::vector op_pos; protected: - std::vector op_pos; + std::vector expressions; - virtual String getExprFromToken(Pos pos); + virtual String getExprFromToken(Pos &pos); }; class ParserKQLQuery : public IParserBase diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 2afbad221314..cc4bece7ebf6 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,7 +4,7 
@@ #include #include #include - +#include namespace DB { @@ -57,5 +57,16 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/* +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapParseImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +}*/ } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index b71af138e7e6..8a92412d87c0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -10,7 +10,10 @@ class ParserKQLSummarize : public ParserKQLBase public: ASTPtr group_expression_list; ASTPtr tables; + ASTPtr where_expression; + void setTableName(String table_name_) {table_name = table_name_;} + void setFilterPos(std::vector &filter_pos_) {filter_pos = filter_pos_;} protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; @@ -18,6 +21,7 @@ class ParserKQLSummarize : public ParserKQLBase static String getBinGroupbyString(String expr_bin); private: String table_name; + std::vector filter_pos; }; } From 02113547ea1708d761501733051e185b276fbfbc Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 21 Jun 2022 09:33:07 -0700 Subject: [PATCH 13/16] Kusto-phase2: Add KQL functions parser --- .../KustoFunctions/IParserKQLFunction.cpp | 45 ++- .../Kusto/KustoFunctions/IParserKQLFunction.h | 3 +- .../KQLAggregationFunctions.cpp | 244 +++++++++++- .../KustoFunctions/KQLAggregationFunctions.h | 245 ++++++++++++ .../KustoFunctions/KQLBinaryFunctions.cpp | 48 ++- .../Kusto/KustoFunctions/KQLBinaryFunctions.h | 48 +++ .../KustoFunctions/KQLCastingFunctions.cpp | 7 +- .../KustoFunctions/KQLCastingFunctions.h | 6 +- .../KustoFunctions/KQLDateTimeFunctions.cpp | 202 +++++++++- .../KustoFunctions/KQLDateTimeFunctions.h | 203 ++++++++++ 
.../KustoFunctions/KQLDynamicFunctions.cpp | 195 +++++++++-
 .../KustoFunctions/KQLDynamicFunctions.h | 195 ++++++++++
 .../KustoFunctions/KQLFunctionFactory.cpp | 350 +++++++++---------
 .../Kusto/KustoFunctions/KQLFunctionFactory.h | 190 +---------
 .../KustoFunctions/KQLGeneralFunctions.cpp | 8 +-
 .../KustoFunctions/KQLGeneralFunctions.h | 6 +
 .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 90 ++++-
 .../Kusto/KustoFunctions/KQLIPFunctions.h | 90 +++++
 .../KustoFunctions/KQLStringFunctions.cpp | 92 ++---
 .../Kusto/KustoFunctions/KQLStringFunctions.h | 17 +-
 .../KustoFunctions/KQLTimeSeriesFunctions.cpp | 104 +++++-
 .../KustoFunctions/KQLTimeSeriesFunctions.h | 104 ++++++
 src/Parsers/Kusto/ParserKQLFilter.cpp | 6 +-
 src/Parsers/Kusto/ParserKQLLimit.h | 1 -
 src/Parsers/Kusto/ParserKQLOperators.cpp | 43 ++-
 src/Parsers/Kusto/ParserKQLOperators.h | 2 +-
 src/Parsers/Kusto/ParserKQLQuery.cpp | 16 +-
 src/Parsers/Kusto/ParserKQLStatement.cpp | 11 -
 src/Parsers/Kusto/ParserKQLStatement.h | 1 -
 src/Parsers/Kusto/ParserKQLTable.h | 1 -
 30 files changed, 2080 insertions(+), 493 deletions(-)

diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp
index 5455f41a0c22..e7134678e958 100644
--- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp
+++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp
@@ -1,4 +1,3 @@
-
 #include
 #include
 #include
@@ -15,6 +14,7 @@
 #include
 #include
 #include
+#include

 namespace DB
 {
@@ -30,4 +30,47 @@ bool IParserKQLFunction::convert(String &out,IParser::Pos &pos)
     });
 }

+bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const String &ch_fn)
+{
+    // Generic rewrite "kql_fn(args...)" -> "ch_fn(args...)". `pos` must be on the token that precedes '('.
+    // On success `pos` is left on the matching ')'; on failure `pos` is restored and `out` is left untouched.
+    const auto begin = pos;
+
+    ++pos;
+    if (pos->type != TokenType::OpeningRoundBracket)
+    {
+        pos = begin;
+        return false;
+    }
+
+    String res = ch_fn + "(";
+    int depth = 0; // round brackets opened by arguments that were not converted
+    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        ++pos;
+        String arg = String(pos->begin,pos->end);
+        if (pos->type == TokenType::BareWord)
+        {
+            // A nested KQL function converts itself; a failed lookup or conversion keeps the raw token.
+            String converted;
+            auto fun = KQLFunctionFactory::get(arg);
+            if (fun && fun->convert(converted,pos))
+                arg = converted;
+        }
+        else if (pos->type == TokenType::OpeningRoundBracket)
+            ++depth;
+        else if (pos->type == TokenType::ClosingRoundBracket && --depth < 0)
+        {
+            // This bracket closes the mapped call itself.
+            out = res + ")";
+            return true;
+        }
+        res += arg; // everything else, ',' separators included, is copied through
+    }
+
+    // The end of the statement was reached without the closing bracket.
+    pos = begin;
+    return false;
+}
+
 }
diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h
index 81bf97f390ba..c633f78fa335 100644
--- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h
+++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h
@@ -4,7 +4,7 @@
 #include
 namespace DB
 {
-class IParserKQLFunction //: public IParser
+class IParserKQLFunction
 {
 public:
     template
@@ -33,6 +33,7 @@ class IParserKQLFunction //: public IParser
     virtual ~IParserKQLFunction() = default;
 protected:
     virtual bool convertImpl(String &out,IParser::Pos &pos) = 0;
+    static bool directMapping(String &out,IParser::Pos &pos,const String &ch_fn);
 };

 }
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp
index 20b4b880a83f..91c3639ace40 100644
--- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp
+++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp
@@ -1,4 +1,3 @@
-
 #include
 #include
 #include
@@ -19,6 +18,249 @@
 namespace DB
 {
+bool ArgMax::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+// NOTE: every aggregation converter added here is still a placeholder: it copies the current token into `out` and returns false.
+bool ArgMin::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Avg::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool AvgIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool BuildSchema::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Count::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool CountIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool DCount::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool DCountIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MakeBag::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MakeBagIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MakeList::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MakeListIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MakeListWithNulls::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MakeSet::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Max::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MaxIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Min::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool MinIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Percentiles::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Percentilesw::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Stdev::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool StdevIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Sum::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SumIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}

+bool TakeAny::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool TakeAnyIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool Variance::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool VarianceIf::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
 }
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h
index 457590328262..6e7130420f4c 100644
--- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h
+++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h
@@ -4,6 +4,251 @@
 #include
 namespace DB
 {
+class ArgMax : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "arg_max()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+// Each converter class in this header derives from IParserKQLFunction, names the KQL aggregation it handles via getName() and overrides convertImpl(out, pos).
+class ArgMin : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "arg_min()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Avg : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "avg()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class AvgIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "avgif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryAllAnd : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_all_and()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryAllOr : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_all_or()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryAllXor : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_all_xor()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BuildSchema : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "buildschema()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Count : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "count()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class CountIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "countif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DCount : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "dcount()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DCountIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "dcountif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MakeBag : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_bag()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MakeBagIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_bag_if()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MakeList : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_list()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MakeListIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_list_if()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MakeListWithNulls : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_list_with_nulls()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MakeSet : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_set()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MakeSetIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_set_if()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Max : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "max()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MaxIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "maxif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Min : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "min()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class MinIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "minif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Percentiles : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentiles()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class PercentilesArray : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentiles_array()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Percentilesw : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentilesw()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class PercentileswArray : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentilesw_array()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Stdev : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "stdev()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class StdevIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "stdevif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Sum : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "sum()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SumIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "sumif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class TakeAny : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "take_any()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class TakeAnyIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "take_anyif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class Variance : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "variance()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class VarianceIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "varianceif()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
 }

diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp
index 20b4b880a83f..2a06c4e715be 100644
--- a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp
+++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp
@@ -1,4 +1,3 @@
-
 #include
 #include
 #include
@@ -19,6 +18,53 @@
 namespace DB
 {
+bool BinaryAnd::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool BinaryNot::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+// NOTE: the binary-function converters added here are placeholders: each copies the current token into `out` and returns false.
+bool BinaryOr::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool BinaryShiftLeft::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool BinaryShiftRight::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool BinaryXor::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}

+bool BitsetCountOnes::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
 }
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h
index 457590328262..94ca3a5abbfe 100644
--- a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h
+++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h
@@ -4,6 +4,54 @@
 #include
 namespace DB
 {
+class BinaryAnd : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_and()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+// Each converter class in this header derives from IParserKQLFunction, names the KQL binary function it handles via getName() and overrides convertImpl(out, pos).
+class BinaryNot : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_not()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryOr : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_or()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryShiftLeft : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_shift_left()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryShiftRight : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_shift_right()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryXor : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_xor()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BitsetCountOnes : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "bitset_count_ones()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};

 }
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp
index 5f43aa16d8e2..9129d82aa780 100644
--- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp
+++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp
@@ -1,4 +1,3 @@
-
 #include
 #include
 #include
@@ -6,14 +5,14 @@
 namespace DB
 {

-bool Tobool::convertImpl(String &out,IParser::Pos &pos)
+bool ToBool::convertImpl(String &out,IParser::Pos &pos)
 {
     String res = String(pos->begin,pos->end);
     out = res;
     return false;
 }

-bool ToDatetime::convertImpl(String &out,IParser::Pos &pos)
+bool ToDateTime::convertImpl(String &out,IParser::Pos &pos)
 {
     String res = String(pos->begin,pos->end);
     out = res;
@@ -41,7 +40,7 @@ bool ToString::convertImpl(String &out,IParser::Pos &pos)
     return false;
 }

-bool ToTimespan::convertImpl(String &out,IParser::Pos &pos)
+bool ToTimeSpan::convertImpl(String &out,IParser::Pos &pos)
 {
     String res = String(pos->begin,pos->end);
     out = res;
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h
index ab73fb3fc218..fa6a20e60687 100644
--- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h
+++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h
@@ -4,14 +4,14 @@
 #include
 namespace DB
 {
-class Tobool : public IParserKQLFunction
+class ToBool : public IParserKQLFunction
 {
protected: const char * getName() const override { return "tobool()";} bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ToDatetime : public IParserKQLFunction +class ToDateTime : public IParserKQLFunction { protected: const char * getName() const override { return "todatetime()";} @@ -39,7 +39,7 @@ class ToString : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ToTimespan : public IParserKQLFunction +class ToTimeSpan : public IParserKQLFunction { protected: const char * getName() const override { return "totimespan()";} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 20b4b880a83f..0f098cbebda3 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,207 @@ namespace DB { +bool TimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ago::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatetimeAdd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +}; + +bool DatetimePart::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatetimeDiff::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool DayOfMonth::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DayOfWeek::convertImpl(String 
&out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DayOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfWeek::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatTimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool GetMonth::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool GetYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool HoursOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeTimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Now::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfMonth::convertImpl(String 
&out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeMicrosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeMillisecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeNanosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index 457590328262..7627465ab5bc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -5,5 +5,208 @@ namespace DB { +class TimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ago : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ago()"; } + bool 
convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeAdd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_add()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimePart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_part()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeDiff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_diff()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) 
override; +}; + +class FormatTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HoursOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "hoursofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Now : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "now()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfYear : public 
IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMicrosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_microseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMillisecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_milliseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeNanosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_nanoseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeSecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_seconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class WeekOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "weekofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 20b4b880a83f..a6ff0a374ebc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,200 @@ namespace DB { +bool ArrayConcat::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayIif::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayIndexOf::convertImpl(String 
&out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayLength::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayReverse::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayRotateLeft::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayRotateRight::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayShiftLeft::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayShiftRight::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySlice::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySortAsc::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySortDesc::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySplit::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySum::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool BagKeys::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BagMerge::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool 
BagRemoveKeys::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool JaccardIndex::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Pack::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PackAll::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PackArray::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Repeat::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetDifference::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetHasElement::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetIntersect::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetUnion::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TreePath::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Zip::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h index 457590328262..e36fd60eaeaf 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h @@ -4,6 +4,201 @@ #include 
namespace DB { +class ArrayConcat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_concat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayIif : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_iif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayIndexOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_index_of()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayLength : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_length()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayReverse : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_reverse()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayRotateLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_rotate_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayRotateRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_rotate_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayShiftLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_shift_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayShiftRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_shift_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySlice : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_slice()"; } + bool convertImpl(String &out,IParser::Pos 
&pos) override; +}; + +class ArraySortAsc : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sort_asc()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySortDesc : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sort_desc()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySplit : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_split()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySum : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sum()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagKeys : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_keys()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagMerge : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_merge()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagRemoveKeys : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_remove_keys()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class JaccardIndex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "jaccard_index()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Pack : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PackAll : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack_all()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PackArray : public 
IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Repeat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "repeat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetDifference : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_difference()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetHasElement : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_has_element()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetIntersect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_intersect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetUnion : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_union()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TreePath : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "treepath()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Zip : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "zip()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 528f906e51e7..25e0c2af2f91 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -65,7 +64,9 @@ namespace DB {"indexof", KQLFunctionValue::indexof}, {"isempty", KQLFunctionValue::isempty}, {"isnotempty", 
KQLFunctionValue::isnotempty}, + {"notempty", KQLFunctionValue::isnotempty}, {"isnotnull", KQLFunctionValue::isnotnull}, + {"notnull", KQLFunctionValue::isnotnull}, {"isnull", KQLFunctionValue::isnull}, {"parse_command_line", KQLFunctionValue::parse_command_line}, {"parse_csv", KQLFunctionValue::parse_csv}, @@ -82,6 +83,7 @@ namespace DB {"strlen", KQLFunctionValue::strlen}, {"strrep", KQLFunctionValue::strrep}, {"substring", KQLFunctionValue::substring}, + {"tolower", KQLFunctionValue::tolower}, {"toupper", KQLFunctionValue::toupper}, {"translate", KQLFunctionValue::translate}, {"trim", KQLFunctionValue::trim}, @@ -206,12 +208,6 @@ namespace DB std::unique_ptr KQLFunctionFactory::get(String &kql_function) { -/* if (kql_function=="strrep") - return std::make_unique(); - else if (kql_function=="strcat") - return std::make_unique(); - else - return nullptr;*/ if (kql_functions.find(kql_function) == kql_functions.end()) return nullptr; @@ -222,293 +218,295 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function return nullptr; case KQLFunctionValue::timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ago: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_add: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_part: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_diff: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofyear: - return nullptr; + 
return std::make_unique(); case KQLFunctionValue::format_datetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::getmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::getyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::hoursofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_datetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::now: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_microseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_milliseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_nanoseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_seconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::weekofyear: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::base64_encode_tostring: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_encode_fromguid: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_tostring: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_toarray: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_toguid: - return nullptr; + return std::make_unique(); case 
KQLFunctionValue::countof: - return nullptr; + return std::make_unique(); case KQLFunctionValue::extract: - return nullptr; + return std::make_unique(); case KQLFunctionValue::extract_all: - return nullptr; + return std::make_unique(); case KQLFunctionValue::extractjson: - return nullptr; + return std::make_unique(); case KQLFunctionValue::has_any_index: - return nullptr; + return std::make_unique(); case KQLFunctionValue::indexof: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isempty: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isnotempty: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isnotnull: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isnull: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_command_line: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_csv: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_json: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_url: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_urlquery: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_version: - return nullptr; + return std::make_unique(); case KQLFunctionValue::replace_regex: - return nullptr; + return std::make_unique(); case KQLFunctionValue::reverse: - return nullptr; + return std::make_unique(); case KQLFunctionValue::split: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strcat: return std::make_unique(); case KQLFunctionValue::strcat_delim: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strcmp: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strlen: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strrep: return std::make_unique(); case KQLFunctionValue::substring: - return nullptr; + return std::make_unique(); + + case 
KQLFunctionValue::tolower: + return std::make_unique(); case KQLFunctionValue::toupper: - return nullptr; + return std::make_unique(); case KQLFunctionValue::translate: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim_end: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim_start: - return nullptr; + return std::make_unique(); case KQLFunctionValue::url_decode: - return nullptr; + return std::make_unique(); case KQLFunctionValue::url_encode: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_concat: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_iif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_index_of: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_length: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_reverse: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_rotate_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_rotate_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_shift_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_shift_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_slice: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sort_asc: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sort_desc: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_split: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sum: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bag_keys: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bag_merge: - return nullptr; + return std::make_unique(); case 
KQLFunctionValue::bag_remove_keys: - return nullptr; + return std::make_unique(); case KQLFunctionValue::jaccard_index: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack_all: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::repeat: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_difference: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_has_element: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_intersect: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_union: - return nullptr; + return std::make_unique(); case KQLFunctionValue::treepath: - return nullptr; + return std::make_unique(); case KQLFunctionValue::zip: - return nullptr; + return std::make_unique(); case KQLFunctionValue::tobool: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::todatetime: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::todouble: return std::make_unique(); @@ -520,222 +518,220 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function return std::make_unique(); case KQLFunctionValue::totimespan: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::arg_max: - return nullptr; + return std::make_unique(); case KQLFunctionValue::arg_min: - return nullptr; + return std::make_unique(); case KQLFunctionValue::avg: - return nullptr; + return std::make_unique(); case KQLFunctionValue::avgif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_and: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_or: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_xor: - return nullptr; + return std::make_unique(); + case 
KQLFunctionValue::buildschema: - return nullptr; + return std::make_unique(); case KQLFunctionValue::count: - return nullptr; + return std::make_unique(); case KQLFunctionValue::countif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dcount: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dcountif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_bag: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_bag_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_list: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_list_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_list_with_nulls: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_set: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_set_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::max: - return nullptr; + return std::make_unique(); case KQLFunctionValue::maxif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::min: - return nullptr; + return std::make_unique(); case KQLFunctionValue::minif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentiles: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentiles_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentilesw: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentilesw_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::stdev: - return nullptr; + return std::make_unique(); case KQLFunctionValue::stdevif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::sum: - return nullptr; + return std::make_unique(); case KQLFunctionValue::sumif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::take_any: - return nullptr; + return 
std::make_unique(); case KQLFunctionValue::take_anyif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::variance: - return nullptr; + return std::make_unique(); case KQLFunctionValue::varianceif: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::series_fir: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_iir: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_line: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_line_dynamic: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_2lines: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_2lines_dynamic: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_outliers: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_periods_detect: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_periods_validate: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_stats_dynamic: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_stats: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_backward: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_const: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_forward: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_linear: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::ipv4_compare: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_in_range: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_match: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_private: - return nullptr; + return std::make_unique(); case 
KQLFunctionValue::ipv4_netmask_suffix: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv4: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv4_mask: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv6_compare: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv6_is_match: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv6: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv6_mask: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_ipv4: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_ipv4_mask: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::binary_and: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_not: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_or: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_shift_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_shift_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_xor: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bitset_count_ones: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bin: - return nullptr; + return std::make_unique(); } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 86e879b4668e..8f57133c0713 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -69,6 +69,7 @@ namespace DB strlen, strrep, substring, + tolower, toupper, translate, trim, @@ -187,199 +188,14 @@ namespace DB bin }; - + class KQLFunctionFactory { public: static std::unique_ptr get(String &kql_function); protected: - - - static std::unordered_map 
kql_functions;/* = - { - {"datetime", KQLFunctionValue::datetime}, - {"ago", KQLFunctionValue::ago}, - {"datetime_add", KQLFunctionValue::datetime_add}, - {"datetime_part", KQLFunctionValue::datetime_part}, - {"datetime_diff", KQLFunctionValue::datetime_diff}, - {"dayofmonth", KQLFunctionValue::dayofmonth}, - {"dayofweek", KQLFunctionValue::dayofweek}, - {"dayofyear", KQLFunctionValue::dayofyear}, - {"endofday", KQLFunctionValue::endofday}, - {"endofweek", KQLFunctionValue::endofweek}, - {"endofyear", KQLFunctionValue::endofyear}, - {"format_datetime", KQLFunctionValue::format_datetime}, - {"format_timespan", KQLFunctionValue::format_timespan}, - {"getmonth", KQLFunctionValue::getmonth}, - {"getyear", KQLFunctionValue::getyear}, - {"hoursofday", KQLFunctionValue::hoursofday}, - {"make_timespan", KQLFunctionValue::make_timespan}, - {"make_datetime", KQLFunctionValue::make_datetime}, - {"now", KQLFunctionValue::now}, - {"startofday", KQLFunctionValue::startofday}, - {"startofmonth", KQLFunctionValue::startofmonth}, - {"startofweek", KQLFunctionValue::startofweek}, - {"startofyear", KQLFunctionValue::startofyear}, - {"todatetime", KQLFunctionValue::todatetime}, - {"totimespan", KQLFunctionValue::totimespan}, - {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, - {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, - {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, - {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, - {"weekofyear", KQLFunctionValue::weekofyear}, - - {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, - {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, - {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, - {"base64_decode_toarray", KQLFunctionValue::base64_decode_toarray}, - {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, - {"countof", 
KQLFunctionValue::countof}, - {"extract", KQLFunctionValue::extract}, - {"extract_all", KQLFunctionValue::extract_all}, - {"extractjson", KQLFunctionValue::extractjson}, - {"has_any_index", KQLFunctionValue::has_any_index}, - {"indexof", KQLFunctionValue::indexof}, - {"isempty", KQLFunctionValue::isempty}, - {"isnotempty", KQLFunctionValue::isnotempty}, - {"isnotnull", KQLFunctionValue::isnotnull}, - {"isnull", KQLFunctionValue::isnull}, - {"parse_command_line", KQLFunctionValue::parse_command_line}, - {"parse_csv", KQLFunctionValue::parse_csv}, - {"parse_json", KQLFunctionValue::parse_json}, - {"parse_url", KQLFunctionValue::parse_url}, - {"parse_urlquery", KQLFunctionValue::parse_urlquery}, - {"parse_version", KQLFunctionValue::parse_version}, - {"replace_regex", KQLFunctionValue::replace_regex}, - {"reverse", KQLFunctionValue::reverse}, - {"split", KQLFunctionValue::split}, - {"strcat", KQLFunctionValue::strcat}, - {"strcat_delim", KQLFunctionValue::strcat_delim}, - {"strcmp", KQLFunctionValue::strcmp}, - {"strlen", KQLFunctionValue::strlen}, - {"strrep", KQLFunctionValue::strrep}, - {"substring", KQLFunctionValue::substring}, - {"toupper", KQLFunctionValue::toupper}, - {"translate", KQLFunctionValue::translate}, - {"trim", KQLFunctionValue::trim}, - {"trim_end", KQLFunctionValue::trim_end}, - {"trim_start", KQLFunctionValue::trim_start}, - {"url_decode", KQLFunctionValue::url_decode}, - {"url_encode", KQLFunctionValue::url_encode}, - - {"array_concat", KQLFunctionValue::array_concat}, - {"array_iif", KQLFunctionValue::array_iif}, - {"array_index_of", KQLFunctionValue::array_index_of}, - {"array_length", KQLFunctionValue::array_length}, - {"array_reverse", KQLFunctionValue::array_reverse}, - {"array_rotate_left", KQLFunctionValue::array_rotate_left}, - {"array_rotate_right", KQLFunctionValue::array_rotate_right}, - {"array_shift_left", KQLFunctionValue::array_shift_left}, - {"array_shift_right", KQLFunctionValue::array_shift_right}, - {"array_slice", 
KQLFunctionValue::array_slice}, - {"array_sort_asc", KQLFunctionValue::array_sort_asc}, - {"array_sort_desc", KQLFunctionValue::array_sort_desc}, - {"array_split", KQLFunctionValue::array_split}, - {"array_sum", KQLFunctionValue::array_sum}, - {"bag_keys", KQLFunctionValue::bag_keys}, - {"bag_merge", KQLFunctionValue::bag_merge}, - {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, - {"jaccard_index", KQLFunctionValue::jaccard_index}, - {"pack", KQLFunctionValue::pack}, - {"pack_all", KQLFunctionValue::pack_all}, - {"pack_array", KQLFunctionValue::pack_array}, - {"repeat", KQLFunctionValue::repeat}, - {"set_difference", KQLFunctionValue::set_difference}, - {"set_has_element", KQLFunctionValue::set_has_element}, - {"set_intersect", KQLFunctionValue::set_intersect}, - {"set_union", KQLFunctionValue::set_union}, - {"treepath", KQLFunctionValue::treepath}, - {"zip", KQLFunctionValue::zip}, - - {"tobool", KQLFunctionValue::tobool}, - {"toboolean", KQLFunctionValue::tobool}, - {"todouble", KQLFunctionValue::todouble}, - {"toint", KQLFunctionValue::toint}, - {"toreal", KQLFunctionValue::todouble}, - {"tostring", KQLFunctionValue::tostring}, - {"totimespan", KQLFunctionValue::totimespan}, - - {"arg_max", KQLFunctionValue::arg_max}, - {"arg_min", KQLFunctionValue::arg_min}, - {"avg", KQLFunctionValue::avg}, - {"avgif", KQLFunctionValue::avgif}, - {"binary_all_and", KQLFunctionValue::binary_all_and}, - {"binary_all_or", KQLFunctionValue::binary_all_or}, - {"binary_all_xor", KQLFunctionValue::binary_all_xor}, - {"buildschema", KQLFunctionValue::buildschema}, - {"count", KQLFunctionValue::count}, - {"countif", KQLFunctionValue::countif}, - {"dcount", KQLFunctionValue::dcount}, - {"dcountif", KQLFunctionValue::dcountif}, - {"make_bag", KQLFunctionValue::make_bag}, - {"make_bag_if", KQLFunctionValue::make_bag_if}, - {"make_list", KQLFunctionValue::make_list}, - {"make_list_if", KQLFunctionValue::make_list_if}, - {"make_list_with_nulls", 
KQLFunctionValue::make_list_with_nulls}, - {"make_set", KQLFunctionValue::make_set}, - {"make_set_if", KQLFunctionValue::make_set_if}, - {"max", KQLFunctionValue::max}, - {"maxif", KQLFunctionValue::maxif}, - {"min", KQLFunctionValue::min}, - {"minif", KQLFunctionValue::minif}, - {"percentiles", KQLFunctionValue::percentiles}, - {"percentiles_array", KQLFunctionValue::percentiles_array}, - {"percentilesw", KQLFunctionValue::percentilesw}, - {"percentilesw_array", KQLFunctionValue::percentilesw_array}, - {"stdev", KQLFunctionValue::stdev}, - {"stdevif", KQLFunctionValue::stdevif}, - {"sum", KQLFunctionValue::sum}, - {"sumif", KQLFunctionValue::sumif}, - {"take_any", KQLFunctionValue::take_any}, - {"take_anyif", KQLFunctionValue::take_anyif}, - {"variance", KQLFunctionValue::variance}, - {"varianceif", KQLFunctionValue::varianceif}, - - {"series_fir", KQLFunctionValue::series_fir}, - {"series_iir", KQLFunctionValue::series_iir}, - {"series_fit_line", KQLFunctionValue::series_fit_line}, - {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, - {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, - {"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, - {"series_outliers", KQLFunctionValue::series_outliers}, - {"series_periods_detect", KQLFunctionValue::series_periods_detect}, - {"series_periods_validate", KQLFunctionValue::series_periods_validate}, - {"series_stats_dynamic", KQLFunctionValue::series_stats_dynamic}, - {"series_stats", KQLFunctionValue::series_stats}, - {"series_fill_backward", KQLFunctionValue::series_fill_backward}, - {"series_fill_const", KQLFunctionValue::series_fill_const}, - {"series_fill_forward", KQLFunctionValue::series_fill_forward}, - {"series_fill_linear", KQLFunctionValue::series_fill_linear}, - - {"ipv4_compare", KQLFunctionValue::ipv4_compare}, - {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, - {"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, - {"ipv4_is_private", 
KQLFunctionValue::ipv4_is_private}, - {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, - {"parse_ipv4", KQLFunctionValue::parse_ipv4}, - {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, - {"ipv6_compare", KQLFunctionValue::ipv6_compare}, - {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, - {"parse_ipv6", KQLFunctionValue::parse_ipv6}, - {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, - {"format_ipv4", KQLFunctionValue::format_ipv4}, - {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, - - {"binary_and", KQLFunctionValue::binary_and}, - {"binary_not", KQLFunctionValue::binary_not}, - {"binary_or", KQLFunctionValue::binary_or}, - {"binary_shift_left", KQLFunctionValue::binary_shift_left}, - {"binary_shift_right", KQLFunctionValue::binary_shift_right}, - {"binary_xor", KQLFunctionValue::binary_xor}, - {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, - {"bin", KQLFunctionValue::bin} - };*/ - + static std::unordered_map kql_functions; }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index 20b4b880a83f..253292a7d9df 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,11 @@ namespace DB { - +bool Bin::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h index 457590328262..802fd152333f 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -4,6 +4,12 @@ #include namespace DB { +class Bin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin()"; } + bool convertImpl(String &out,IParser::Pos 
&pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 20b4b880a83f..f271d924affc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,95 @@ namespace DB { +bool Ipv4Compare::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4IsInRange::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4IsMatch::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4NetmaskSuffix::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv4::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv4Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv6Compare::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv6IsMatch::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool ParseIpv6::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv6Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatIpv4::convertImpl(String 
&out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatIpv4Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h index 457590328262..3ee5dda4c839 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -4,6 +4,96 @@ #include namespace DB { +class Ipv4Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_compare()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsInRange : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_in_range()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_match()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsPrivate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_private()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4NetmaskSuffix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_netmask_suffix()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv6Compare : public 
IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_compare()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv6IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_is_match()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv6Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 851c631d1ceb..a7f7c373566d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -8,16 +8,12 @@ namespace DB bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Encode"); } bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Decode"); } bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) @@ -85,23 +81,17 @@ bool IndexOf::convertImpl(String 
&out,IParser::Pos &pos) bool IsEmpty::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"empty"); } bool IsNotEmpty::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"notEmpty"); } bool IsNotNull::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"isNotNull"); } bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) @@ -113,12 +103,10 @@ bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) bool IsNull::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"isNull"); } -bool ParseCsv::convertImpl(String &out,IParser::Pos &pos) +bool ParseCSV::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; @@ -132,14 +120,14 @@ bool ParseJson::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ParseUrl::convertImpl(String &out,IParser::Pos &pos) +bool ParseURL::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseUrlQuery::convertImpl(String &out,IParser::Pos &pos) +bool ParseURLQuery::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; @@ -176,39 +164,7 @@ bool Split::convertImpl(String &out,IParser::Pos &pos) bool StrCat::convertImpl(String &out,IParser::Pos &pos) { - std::unique_ptr fun; - std::vector args; - String res = "concat("; - - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - --pos; - return false; - } - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - ++pos; - String tmp_arg = String(pos->begin,pos->end); - if 
(pos->type == TokenType::BareWord ) - { - String new_arg; - fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,pos)) - tmp_arg = new_arg; - } - else if (pos->type == TokenType::ClosingRoundBracket) - { - for (auto arg : args) - res+=arg; - - res += ")"; - out = res; - return true; - } - args.push_back(tmp_arg); - } - return false; + return directMapping(out,pos,"concat"); } bool StrCatDelim::convertImpl(String &out,IParser::Pos &pos) @@ -227,9 +183,7 @@ bool StrCmp::convertImpl(String &out,IParser::Pos &pos) bool StrLen::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"lengthUTF8"); } bool StrRep::convertImpl(String &out,IParser::Pos &pos) @@ -265,6 +219,8 @@ bool StrRep::convertImpl(String &out,IParser::Pos &pos) fun = KQLFunctionFactory::get(multiplier); if ( fun && fun->convert(fun_multiplier,pos)) new_multiplier += fun_multiplier; + else + new_multiplier = multiplier; } else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) // has delimiter { @@ -313,11 +269,15 @@ bool SubString::convertImpl(String &out,IParser::Pos &pos) return false; } +bool ToLower::convertImpl(String &out,IParser::Pos &pos) +{ + return directMapping(out,pos,"lower"); +} + + bool ToUpper::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"upper"); } bool Translate::convertImpl(String &out,IParser::Pos &pos) @@ -348,18 +308,14 @@ bool TrimStart::convertImpl(String &out,IParser::Pos &pos) return false; } -bool UrlDecode::convertImpl(String &out,IParser::Pos &pos) +bool URLDecode::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"decodeURLComponent"); } -bool UrlEncode::convertImpl(String &out,IParser::Pos &pos) +bool URLEncode::convertImpl(String 
&out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"encodeURLComponent"); } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h index db7ab5077502..43840c1253f1 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h @@ -116,7 +116,7 @@ class ParseCommandLine : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseCsv : public IParserKQLFunction +class ParseCSV : public IParserKQLFunction { protected: const char * getName() const override { return "parse_csv()"; } @@ -130,14 +130,14 @@ class ParseJson : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseUrl : public IParserKQLFunction +class ParseURL : public IParserKQLFunction { protected: const char * getName() const override { return "parse_url()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseUrlQuery : public IParserKQLFunction +class ParseURLQuery : public IParserKQLFunction { protected: const char * getName() const override { return "parse_urlquery()"; } @@ -214,6 +214,13 @@ class SubString : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; +class ToLower : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tolower()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + class ToUpper : public IParserKQLFunction { protected: @@ -249,14 +256,14 @@ class TrimStart : public IParserKQLFunction bool convertImpl(String &out,IParser::Pos &pos) override; }; -class UrlDecode : public IParserKQLFunction +class URLDecode : public IParserKQLFunction { protected: const char * getName() const override { return "url_decode()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; 
-class UrlEncode : public IParserKQLFunction +class URLEncode : public IParserKQLFunction { protected: const char * getName() const override { return "url_encode()"; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp index 20b4b880a83f..74b7811f29ef 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,109 @@ namespace DB { +bool SeriesFir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesIir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLine::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLineDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2lines::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2linesDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesOutliers::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsDetect::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsValidate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStatsDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = 
String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStats::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillBackward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillConst::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool SeriesFillForward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillLinear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h index 457590328262..fa97dec151c7 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h @@ -4,6 +4,110 @@ #include namespace DB { +class SeriesFir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesIir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_iir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLineDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2lines : public IParserKQLFunction +{ 
+protected: + const char * getName() const override { return "series_fit_2lines()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2linesDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_2lines_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesOutliers : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_outliers()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsDetect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_detect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsValidate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_validate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStatsDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStats : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillBackward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_backward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillConst : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_const()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillForward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_forward()"; } + bool 
convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillLinear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_linear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 466370f5d803..ceb59f1d86ed 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -15,14 +15,12 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Pos begin = pos; String expr; - KQLOperators convetor; - for (auto op_po : op_pos) { if (expr.empty()) - expr = "(" + convetor.getExprFromToken(op_po) +")"; + expr = "(" + getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; + expr = expr + " and (" + getExprFromToken(op_po) +")"; } Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h index d425659499d0..1585805f0fc4 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.h +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -8,7 +8,6 @@ namespace DB class ParserKQLLimit : public ParserKQLBase { - protected: const char * getName() const override { return "KQL limit"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index c3d0843b1f08..b68d27a10f00 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include namespace DB { @@ -33,22 +35,33 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; } - if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + ++token_pos; + + if (!tokens.empty() && 
((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; + else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + { + String tmp_arg = String(token_pos->begin,token_pos->end); + if (token_pos->type == TokenType::BareWord ) + { + String new_arg; + auto fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,token_pos)) + tmp_arg = new_arg; + } + new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + "', " + tmp_arg +", '"+ right_wildcards + "'))"; + } else throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); tokens.pop_back(); return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos &pos) +bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) { - String res; - std::vector tokens; - auto begin = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -89,8 +102,13 @@ String KQLOperators::getExprFromToken(IParser::Pos &pos) else --pos; - if (KQLOperator.find(op) != KQLOperator.end()) - op_value = KQLOperator[op]; + if (KQLOperator.find(op) == KQLOperator.end()) + { + pos = begin; + return false; + } + + op_value = KQLOperator[op]; String new_expr; if (op_value == KQLOperatorValue::none) @@ -231,14 +249,9 @@ String KQLOperators::getExprFromToken(IParser::Pos &pos) tokens.push_back(new_expr); } - ++pos; + return true; } - - for (auto & token : tokens) - res = res + token + " "; - - pos = begin; - return res; + return false; } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9920593c7aec..969a1e5c48a0 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h 
+++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 +9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos &pos) ; + bool convert(std::vector &tokens,IParser::Pos &pos); protected: enum class WildcardsPos:uint8_t diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index f1348c4b3c68..0334722041fb 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -23,21 +23,27 @@ bool ParserKQLBase :: parsePrepare(Pos & pos) String ParserKQLBase :: getExprFromToken(Pos &pos) { String res; + std::vector tokens; std::unique_ptr kql_function; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); String new_token; - if (pos->type == TokenType::BareWord ) + if (!KQLOperators().convert(tokens,pos)) { - kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)) - token = new_token; + if (pos->type == TokenType::BareWord ) + { + kql_function = KQLFunctionFactory::get(token); + if (kql_function && kql_function->convert(new_token,pos)) + token = new_token; + } + tokens.push_back(token); } - res = res + token +" "; ++pos; } + for (auto token:tokens) + res = res + token +" "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index cc4bece7ebf6..6ce29b8024f9 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -57,16 +57,5 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -/* -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) -{ - return wrapParseImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); -}*/ } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index 
1eed2d008451..aa974504d92f 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -19,7 +19,6 @@ class ParserKQLStatement : public IParserBase {} }; - class ParserKQLWithOutput : public IParserBase { protected: diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index 1266b6e732d5..b5302897adaa 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -12,7 +12,6 @@ class ParserKQLTable : public ParserKQLBase const char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool parsePrepare(Pos &pos) override; - }; } From 4f0438f914e881f08fbcf36b636539e818b32069 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 22 Jun 2022 12:00:47 -0700 Subject: [PATCH 14/16] Kusto-phase2: Add common function to get argument for function convertion --- .../KustoFunctions/IParserKQLFunction.cpp | 52 +++++++++ .../Kusto/KustoFunctions/IParserKQLFunction.h | 2 + .../KustoFunctions/KQLStringFunctions.cpp | 103 +++++++++--------- 3 files changed, 103 insertions(+), 54 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index e7134678e958..ed90c865f511 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -19,6 +19,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + + bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) { return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] @@ -73,4 +79,50 @@ bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const Strin return false; } +String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser::Pos &pos) +{ + String converted_arg; + std::unique_ptr fun; + + if (pos->type == TokenType::ClosingRoundBracket) + return converted_arg; 
+ + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Syntax error near " + fn_name, ErrorCodes::SYNTAX_ERROR); + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String token = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { + String converted; + fun = KQLFunctionFactory::get(token); + if ( fun && fun->convert(converted,pos)) + converted_arg += converted; + else + converted_arg += token; + } + else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) + { + break; + } + else + converted_arg += token; + ++pos; + } + return converted_arg; +} + +String IParserKQLFunction::getKQLFunctionName(IParser::Pos &pos) +{ + String fn_name = String(pos->begin, pos->end); + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return ""; + } + return fn_name; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index c633f78fa335..8af2623a984d 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -34,6 +34,8 @@ class IParserKQLFunction protected: virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; static bool directMapping(String &out,IParser::Pos &pos,const String &ch_fn); + static String getConvertedArgument(const String &fn_name, IParser::Pos &pos); + static String getKQLFunctionName(IParser::Pos &pos); }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index a7f7c373566d..0c8a0891a013 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -188,84 +188,80 @@ bool StrLen::convertImpl(String &out,IParser::Pos &pos) bool StrRep::convertImpl(String &out,IParser::Pos &pos) { - 
std::unique_ptr fun; - String res = String(pos->begin,pos->end); - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - --pos; + String fn_name = getKQLFunctionName(pos); //String(pos->begin,pos->end); + + if (fn_name.empty()) return false; - } - ++pos; - String value = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { String func_value; - fun = KQLFunctionFactory::get(value); - if (fun && fun->convert(func_value,pos)) - value = func_value; - } + + auto begin = pos; + ++pos; + String value = getConvertedArgument(fn_name,pos); if (pos->type != TokenType::Comma) return false; ++pos; - String multiplier = String(pos->begin,pos->end); - String new_multiplier; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - if (pos->type == TokenType::BareWord ) - { - String fun_multiplier; - fun = KQLFunctionFactory::get(multiplier); - if ( fun && fun->convert(fun_multiplier,pos)) - new_multiplier += fun_multiplier; - else - new_multiplier = multiplier; - } - else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) // has delimiter - { - break; - } - else - new_multiplier += String(pos->begin,pos->end); - ++pos; - } - - if (!new_multiplier.empty()) - multiplier = new_multiplier; + String multiplier = getConvertedArgument(fn_name,pos); - String delimiter ; + String delimiter; if (pos->type == TokenType::Comma) { ++pos; - delimiter = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { String func_delimiter; - fun = KQLFunctionFactory::get(delimiter); - if (fun && fun->convert(func_delimiter,pos)) - delimiter = func_delimiter; - } - ++pos; + delimiter = getConvertedArgument(fn_name,pos); } + if (pos->type == TokenType::ClosingRoundBracket) { if (!delimiter.empty()) { String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; - res = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + 
"))"; + out = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; } else - res = "repeat("+ value + ", " + multiplier + ")"; - out = res; + out = "repeat("+ value + ", " + multiplier + ")"; + return true; } + + pos = begin; return false; } + bool SubString::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + String source = getConvertedArgument(fn_name,pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + String startingIndex = getConvertedArgument(fn_name,pos); + + String length; + if (pos->type == TokenType::Comma) + { + ++pos; + length = getConvertedArgument(fn_name,pos); + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (length.empty()) + out = "substr("+ source + "," + startingIndex +" + 1)"; + else + out = "substr("+ source + ", " + startingIndex +" + 1, " + length + ")"; + return true; + } + pos = begin; return false; } @@ -274,7 +270,6 @@ bool ToLower::convertImpl(String &out,IParser::Pos &pos) return directMapping(out,pos,"lower"); } - bool ToUpper::convertImpl(String &out,IParser::Pos &pos) { return directMapping(out,pos,"upper"); From d2dcb71eff18893158ef969407b80a8ca16f1e5d Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 23 Jun 2022 14:26:37 -0700 Subject: [PATCH 15/16] Kusto-phase2: add kusto_auto dialect --- src/Client/ClientBase.cpp | 16 +++- src/Interpreters/executeQuery.cpp | 15 +++- .../KustoFunctions/IParserKQLFunction.cpp | 78 ++++++++++--------- src/Parsers/Kusto/ParserKQLOperators.cpp | 1 + 4 files changed, 71 insertions(+), 39 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 656acc3db144..fb90e6ae3baa 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -300,11 +300,14 @@ void ClientBase::setupSignalHandler() ASTPtr 
ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { std::shared_ptr parser; + ParserKQLStatement kql_parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ASTPtr res; const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; + auto begin = pos; + if (!allow_multi_statements) max_length = settings.max_query_size; @@ -322,13 +325,22 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!res) { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; + if (sql_dialect != "kusto") + res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res) + { + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; + } } } else { res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res && sql_dialect != "kusto") + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index cd257567cd51..c766f81df218 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -408,13 +408,26 @@ static std::tuple executeQueryImpl( try { const String & sql_dialect = settings.sql_dialect; - assert(sql_dialect == "clickhouse" || sql_dialect == "kusto"); + assert(sql_dialect == "clickhouse" || sql_dialect == "kusto" || sql_dialect == "kusto_auto"); if (sql_dialect == "kusto" && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else if (sql_dialect == "kusto_auto" && !internal) + { + try { + ParserQuery parser(end, 
settings.allow_settings_after_format_in_insert); + /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + catch(...) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } } else { diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index ed90c865f511..73472a42010e 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB { @@ -38,41 +39,36 @@ bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const String &ch_fn) { - std::unique_ptr fun; - std::vector args; + std::vector arguments; - String res =ch_fn + "("; - out = res; - auto begin = pos; + String fn_name = getKQLFunctionName(pos); - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - pos = begin; + if (fn_name.empty()) return false; - } + String res; + auto begin = pos; + ++pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - ++pos; - String tmp_arg = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { - String new_arg; - fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,pos)) - tmp_arg = new_arg; - } - else if (pos->type == TokenType::ClosingRoundBracket) - { - for (auto arg : args) - res+=arg; + String argument = getConvertedArgument(fn_name,pos); + arguments.push_back(argument); + if (pos->type == TokenType::ClosingRoundBracket) + { + for (auto arg : arguments) + { + if (res.empty()) + res = ch_fn + "(" + arg; + else + res = res + ", "+ arg; + } res += ")"; + out = res; return true; 
} - args.push_back(tmp_arg); + ++pos; } pos = begin; @@ -82,6 +78,7 @@ bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const Strin String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser::Pos &pos) { String converted_arg; + std::vector tokens; std::unique_ptr fun; if (pos->type == TokenType::ClosingRoundBracket) @@ -93,23 +90,32 @@ String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser:: while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) + String new_token; + if (!KQLOperators().convert(tokens,pos)) { - String converted; - fun = KQLFunctionFactory::get(token); - if ( fun && fun->convert(converted,pos)) - converted_arg += converted; + if (pos->type == TokenType::BareWord ) + { + String converted; + fun = KQLFunctionFactory::get(token); + if ( fun && fun->convert(converted,pos)) + tokens.push_back(converted); + else + tokens.push_back(token); + } + else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + break; + } else - converted_arg += token; + tokens.push_back(token); } - else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) - { - break; - } - else - converted_arg += token; ++pos; + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + break; } + for (auto token : tokens) + converted_arg = converted_arg + token +" "; + return converted_arg; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index b68d27a10f00..3fe018ad6d98 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -251,6 +251,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) } return true; } + pos = begin; return false; } From 61ba208a45e5132a8f52b446767b5bd7706d595a Mon Sep 17 00:00:00 2001 
From: Yong Wang Date: Fri, 24 Jun 2022 13:05:52 -0700 Subject: [PATCH 16/16] Kusto-phase2: Add alias support --- src/Parsers/Kusto/ParserKQLProject.cpp | 18 ---------------- src/Parsers/Kusto/ParserKQLQuery.cpp | 29 +++++++++++++++++++++++++- src/Parsers/tests/gtest_Parser.cpp | 4 ---- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index 0e25c9c4a6c3..47ecbbfce3e0 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -11,25 +11,7 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (op_pos.empty()) expr = "*"; else - { - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) - { - pos = *it ; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) - { - if (pos->type == TokenType::BareWord) - { - String tmp(pos->begin,pos->end); - - if (it != op_pos.begin() && columns.find(tmp) == columns.end()) - return false; - columns.insert(tmp); - } - ++pos; - } - } expr = getExprFromToken(op_pos.back()); - } Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0334722041fb..d54344e9ea98 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -25,12 +25,23 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) String res; std::vector tokens; std::unique_ptr kql_function; + String alias; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); String new_token; - if (!KQLOperators().convert(tokens,pos)) + if (token == "=") + { + ++pos; + if (String(pos->begin,pos->end) != "~" ) + { + alias = tokens.back(); + tokens.pop_back(); + } + --pos; + } + else if (!KQLOperators().convert(tokens,pos)) { if (pos->type == TokenType::BareWord ) { @@ 
-40,8 +51,24 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) } tokens.push_back(token); } + + if (pos->type == TokenType::Comma && !alias.empty()) + { + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); + } ++pos; } + + if (!alias.empty()) + { + tokens.push_back("AS"); + tokens.push_back(alias); + } + for (auto token:tokens) res = res + token +" "; return res; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index cb0b49aecbbf..5ba7fbdc3fd8 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -326,10 +326,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" }, - { - "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "throws Syntax error" - }, { "Customers | sort by FirstName desc", "SELECT *\nFROM Customers\nORDER BY FirstName DESC"