diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index 7c48a3d39..dd1fa9e27 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -299,18 +299,30 @@ set(SOURCE_FILES_clp_s_unitTest src/clp_s/SchemaTree.hpp src/clp_s/SchemaWriter.cpp src/clp_s/SchemaWriter.hpp + src/clp_s/search/AddTimestampConditions.cpp + src/clp_s/search/AddTimestampConditions.hpp src/clp_s/search/AndExpr.cpp src/clp_s/search/AndExpr.hpp src/clp_s/search/BooleanLiteral.cpp src/clp_s/search/BooleanLiteral.hpp + src/clp_s/search/clp_search/EncodedVariableInterpreter.cpp + src/clp_s/search/clp_search/EncodedVariableInterpreter.hpp + src/clp_s/search/clp_search/Grep.cpp + src/clp_s/search/clp_search/Grep.hpp src/clp_s/search/clp_search/Query.cpp src/clp_s/search/clp_search/Query.hpp src/clp_s/search/ColumnDescriptor.cpp src/clp_s/search/ColumnDescriptor.hpp + src/clp_s/search/ConstantProp.cpp + src/clp_s/search/ConstantProp.hpp + src/clp_s/search/ConvertToExists.cpp + src/clp_s/search/ConvertToExists.hpp src/clp_s/search/DateLiteral.cpp src/clp_s/search/DateLiteral.hpp src/clp_s/search/EmptyExpr.cpp src/clp_s/search/EmptyExpr.hpp + src/clp_s/search/EvaluateTimestampIndex.cpp + src/clp_s/search/EvaluateTimestampIndex.hpp src/clp_s/search/Expression.cpp src/clp_s/search/Expression.hpp src/clp_s/search/FilterExpr.cpp @@ -319,12 +331,22 @@ set(SOURCE_FILES_clp_s_unitTest src/clp_s/search/Integral.cpp src/clp_s/search/Integral.hpp src/clp_s/search/Literal.hpp + src/clp_s/search/NarrowTypes.cpp + src/clp_s/search/NarrowTypes.hpp src/clp_s/search/NullLiteral.cpp src/clp_s/search/NullLiteral.hpp src/clp_s/search/OrExpr.cpp src/clp_s/search/OrExpr.hpp src/clp_s/search/OrOfAndForm.cpp src/clp_s/search/OrOfAndForm.hpp + src/clp_s/search/Output.cpp + src/clp_s/search/Output.hpp + src/clp_s/search/OutputHandler.cpp + src/clp_s/search/OutputHandler.hpp + src/clp_s/search/Projection.cpp + src/clp_s/search/Projection.hpp + src/clp_s/search/SchemaMatch.cpp + src/clp_s/search/SchemaMatch.hpp src/clp_s/search/SearchUtils.cpp src/clp_s/search/SearchUtils.hpp src/clp_s/search/StringLiteral.cpp @@ -349,7 +371,29 @@ set(SOURCE_FILES_clp_s_unitTest src/clp_s/ZstdCompressor.hpp src/clp_s/ZstdDecompressor.cpp src/clp_s/ZstdDecompressor.hpp -) + ) + +set(SOURCE_FILES_reducer_unitTest + src/reducer/BufferedSocketWriter.cpp + src/reducer/BufferedSocketWriter.hpp + src/reducer/ConstRecordIterator.hpp + src/reducer/CountOperator.cpp + src/reducer/CountOperator.hpp + src/reducer/DeserializedRecordGroup.cpp + src/reducer/DeserializedRecordGroup.hpp + src/reducer/GroupTags.hpp + src/reducer/network_utils.cpp + src/reducer/network_utils.hpp + src/reducer/Operator.cpp + src/reducer/Operator.hpp + src/reducer/Pipeline.cpp + src/reducer/Pipeline.hpp + src/reducer/Record.hpp + src/reducer/RecordGroup.hpp + src/reducer/RecordGroupIterator.hpp + src/reducer/RecordTypedKeyIterator.hpp + src/reducer/types.hpp + ) set(SOURCE_FILES_unitTest src/clp/Array.hpp @@ -491,6 +535,8 @@ set(SOURCE_FILES_unitTest src/clp/MySQLParamBindings.hpp src/clp/MySQLPreparedStatement.cpp src/clp/MySQLPreparedStatement.hpp + src/clp/networking/socket_utils.cpp + src/clp/networking/socket_utils.hpp src/clp/NetworkReader.cpp src/clp/NetworkReader.hpp src/clp/PageAllocatedVector.hpp @@ -576,10 +622,12 @@ set(SOURCE_FILES_unitTest submodules/sqlite3/sqlite3.h submodules/sqlite3/sqlite3ext.h tests/LogSuppressor.hpp + tests/TestOutputCleaner.hpp tests/test-Array.cpp tests/test-BoundedReader.cpp tests/test-BufferedFileReader.cpp tests/test-clp_s-end_to_end.cpp + tests/test-clp_s-search.cpp tests/test-EncodedVariableInterpreter.cpp tests/test-encoding_methods.cpp tests/test-error_handling.cpp @@ -610,7 +658,11 @@ set(SOURCE_FILES_unitTest tests/test-utf8_utils.cpp tests/test-Utils.cpp ) -add_executable(unitTest ${SOURCE_FILES_unitTest} ${SOURCE_FILES_clp_s_unitTest}) +add_executable(unitTest + ${SOURCE_FILES_unitTest} + ${SOURCE_FILES_clp_s_unitTest} + ${SOURCE_FILES_reducer_unitTest} + ) target_include_directories(unitTest PRIVATE ${CMAKE_SOURCE_DIR}/submodules diff --git a/components/core/src/clp_s/search/OutputHandler.hpp b/components/core/src/clp_s/search/OutputHandler.hpp index aeadf1f26..79cb2585f 100644 --- a/components/core/src/clp_s/search/OutputHandler.hpp +++ b/components/core/src/clp_s/search/OutputHandler.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -294,6 +295,54 @@ class CountByTimeOutputHandler : public OutputHandler { std::map m_bucket_counts; int64_t m_count_by_time_bucket_size; }; + +/** + * Output handler that records all results in a provided vector. + */ +class VectorOutputHandler : public OutputHandler { +public: + // Types + struct QueryResult { + // Constructors + QueryResult( + std::string_view message, + epochtime_t timestamp, + std::string_view archive_id, + int64_t log_event_idx + ) + : message{message}, + timestamp{timestamp}, + archive_id{archive_id}, + log_event_idx{log_event_idx} {} + + std::string message; + epochtime_t timestamp; + std::string archive_id; + int64_t log_event_idx; + }; + + // Constructors + VectorOutputHandler(std::vector& output) + : OutputHandler{true, true}, + m_output(output) {} + + // Methods inherited from OutputHandler + void write( + std::string_view message, + epochtime_t timestamp, + std::string_view archive_id, + int64_t log_event_idx + ) override { + m_output.emplace_back(message, timestamp, archive_id, log_event_idx); + } + + void write(std::string_view message) override { + m_output.emplace_back(message, epochtime_t{}, std::string_view{}, int64_t{}); + } + +private: + std::vector& m_output; +}; } // namespace clp_s::search #endif // CLP_S_SEARCH_OUTPUTHANDLER_HPP diff --git a/components/core/tests/LogSuppressor.hpp b/components/core/tests/LogSuppressor.hpp index db20cca26..0c0b7d902 100644 --- a/components/core/tests/LogSuppressor.hpp +++ b/components/core/tests/LogSuppressor.hpp @@ -13,16 +13,17 @@ class LogSuppressor { spdlog::default_logger()->set_level(spdlog::level::off); } - LogSuppressor(LogSuppressor const& other) = default; - LogSuppressor(LogSuppressor&& other) noexcept = default; - - LogSuppressor& operator=(LogSuppressor const& other) = default; - LogSuppressor& operator=(LogSuppressor&& other) noexcept = default; - ~LogSuppressor() { spdlog::default_logger()->set_level(m_previous_logging_level); } + // Delete copy & move constructors and assignment operators + LogSuppressor(LogSuppressor const& other) = delete; + LogSuppressor(LogSuppressor&& other) = delete; + auto operator=(LogSuppressor const& other) -> LogSuppressor& = delete; + auto operator=(LogSuppressor&& other) -> LogSuppressor& = delete; + private: spdlog::level::level_enum m_previous_logging_level; }; #endif // TESTS_LOGSUPPRESSOR_HPP + diff --git a/components/core/tests/TestOutputCleaner.hpp b/components/core/tests/TestOutputCleaner.hpp new file mode 100644 index 000000000..db8efdf66 --- /dev/null +++ b/components/core/tests/TestOutputCleaner.hpp @@ -0,0 +1,36 @@ +#ifndef TESTS_TESTOUTPUTCLEANER_HPP +#define TESTS_TESTOUTPUTCLEANER_HPP + +#include +#include +#include + +/** + * A class that deletes the directories and files created by test cases, both before and after each + * test case where the class is instantiated. + */ +class TestOutputCleaner { +public: + explicit TestOutputCleaner(std::vector const& paths) : m_paths(paths) { + delete_paths(); + } + + ~TestOutputCleaner() { delete_paths(); } + + // Delete copy & move constructors and assignment operators + TestOutputCleaner(TestOutputCleaner const&) = delete; + TestOutputCleaner(TestOutputCleaner&&) = delete; + auto operator=(TestOutputCleaner const&) -> TestOutputCleaner& = delete; + auto operator=(TestOutputCleaner&&) -> TestOutputCleaner& = delete; + +private: + void delete_paths() const { + for (auto const& path : m_paths) { + std::filesystem::remove_all(path); + } + } + + std::vector m_paths; +}; + +#endif // TESTS_TESTOUTPUTCLEANER_HPP diff --git a/components/core/tests/test-clp_s-end_to_end.cpp b/components/core/tests/test-clp_s-end_to_end.cpp index 259b46b93..0e7e3e880 100644 --- a/components/core/tests/test-clp_s-end_to_end.cpp +++ b/components/core/tests/test-clp_s-end_to_end.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include @@ -12,6 +11,7 @@ #include "../src/clp_s/InputConfig.hpp" #include "../src/clp_s/JsonConstructor.hpp" #include "../src/clp_s/JsonParser.hpp" +#include "TestOutputCleaner.hpp" constexpr std::string_view cTestEndToEndArchiveDirectory{"test-end-to-end-archive"}; constexpr std::string_view cTestEndToEndOutputDirectory{"test-end-to-end-out"}; @@ -20,33 +20,9 @@ constexpr std::string_view cTestEndToEndInputFileDirectory{"test_log_files"}; constexpr std::string_view cTestEndToEndInputFile{"test_no_floats_sorted.jsonl"}; namespace { -/** - * A class that deletes the directories and files created by test cases, both before and after each - * test case where the class is instantiated. - */ -class TestOutputCleaner { -public: - TestOutputCleaner() { delete_files(); } - - ~TestOutputCleaner() { delete_files(); } - - // Delete copy & move constructors and assignment operators - TestOutputCleaner(TestOutputCleaner const&) = delete; - TestOutputCleaner(TestOutputCleaner&&) = delete; - auto operator=(TestOutputCleaner const&) -> TestOutputCleaner& = delete; - auto operator=(TestOutputCleaner&&) -> TestOutputCleaner& = delete; - -private: - static void delete_files() { - std::filesystem::remove_all(cTestEndToEndArchiveDirectory); - std::filesystem::remove_all(cTestEndToEndOutputDirectory); - std::filesystem::remove(cTestEndToEndOutputSortedJson); - } -}; - auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path; auto get_test_input_local_path() -> std::string; -void compress(bool structurize_arrays); +void compress(bool structurize_arrays, bool single_file_archive); auto extract() -> std::filesystem::path; void compare(std::filesystem::path const& extracted_json_path); @@ -60,7 +36,7 @@ auto get_test_input_local_path() -> std::string { return (tests_dir / get_test_input_path_relative_to_tests_dir()).string(); } -void compress(bool structurize_arrays) { +void compress(bool structurize_arrays, bool single_file_archive) { constexpr auto cDefaultTargetEncodedSize = 8ULL * 1024 * 1024 * 1024; // 8 GiB constexpr auto cDefaultMaxDocumentSize = 512ULL * 1024 * 1024; // 512 MiB constexpr auto cDefaultMinTableSize = 1ULL * 1024 * 1024; // 1 MiB @@ -82,6 +58,7 @@ void compress(bool structurize_arrays) { parser_option.compression_level = cDefaultCompressionLevel; parser_option.print_archive_stats = cDefaultPrintArchiveStats; parser_option.structurize_arrays = structurize_arrays; + parser_option.single_file_archive = single_file_archive; clp_s::JsonParser parser{parser_option}; REQUIRE(parser.parse()); @@ -102,11 +79,6 @@ auto extract() -> std::filesystem::path { constructor_option.ordered = cDefaultOrdered; constructor_option.target_ordered_chunk_size = cDefaultTargetOrderedChunkSize; for (auto const& entry : std::filesystem::directory_iterator(cTestEndToEndArchiveDirectory)) { - if (false == entry.is_directory()) { - // Skip non-directories - continue; - } - constructor_option.archive_path = clp_s::Path{ .source{clp_s::InputSource::Filesystem}, .path{entry.path().string()} @@ -153,10 +125,15 @@ void compare(std::filesystem::path const& extracted_json_path) { TEST_CASE("clp-s-compress-extract-no-floats", "[clp-s][end-to-end]") { auto structurize_arrays = GENERATE(true, false); + auto single_file_archive = GENERATE(true, false); - TestOutputCleaner const test_cleanup; + TestOutputCleaner const test_cleanup{ + {std::string{cTestEndToEndArchiveDirectory}, + std::string{cTestEndToEndOutputDirectory}, + std::string{cTestEndToEndOutputSortedJson}} + }; - compress(structurize_arrays); + compress(structurize_arrays, single_file_archive); auto extracted_json_path = extract(); diff --git a/components/core/tests/test-clp_s-search.cpp b/components/core/tests/test-clp_s-search.cpp new file mode 100644 index 000000000..80b0a6a59 --- /dev/null +++ b/components/core/tests/test-clp_s-search.cpp @@ -0,0 +1,193 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../src/clp_s/ArchiveReader.hpp" +#include "../src/clp_s/InputConfig.hpp" +#include "../src/clp_s/JsonParser.hpp" +#include "../src/clp_s/search/ConvertToExists.hpp" +#include "../src/clp_s/search/EmptyExpr.hpp" +#include "../src/clp_s/search/EvaluateTimestampIndex.hpp" +#include "../src/clp_s/search/Expression.hpp" +#include "../src/clp_s/search/kql/kql.hpp" +#include "../src/clp_s/search/NarrowTypes.hpp" +#include "../src/clp_s/search/OrOfAndForm.hpp" +#include "../src/clp_s/search/Output.hpp" +#include "../src/clp_s/search/OutputHandler.hpp" +#include "../src/clp_s/search/Projection.hpp" +#include "../src/clp_s/search/SchemaMatch.hpp" +#include "../src/clp_s/Utils.hpp" +#include "TestOutputCleaner.hpp" + +constexpr std::string_view cTestSearchArchiveDirectory{"test-clp-s-search-archive"}; +constexpr std::string_view cTestInputFileDirectory{"test_log_files"}; +constexpr std::string_view cTestSearchInputFile{"test_search.jsonl"}; +constexpr std::string_view cTestIdxKey{"idx"}; + +namespace { +auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path; +auto get_test_input_local_path() -> std::string; +void compress(bool structurize_arrays, bool single_file_archive); +void +search(std::string const& query, bool ignore_case, std::vector const& expected_results); +void validate_results( + std::vector const& results, + std::vector const& expected_results +); + +auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path { + return std::filesystem::path{cTestInputFileDirectory} / cTestSearchInputFile; +} + +auto get_test_input_local_path() -> std::string { + std::filesystem::path const current_file_path{__FILE__}; + auto const tests_dir{current_file_path.parent_path()}; + return (tests_dir / get_test_input_path_relative_to_tests_dir()).string(); +} + +void compress(bool structurize_arrays, bool single_file_archive) { + constexpr auto cDefaultTargetEncodedSize = 8ULL * 1024 * 1024 * 1024; // 8 GiB + constexpr auto cDefaultMaxDocumentSize = 512ULL * 1024 * 1024; // 512 MiB + constexpr auto cDefaultMinTableSize = 1ULL * 1024 * 1024; // 1 MiB + constexpr auto cDefaultCompressionLevel = 3; + constexpr auto cDefaultPrintArchiveStats = false; + + std::filesystem::create_directory(cTestSearchArchiveDirectory); + REQUIRE((std::filesystem::is_directory(cTestSearchArchiveDirectory))); + + clp_s::JsonParserOption parser_option{}; + parser_option.input_paths.emplace_back(clp_s::Path{ + .source = clp_s::InputSource::Filesystem, + .path = get_test_input_local_path() + }); + parser_option.archives_dir = cTestSearchArchiveDirectory; + parser_option.target_encoded_size = cDefaultTargetEncodedSize; + parser_option.max_document_size = cDefaultMaxDocumentSize; + parser_option.min_table_size = cDefaultMinTableSize; + parser_option.compression_level = cDefaultCompressionLevel; + parser_option.print_archive_stats = cDefaultPrintArchiveStats; + parser_option.structurize_arrays = structurize_arrays; + parser_option.single_file_archive = single_file_archive; + + clp_s::JsonParser parser{parser_option}; + REQUIRE(parser.parse()); + parser.store(); + + REQUIRE((false == std::filesystem::is_empty(cTestSearchArchiveDirectory))); +} + +void validate_results( + std::vector const& results, + std::vector const& expected_results +) { + std::set results_set; + bool results_are_valid_json = true; + for (auto const& result : results) { + try { + auto json = nlohmann::json::parse(result.message); + results_set.insert(json[cTestIdxKey].template get()); + } catch (std::exception const& e) { + FAIL(fmt::format("Invalid JSON in result: {}", result.message)); + return; + } + } + std::set expected_results_set{expected_results.begin(), expected_results.end()}; + REQUIRE(results_set == expected_results_set); + REQUIRE(results.size() == expected_results.size()); +} + +void +search(std::string const& query, bool ignore_case, std::vector const& expected_results) { + REQUIRE(expected_results.size() > 0); + auto query_stream = std::istringstream{query}; + auto expr = clp_s::search::kql::parse_kql_expression(query_stream); + REQUIRE(nullptr != expr); + REQUIRE(nullptr == std::dynamic_pointer_cast(expr)); + + clp_s::search::OrOfAndForm standardize_pass; + expr = standardize_pass.run(expr); + REQUIRE(nullptr != expr); + + clp_s::search::NarrowTypes narrow_pass; + expr = narrow_pass.run(expr); + REQUIRE(nullptr != expr); + + clp_s::search::ConvertToExists convert_pass; + expr = convert_pass.run(expr); + REQUIRE(nullptr != expr); + + std::vector results; + for (auto const& entry : std::filesystem::directory_iterator(cTestSearchArchiveDirectory)) { + auto archive_reader = std::make_shared(); + auto archive_path = clp_s::Path{ + .source{clp_s::InputSource::Filesystem}, + .path{entry.path().string()} + }; + archive_reader->open(archive_path, clp_s::NetworkAuthOption{}); + + auto archive_expr = expr->copy(); + + auto timestamp_dict = archive_reader->get_timestamp_dictionary(); + clp_s::search::EvaluateTimestampIndex timestamp_index_pass(timestamp_dict); + REQUIRE(clp_s::EvaluatedValue::False != timestamp_index_pass.run(archive_expr)); + + clp_s::search::SchemaMatch match_pass( + archive_reader->get_schema_tree(), + archive_reader->get_schema_map() + ); + archive_expr = match_pass.run(archive_expr); + REQUIRE(nullptr != archive_expr); + + auto output_handler = std::make_unique(results); + clp_s::search::Output output_pass( + match_pass, + archive_expr, + archive_reader, + timestamp_dict, + std::move(output_handler), + ignore_case + ); + output_pass.filter(); + archive_reader->close(); + } + + validate_results(results, expected_results); +} +} // namespace + +TEST_CASE("clp-s-search", "[clp-s][search]") { + std::vector>> queries_and_results{ + {R"aa(NOT a: b)aa", {0}}, + {R"aa(msg: "Msg 1: \"Abc123\"")aa", {1}}, + {R"aa(msg: "Msg 2: 'Abc123'")aa", {2}}, + {R"aa(msg: "Msg 3: \nAbc123")aa", {3}}, + // CLP incorrectly generates no subqueries in Grep::process_raw_query for the following + // query, so we skip it for now. + //{R"aa(msg: "Msg 4: \\Abc123")aa", {4}} + {R"aa(msg: "Msg 5: \rAbc123")aa", {5}}, + {R"aa(msg: "Msg 6: \tAbc123")aa", {6}}, + {R"aa(msg: "*Abc123*")aa", {1, 2, 3, 5, 6}}, + {R"aa(arr.b > 1000)aa", {7, 8}} + }; + auto structurize_arrays = GENERATE(true, false); + auto single_file_archive = GENERATE(true, false); + + TestOutputCleaner const test_cleanup{{std::string{cTestSearchArchiveDirectory}}}; + + compress(structurize_arrays, single_file_archive); + + for (auto const& [query, expected_results] : queries_and_results) { + search(query, false, expected_results); + } +} diff --git a/components/core/tests/test_log_files/test_search.jsonl b/components/core/tests/test_log_files/test_search.jsonl new file mode 100644 index 000000000..63910c04b --- /dev/null +++ b/components/core/tests/test_log_files/test_search.jsonl @@ -0,0 +1,9 @@ +{"idx": 0, "a": "clp string"} +{"idx": 1, "msg": "Msg 1: \"Abc123\""} +{"idx": 2, "msg": "Msg 2: 'Abc123'"} +{"idx": 3, "msg": "Msg 3: \nAbc123"} +{"idx": 4, "skip_msg": "Msg 4: \\Abc123"} +{"idx": 5, "msg": "Msg 5: \rAbc123"} +{"idx": 6, "msg": "Msg 6: \tAbc123"} +{"idx": 7, "arr": [{"a": 999}, {"b": 1001}]} +{"idx": 8, "arr": {"a": 999, "b": 1001}}