From 8015a57319c34ba672017a0b31e96f36381ebfa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20Gro=C3=9F?= Date: Mon, 23 Dec 2024 20:44:34 +0100 Subject: [PATCH] New Result Serialization (#26) * added result serializer * fixed some bugs for exotic types * use internal function for type names * added missing cmath include * Added e2e tests * Cleanup * Moved up clang-format and tidy config so IDE and make format picks up on the right formatting rules * Moved most logic into new compact serializer * Added some docs for contributing * Ignore pycache * Cleanup --------- Co-authored-by: Niclas Haderer --- .clang-format | 1 + .clang-tidy | 1 + .gitignore | 2 + CMakeLists.txt | 25 +- docs/README.md | 37 +++ requirements.txt | 2 + src/httpserver_extension.cpp | 127 ++------- src/include/httpserver_extension.hpp | 1 - src/include/query_stats.hpp | 12 + src/include/result_serializer.hpp | 46 ++++ .../result_serializer_compact_json.hpp | 61 +++++ src/result_serializer.cpp | 253 ++++++++++++++++++ test_http_api/__init__.py | 0 test_http_api/client.py | 55 ++++ test_http_api/conftest.py | 32 +++ test_http_api/const.py | 11 + test_http_api/responses/all_types_compact.py | 233 ++++++++++++++++ test_http_api/test_json_compact_all_types.py | 10 + 18 files changed, 784 insertions(+), 125 deletions(-) create mode 120000 .clang-format create mode 120000 .clang-tidy create mode 100644 requirements.txt create mode 100644 src/include/query_stats.hpp create mode 100644 src/include/result_serializer.hpp create mode 100644 src/include/result_serializer_compact_json.hpp create mode 100644 src/result_serializer.cpp create mode 100644 test_http_api/__init__.py create mode 100644 test_http_api/client.py create mode 100644 test_http_api/conftest.py create mode 100644 test_http_api/const.py create mode 100644 test_http_api/responses/all_types_compact.py create mode 100644 test_http_api/test_json_compact_all_types.py diff --git a/.clang-format b/.clang-format new file mode 120000 index 
0000000..9a13bb6 --- /dev/null +++ b/.clang-format @@ -0,0 +1 @@ +duckdb/.clang-format \ No newline at end of file diff --git a/.clang-tidy b/.clang-tidy new file mode 120000 index 0000000..b438d44 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1 @@ +duckdb/.clang-tidy \ No newline at end of file diff --git a/.gitignore b/.gitignore index b9f264b..5496fe3 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ duckdb_unittest_tempdir/ testext test/python/__pycache__/ .Rhistory +__pycache__ +venv \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 435c3dc..ed43a9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,24 +6,20 @@ set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) project(${TARGET_NAME}) -include_directories( - src/include - ${CMAKE_CURRENT_BINARY_DIR} - duckdb/third_party/httplib - duckdb/parquet/include -) +include_directories(src/include ${CMAKE_CURRENT_BINARY_DIR} + duckdb/third_party/httplib duckdb/parquet/include) # Embed ./src/assets/index.html as a C++ header add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp - COMMAND ${CMAKE_COMMAND} -P ${PROJECT_SOURCE_DIR}/embed.cmake ${PROJECT_SOURCE_DIR}/src/assets/index.html ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp playgroundContent - DEPENDS ${PROJECT_SOURCE_DIR}/src/assets/index.html -) + COMMAND + ${CMAKE_COMMAND} -P ${PROJECT_SOURCE_DIR}/embed.cmake + ${PROJECT_SOURCE_DIR}/src/assets/index.html + ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp playgroundContent + DEPENDS ${PROJECT_SOURCE_DIR}/src/assets/index.html) -set(EXTENSION_SOURCES - src/httpserver_extension.cpp - ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp -) +set(EXTENSION_SOURCES src/httpserver_extension.cpp src/result_serializer.cpp + ${CMAKE_CURRENT_BINARY_DIR}/playground.hpp) if(MINGW) set(OPENSSL_USE_STATIC_LIBS TRUE) @@ -36,7 +32,8 @@ build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " 
${EXTENSION_SOURCES}) include_directories(${OPENSSL_INCLUDE_DIR}) -target_link_libraries(${LOADABLE_EXTENSION_NAME} duckdb_mbedtls ${OPENSSL_LIBRARIES}) +target_link_libraries(${LOADABLE_EXTENSION_NAME} duckdb_mbedtls + ${OPENSSL_LIBRARIES}) target_link_libraries(${EXTENSION_NAME} duckdb_mbedtls ${OPENSSL_LIBRARIES}) if(MINGW) diff --git a/docs/README.md b/docs/README.md index 532468b..7a89d7f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -203,6 +203,43 @@ Check out this flocking macro from fellow _Italo-Amsterdammer_ @carlopi @ DuckDB
+## Development + +### Cloning the Repository + +Clone the repository and all its submodules: + +```bash +git clone <repository-url> +git submodule update --init --recursive +``` + +### Setting up CLion +**Opening project:** +Configuring CLion with the extension template requires a little work. Firstly, make sure that the DuckDB submodule is available. +Then make sure to open `./duckdb/CMakeLists.txt` (so not the top level `CMakeLists.txt` file from this repo) as a project in CLion. +Now, to fix your project path, go to `tools->CMake->Change Project Root` ([docs](https://www.jetbrains.com/help/clion/change-project-root-directory.html)) to set the project root to the root dir of this repo. + +**Debugging:** +To set up debugging in CLion, there are two simple steps required. Firstly, in `CLion -> Settings / Preferences -> Build, Execution, Deployment -> CMake` you will need to add the desired builds (e.g. Debug, Release, RelDebug, etc). There are different ways to configure this, but the easiest is to leave all empty, except the `build path`, which needs to be set to `../build/{build type}`. Now on a clean repository you will first need to run `make {build type}` to initialize the CMake build directory. After running make, you will be able to (re)build from CLion by using the build target we just created. If you use the CLion editor, you can create CLion CMake profiles matching the CMake variables that are described in the makefile, and then you don't need to invoke the Makefile. + +The second step is to configure the unittest runner as a run/debug configuration. To do this, go to `Run -> Edit Configurations` and click `+ -> CMake Application`. The target and executable should be `unittest`. This will run all the DuckDB tests. To specify only running the extension-specific tests, add `--test-dir ../../.. [sql]` to the `Program Arguments`. Note that it is recommended to use the `unittest` executable for testing/development within CLion. 
The actual DuckDB CLI currently does not reliably work as a run target in CLion. + + +### Testing + +To run the E2E tests, install all the necessary packages: + +```bash +pip install -r requirements.txt +``` + +Then run the test suite: + +```bash +pytest test_http_api +``` + ##### :black_joker: Disclaimers [^1]: DuckDB ® is a trademark of DuckDB Foundation. All rights reserved by their respective owners. [^1] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a0effd9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +httpx==0.28.1 +pytest==8.3.4 \ No newline at end of file diff --git a/src/httpserver_extension.cpp b/src/httpserver_extension.cpp index 65a9aa0..6bda310 100644 --- a/src/httpserver_extension.cpp +++ b/src/httpserver_extension.cpp @@ -1,32 +1,30 @@ #define DUCKDB_EXTENSION_MAIN +#define CPPHTTPLIB_OPENSSL_SUPPORT + +#include +#include +#include #include "httpserver_extension.hpp" +#include "query_stats.hpp" #include "duckdb.hpp" #include "duckdb/common/exception.hpp" -#include "duckdb/common/string_util.hpp" #include "duckdb/function/scalar_function.hpp" #include "duckdb/main/extension_util.hpp" -#include "duckdb/common/atomic.hpp" -#include "duckdb/common/exception/http_exception.hpp" #include "duckdb/common/allocator.hpp" -#include -#include -#include -#include - -#ifndef _WIN32 -#include -#endif - -#define CPPHTTPLIB_OPENSSL_SUPPORT +#include "result_serializer.hpp" +#include "result_serializer_compact_json.hpp" #include "httplib.hpp" #include "yyjson.hpp" - #include "playground.hpp" -using namespace duckdb_yyjson; // NOLINT +#ifndef _WIN32 +#include +#endif namespace duckdb { +using namespace duckdb_yyjson; // NOLINT(*-build-using-namespace) + struct HttpServerState { std::unique_ptr server; std::unique_ptr server_thread; @@ -40,98 +38,6 @@ struct HttpServerState { static HttpServerState global_state; -std::string GetColumnType(MaterializedQueryResult &result, idx_t column) { - if (result.RowCount() == 0) { - 
return "String"; - } - switch (result.types[column].id()) { - case LogicalTypeId::FLOAT: - return "Float"; - case LogicalTypeId::DOUBLE: - return "Double"; - case LogicalTypeId::INTEGER: - return "Int32"; - case LogicalTypeId::BIGINT: - return "Int64"; - case LogicalTypeId::UINTEGER: - return "UInt32"; - case LogicalTypeId::UBIGINT: - return "UInt64"; - case LogicalTypeId::VARCHAR: - return "String"; - case LogicalTypeId::TIME: - return "DateTime"; - case LogicalTypeId::DATE: - return "Date"; - case LogicalTypeId::TIMESTAMP: - return "DateTime"; - case LogicalTypeId::BOOLEAN: - return "Int8"; - default: - return "String"; - } - return "String"; -} - -struct ReqStats { - float elapsed_sec; - int64_t read_bytes; - int64_t read_rows; -}; - -// Convert the query result to JSON format -static std::string ConvertResultToJSON(MaterializedQueryResult &result, ReqStats &req_stats) { - auto doc = yyjson_mut_doc_new(nullptr); - auto root = yyjson_mut_obj(doc); - yyjson_mut_doc_set_root(doc, root); - // Add meta information - auto meta_array = yyjson_mut_arr(doc); - for (idx_t col = 0; col < result.ColumnCount(); ++col) { - auto column_obj = yyjson_mut_obj(doc); - yyjson_mut_obj_add_str(doc, column_obj, "name", result.ColumnName(col).c_str()); - yyjson_mut_arr_append(meta_array, column_obj); - std::string tp(GetColumnType(result, col)); - yyjson_mut_obj_add_strcpy(doc, column_obj, "type", tp.c_str()); - } - yyjson_mut_obj_add_val(doc, root, "meta", meta_array); - - // Add data - auto data_array = yyjson_mut_arr(doc); - for (idx_t row = 0; row < result.RowCount(); ++row) { - auto row_array = yyjson_mut_arr(doc); - for (idx_t col = 0; col < result.ColumnCount(); ++col) { - Value value = result.GetValue(col, row); - if (value.IsNull()) { - yyjson_mut_arr_append(row_array, yyjson_mut_null(doc)); - } else { - std::string value_str = value.ToString(); - yyjson_mut_arr_append(row_array, yyjson_mut_strncpy(doc, value_str.c_str(), value_str.length())); - } - } - 
yyjson_mut_arr_append(data_array, row_array); - } - yyjson_mut_obj_add_val(doc, root, "data", data_array); - - // Add row count - yyjson_mut_obj_add_int(doc, root, "rows", result.RowCount()); - //"statistics":{"elapsed":0.00031403,"rows_read":1,"bytes_read":0}} - auto stat_obj = yyjson_mut_obj_add_obj(doc, root, "statistics"); - yyjson_mut_obj_add_real(doc, stat_obj, "elapsed", req_stats.elapsed_sec); - yyjson_mut_obj_add_int(doc, stat_obj, "rows_read", req_stats.read_rows); - yyjson_mut_obj_add_int(doc, stat_obj, "bytes_read", req_stats.read_bytes); - // Write to string - auto data = yyjson_mut_write(doc, 0, nullptr); - if (!data) { - yyjson_mut_doc_free(doc); - throw InternalException("Failed to render the result as JSON, yyjson failed"); - } - - std::string json_output(data); - free(data); - yyjson_mut_doc_free(doc); - return json_output; -} - // New: Base64 decoding function std::string base64_decode(const std::string &in) { std::string out; @@ -300,7 +206,8 @@ void HandleHttpRequest(const duckdb_httplib_openssl::Request& req, duckdb_httpli std::string json_output = ConvertResultToNDJSON(*result); res.set_content(json_output, "application/x-ndjson"); } else if (format == "JSONCompact") { - std::string json_output = ConvertResultToJSON(*result, stats); + ResultSerializerCompactJson serializer; + std::string json_output = serializer.Serialize(*result, stats); res.set_content(json_output, "application/json"); } else { // Default to NDJSON for DuckDB's own queries @@ -325,9 +232,9 @@ void HttpServerStart(DatabaseInstance& db, string_t host, int32_t port, string_t global_state.is_running = true; global_state.auth_token = auth.GetString(); - // Custom basepath, defaults to root / + // Custom basepath, defaults to root / const char* base_path_env = std::getenv("DUCKDB_HTTPSERVER_BASEPATH"); - std::string base_path = "/"; + std::string base_path = "/"; if (base_path_env && base_path_env[0] == '/' && strlen(base_path_env) > 1) { base_path = std::string(base_path_env); 
diff --git a/src/include/httpserver_extension.hpp b/src/include/httpserver_extension.hpp index 432d1c0..fff923d 100644 --- a/src/include/httpserver_extension.hpp +++ b/src/include/httpserver_extension.hpp @@ -1,7 +1,6 @@ #pragma once #include "duckdb.hpp" -#include "duckdb/common/file_system.hpp" namespace duckdb { diff --git a/src/include/query_stats.hpp b/src/include/query_stats.hpp new file mode 100644 index 0000000..acf4aac --- /dev/null +++ b/src/include/query_stats.hpp @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace duckdb { + +struct ReqStats { + float elapsed_sec; + uint64_t read_bytes; + uint64_t read_rows; +}; + +} // namespace duckdb diff --git a/src/include/result_serializer.hpp b/src/include/result_serializer.hpp new file mode 100644 index 0000000..7217f28 --- /dev/null +++ b/src/include/result_serializer.hpp @@ -0,0 +1,46 @@ +#pragma once + +#include "duckdb/main/query_result.hpp" +#include "yyjson.hpp" + +namespace duckdb { +using namespace duckdb_yyjson; // NOLINT(*-build-using-namespace) + +class ResultSerializer { +public: + explicit ResultSerializer(const bool _set_invalid_values_to_null = false) + : set_invalid_values_to_null(_set_invalid_values_to_null) { + doc = yyjson_mut_doc_new(nullptr); + } + + virtual ~ResultSerializer() { + yyjson_mut_doc_free(doc); + } + + std::string YY_ToString() { + auto data = yyjson_mut_write(doc, 0, nullptr); + if (!data) { + throw SerializationException("Could not render yyjson document"); + } + std::string json_output(data); + free(data); + return json_output; + } + +protected: + void SerializeInternal(QueryResult &query_result, yyjson_mut_val *append_root, bool values_as_array); + + void SerializeChunk(const DataChunk &chunk, vector &names, vector &types, + yyjson_mut_val *append_root, bool values_as_array); + + yyjson_mut_val *SerializeRowAsArray(const DataChunk &chunk, idx_t row_idx, vector &types); + + yyjson_mut_val *SerializeRowAsObject(const DataChunk &chunk, idx_t row_idx, vector &names, + vector 
&types); + + void SerializeValue(yyjson_mut_val *parent, const Value &value, optional_ptr name, const LogicalType &type); + + yyjson_mut_doc *doc; + bool set_invalid_values_to_null; +}; +} // namespace duckdb diff --git a/src/include/result_serializer_compact_json.hpp b/src/include/result_serializer_compact_json.hpp new file mode 100644 index 0000000..7d4410c --- /dev/null +++ b/src/include/result_serializer_compact_json.hpp @@ -0,0 +1,61 @@ +#pragma once +#include "query_stats.hpp" +#include "result_serializer.hpp" + +namespace duckdb { + +class ResultSerializerCompactJson final : public ResultSerializer { +public: + explicit ResultSerializerCompactJson(const bool _set_invalid_values_to_null = false) + : ResultSerializer(_set_invalid_values_to_null) { + root = yyjson_mut_obj(doc); + D_ASSERT(root); + yyjson_mut_doc_set_root(doc, root); + } + + std::string Serialize(MaterializedQueryResult &query_result, const ReqStats &stats) { + // Metadata about the query result + yyjson_mut_val *yy_meta = GetMeta(query_result); + yyjson_mut_obj_add_val(doc, root, "meta", yy_meta); + + // Actual query data + yyjson_mut_val *yy_data_array = yyjson_mut_arr(doc); + SerializeInternal(query_result, yy_data_array, true); + yyjson_mut_obj_add_val(doc, root, "data", yy_data_array); + + // Number of rows + yyjson_mut_obj_add_uint(doc, root, "rows", query_result.RowCount()); + + // Query statistics + yyjson_mut_val *yy_stats = GetStats(stats); + yyjson_mut_obj_add_val(doc, root, "statistics", yy_stats); + + return YY_ToString(); + } + +private: + yyjson_mut_val *GetMeta(QueryResult &query_result) { + auto meta_array = yyjson_mut_arr(doc); + for (idx_t col = 0; col < query_result.ColumnCount(); ++col) { + auto column_obj = yyjson_mut_obj(doc); + yyjson_mut_obj_add_strcpy(doc, column_obj, "name", query_result.ColumnName(col).c_str()); + yyjson_mut_arr_append(meta_array, column_obj); + // @paul Did you find out if result.RowCount() == 0 is needed? 
+ std::string tp(query_result.types[col].ToString()); + yyjson_mut_obj_add_strcpy(doc, column_obj, "type", tp.c_str()); + } + + return meta_array; + } + + yyjson_mut_val *GetStats(const ReqStats &stats) { + auto stat_obj = yyjson_mut_obj(doc); + yyjson_mut_obj_add_real(doc, stat_obj, "elapsed", stats.elapsed_sec); + yyjson_mut_obj_add_int(doc, stat_obj, "rows_read", stats.read_rows); + yyjson_mut_obj_add_int(doc, stat_obj, "bytes_read", stats.read_bytes); + return stat_obj; + } + + yyjson_mut_val *root; +}; +} // namespace duckdb diff --git a/src/result_serializer.cpp b/src/result_serializer.cpp new file mode 100644 index 0000000..5150b6c --- /dev/null +++ b/src/result_serializer.cpp @@ -0,0 +1,253 @@ +#include "result_serializer.hpp" + +#include "duckdb/common/extra_type_info.hpp" +#include "duckdb/common/types/uuid.hpp" + +#include + +namespace duckdb { + +#define YY_APPEND_FAIL(success) \ + if (!success) { \ + throw SerializationException("Failed to append in " __FILE__, __LINE__); \ + } + +void ResultSerializer::SerializeInternal(QueryResult &query_result, yyjson_mut_val *append_root, + const bool values_as_array) { + auto chunk = query_result.Fetch(); + auto names = query_result.names; + auto types = query_result.types; + + while (chunk) { + SerializeChunk(*chunk, names, types, append_root, values_as_array); + chunk = query_result.Fetch(); + } +} + +void ResultSerializer::SerializeChunk(const DataChunk &chunk, vector &names, vector &types, + yyjson_mut_val *append_root, const bool values_as_array) { + D_ASSERT(yyjson_mut_is_arr(append_root)); + + const auto row_count = chunk.size(); + + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + + // Which itself contains an object + yyjson_mut_val *obj; + + if (values_as_array) { + obj = SerializeRowAsArray(chunk, row_idx, types); + } else { + obj = SerializeRowAsObject(chunk, row_idx, names, types); + } + + YY_APPEND_FAIL(yyjson_mut_arr_append(append_root, obj)); + } +} + +yyjson_mut_val 
*ResultSerializer::SerializeRowAsArray(const DataChunk &chunk, const idx_t row_idx, + vector &types) { + const auto column_count = chunk.ColumnCount(); + auto obj = yyjson_mut_arr(doc); + + for (idx_t col_idx = 0; col_idx < column_count; col_idx++) { + auto value = chunk.GetValue(col_idx, row_idx); + auto &type = types[col_idx]; + SerializeValue(obj, value, nullptr, type); + } + + return obj; +} + +yyjson_mut_val *ResultSerializer::SerializeRowAsObject(const DataChunk &chunk, const idx_t row_idx, + vector &names, vector &types) { + const auto column_count = chunk.ColumnCount(); + auto obj = yyjson_mut_obj(doc); + + for (idx_t col_idx = 0; col_idx < column_count; col_idx++) { + auto value = chunk.GetValue(col_idx, row_idx); + auto &type = types[col_idx]; + SerializeValue(obj, value, names[col_idx], type); + } + + return obj; +} + +void ResultSerializer::SerializeValue( // NOLINT(*-no-recursion) + yyjson_mut_val *parent, const Value &value, optional_ptr name, const LogicalType &type) { + yyjson_mut_val *val = nullptr; + + if (value.IsNull()) { + goto null_handle; + } + + switch (type.id()) { + case LogicalTypeId::SQLNULL: + null_handle: + val = yyjson_mut_null(doc); + break; + case LogicalTypeId::BOOLEAN: + val = yyjson_mut_bool(doc, value.GetValue()); + break; + case LogicalTypeId::TINYINT: + case LogicalTypeId::SMALLINT: + case LogicalTypeId::INTEGER: + case LogicalTypeId::BIGINT: + case LogicalTypeId::INTEGER_LITERAL: + val = yyjson_mut_int(doc, value.GetValue()); + break; + case LogicalTypeId::UTINYINT: + case LogicalTypeId::USMALLINT: + case LogicalTypeId::UINTEGER: + case LogicalTypeId::UBIGINT: + val = yyjson_mut_uint(doc, value.GetValue()); + break; + + // format to big numbers as strings + case LogicalTypeId::UHUGEINT: { + const uhugeint_t uHugeIntNumber = value.GetValue(); + val = yyjson_mut_strcpy(doc, uHugeIntNumber.ToString().c_str()); + break; + } + case LogicalTypeId::HUGEINT: { + const hugeint_t hugeIntNumber = value.GetValue(); + val = 
yyjson_mut_strcpy(doc, hugeIntNumber.ToString().c_str()); + break; + } + + case LogicalTypeId::FLOAT: + case LogicalTypeId::DOUBLE: + case LogicalTypeId::DECIMAL: { + const auto real_val = value.GetValue(); + if (std::isnan(real_val) || std::isinf(real_val)) { + if (set_invalid_values_to_null) { + goto null_handle; + } else { + const auto castedValue = value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue(); + val = yyjson_mut_strcpy(doc, castedValue.c_str()); + break; + } + } else { + val = yyjson_mut_real(doc, real_val); + break; + } + } + // Data + time + case LogicalTypeId::DATE: + case LogicalTypeId::TIME: + case LogicalTypeId::TIMESTAMP_SEC: + case LogicalTypeId::TIMESTAMP_MS: + case LogicalTypeId::TIMESTAMP: + case LogicalTypeId::TIMESTAMP_NS: + case LogicalTypeId::TIMESTAMP_TZ: + case LogicalTypeId::TIME_TZ: + // Enum + case LogicalTypeId::ENUM: + // Strings + case LogicalTypeId::CHAR: + case LogicalTypeId::VARCHAR: + case LogicalTypeId::STRING_LITERAL: + val = yyjson_mut_strcpy(doc, value.GetValue().c_str()); + break; + case LogicalTypeId::VARINT: + val = yyjson_mut_strcpy(doc, value.DefaultCastAs(LogicalTypeId::VARCHAR).GetValue().c_str()); + break; + // UUID + case LogicalTypeId::UUID: { + const auto uuid_int = value.GetValue(); + const auto uuid = UUID::ToString(uuid_int); + val = yyjson_mut_strcpy(doc, uuid.c_str()); + break; + } + // Weird special types that are just serialized to string + case LogicalTypeId::INTERVAL: + // TODO perhaps base64 encode blob? + case LogicalTypeId::BLOB: + case LogicalTypeId::BIT: + val = yyjson_mut_strcpy(doc, value.ToString().c_str()); + break; + case LogicalTypeId::UNION: { + auto &union_val = UnionValue::GetValue(value); + SerializeValue(parent, union_val, name, union_val.type()); + return; + } + case LogicalTypeId::ARRAY: + case LogicalTypeId::LIST: { + const auto get_children = LogicalTypeId::LIST == type.id() ? 
ListValue::GetChildren : ArrayValue::GetChildren; + auto &children = get_children(value); + val = yyjson_mut_arr(doc); + for (auto &child : children) { + SerializeValue(val, child, nullptr, child.type()); + } + break; + } + case LogicalTypeId::STRUCT: { + const auto &children = StructValue::GetChildren(value); + const auto &type_info = value.type().AuxInfo()->Cast(); + + auto all_keys_are_empty = true; + for (uint64_t idx = 0; idx < children.size(); ++idx) { + if (!type_info.child_types[idx].first.empty()) { + all_keys_are_empty = false; + break; + } + } + + // Unnamed struct -> just create tuples + if (all_keys_are_empty) { + val = yyjson_mut_arr(doc); + for (auto &child : children) { + SerializeValue(val, child, nullptr, child.type()); + } + } else { + val = yyjson_mut_obj(doc); + for (uint64_t idx = 0; idx < children.size(); ++idx) { + string struct_name = type_info.child_types[idx].first; + SerializeValue(val, children[idx], struct_name, type_info.child_types[idx].second); + } + } + + break; + } + // Not implemented types + case LogicalTypeId::MAP: { + auto &children = ListValue::GetChildren(value); + val = yyjson_mut_obj(doc); + for (auto &item : children) { + auto &key_value = StructValue::GetChildren(item); + D_ASSERT(key_value.size() == 2); + auto key_str = key_value[0].GetValue(); + SerializeValue(val, key_value[1], key_str, key_value[1].type()); + } + break; + } + + // Unsupported types + case LogicalTypeId::TABLE: + case LogicalTypeId::POINTER: + case LogicalTypeId::VALIDITY: + case LogicalTypeId::AGGREGATE_STATE: + case LogicalTypeId::LAMBDA: + case LogicalTypeId::USER: + case LogicalTypeId::ANY: + case LogicalTypeId::UNKNOWN: + case LogicalTypeId::INVALID: + if (set_invalid_values_to_null) { + goto null_handle; + } + throw InvalidTypeException("Type " + type.ToString() + " not supported"); + } + + if (!val) { + throw SerializationException("Could not serialize value of type " + type.ToString()); + } + if (!name) { + 
YY_APPEND_FAIL(yyjson_mut_arr_append(parent, val)); + } else { + yyjson_mut_val *key = yyjson_mut_strcpy(doc, name->c_str()); + D_ASSERT(key); + YY_APPEND_FAIL(yyjson_mut_obj_add(parent, key, val)); + } +} + +} // namespace duckdb diff --git a/test_http_api/__init__.py b/test_http_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test_http_api/client.py b/test_http_api/client.py new file mode 100644 index 0000000..669cd9f --- /dev/null +++ b/test_http_api/client.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import time +from enum import Enum + +import httpx +from httpx import BasicAuth + + +class ResponseFormat(Enum): + ND_JSON = "JSONEachRow" + COMPACT_JSON = "JSONCompact" + + +class Client: + def __init__(self, url: str, basic_auth: str | None = None, token_auth: str | None = None): + assert basic_auth is not None or token_auth is not None, "Set either basic_auth xor token_auth" + assert not (basic_auth is not None and token_auth is not None), "Set either basic_auth xor token_auth" + + self._url = url + self._basic_auth = basic_auth + self._token_auth = token_auth + + def execute_query(self, sql: str, response_format: ResponseFormat) -> dict: + headers = {"format": response_format.value} + + if self._token_auth: + headers["X-API-Key"] = self._token_auth + + auth = None + if self._basic_auth: + username, password = self._basic_auth.split(":") + auth = BasicAuth(username, password) + + with httpx.Client() as client: + response = client.get(self._url, params={"q": sql}, headers=headers, auth=auth) + response.raise_for_status() + return response.json() + + + def ping(self) -> None: + with httpx.Client() as client: + response = client.get(f"{self._url}/ping") + response.raise_for_status() + + def on_ready(self, timeout = 5) -> None: + end_time = time.time() + timeout + while time.time() < end_time: + try: + self.ping() + return + except Exception: + pass + + raise TimeoutError("Server is not ready") \ No newline at end of file diff 
--git a/test_http_api/conftest.py b/test_http_api/conftest.py new file mode 100644 index 0000000..eb7552a --- /dev/null +++ b/test_http_api/conftest.py @@ -0,0 +1,32 @@ +import subprocess +from typing import Iterator + +import pytest + +from .client import Client +from .const import DEBUG_SHELL, HOST, PORT, API_KEY + + +@pytest.fixture +def http_duck_with_token() -> Iterator[Client]: + process = subprocess.Popen( + [ + DEBUG_SHELL, + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=2^16 + ) + + # Load the extension + process.stdin.write("LOAD httpserver;\n") + cmd = f"SELECT httpserve_start('{HOST}', {PORT}, '{API_KEY}');\n" + process.stdin.write(cmd) + + client = Client(f"http://{HOST}:{PORT}", token_auth=API_KEY) + client.on_ready() + yield client + + process.kill() diff --git a/test_http_api/const.py b/test_http_api/const.py new file mode 100644 index 0000000..5a4c0b5 --- /dev/null +++ b/test_http_api/const.py @@ -0,0 +1,11 @@ +import os + +PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +DEBUG_SHELL = f"{PROJECT_DIR}/build/debug/duckdb" +RELEASE_SHELL = f"{PROJECT_DIR}/build/release/duckdb" + +HOST = "localhost" +PORT = 9999 +API_KEY = "my_api_key" +BASIC_AUTH = "admin:admin" diff --git a/test_http_api/responses/all_types_compact.py b/test_http_api/responses/all_types_compact.py new file mode 100644 index 0000000..3724067 --- /dev/null +++ b/test_http_api/responses/all_types_compact.py @@ -0,0 +1,233 @@ +ALL_TYPES_COMPACT = { + "meta": [ + {"name": "bool", "type": "BOOLEAN"}, + {"name": "tinyint", "type": "TINYINT"}, + {"name": "smallint", "type": "SMALLINT"}, + {"name": "int", "type": "INTEGER"}, + {"name": "bigint", "type": "BIGINT"}, + {"name": "hugeint", "type": "HUGEINT"}, + {"name": "uhugeint", "type": "UHUGEINT"}, + {"name": "utinyint", "type": "UTINYINT"}, + {"name": "usmallint", "type": "USMALLINT"}, + {"name": "uint", "type": "UINTEGER"}, + {"name": "ubigint", "type": 
"UBIGINT"}, + {"name": "varint", "type": "VARINT"}, + {"name": "date", "type": "DATE"}, + {"name": "time", "type": "TIME"}, + {"name": "timestamp", "type": "TIMESTAMP"}, + {"name": "timestamp_s", "type": "TIMESTAMP_S"}, + {"name": "timestamp_ms", "type": "TIMESTAMP_MS"}, + {"name": "timestamp_ns", "type": "TIMESTAMP_NS"}, + {"name": "time_tz", "type": "TIME WITH TIME ZONE"}, + {"name": "timestamp_tz", "type": "TIMESTAMP WITH TIME ZONE"}, + {"name": "float", "type": "FLOAT"}, + {"name": "double", "type": "DOUBLE"}, + {"name": "dec_4_1", "type": "DECIMAL(4,1)"}, + {"name": "dec_9_4", "type": "DECIMAL(9,4)"}, + {"name": "dec_18_6", "type": "DECIMAL(18,6)"}, + {"name": "dec38_10", "type": "DECIMAL(38,10)"}, + {"name": "uuid", "type": "UUID"}, + {"name": "interval", "type": "INTERVAL"}, + {"name": "varchar", "type": "VARCHAR"}, + {"name": "blob", "type": "BLOB"}, + {"name": "bit", "type": "BIT"}, + {"name": "small_enum", "type": "ENUM('DUCK_DUCK_ENUM', 'GOOSE')"}, + { + "name": "medium_enum", + "type": "ENUM('enum_0', 'enum_1', 'enum_2', 'enum_3', 'enum_4', 'enum_5', 'enum_6', 'enum_7', 'enum_8', 'enum_9', 'enum_10', 'enum_11', 'enum_12', 'enum_13', 'enum_14', 'enum_15', 'enum_16', 'enum_17', 'enum_18', 'enum_19', 'enum_20', 'enum_21', 'enum_22', 'enum_23', 'enum_24', 'enum_25', 'enum_26', 'enum_27', 'enum_28', 'enum_29', 'enum_30', 'enum_31', 'enum_32', 'enum_33', 'enum_34', 'enum_35', 'enum_36', 'enum_37', 'enum_38', 'enum_39', 'enum_40', 'enum_41', 'enum_42', 'enum_43', 'enum_44', 'enum_45', 'enum_46', 'enum_47', 'enum_48', 'enum_49', 'enum_50', 'enum_51', 'enum_52', 'enum_53', 'enum_54', 'enum_55', 'enum_56', 'enum_57', 'enum_58', 'enum_59', 'enum_60', 'enum_61', 'enum_62', 'enum_63', 'enum_64', 'enum_65', 'enum_66', 'enum_67', 'enum_68', 'enum_69', 'enum_70', 'enum_71', 'enum_72', 'enum_73', 'enum_74', 'enum_75', 'enum_76', 'enum_77', 'enum_78', 'enum_79', 'enum_80', 'enum_81', 'enum_82', 'enum_83', 'enum_84', 'enum_85', 'enum_86', 'enum_87', 'enum_88', 'enum_89', 
'enum_90', 'enum_91', 'enum_92', 'enum_93', 'enum_94', 'enum_95', 'enum_96', 'enum_97', 'enum_98', 'enum_99', 'enum_100', 'enum_101', 'enum_102', 'enum_103', 'enum_104', 'enum_105', 'enum_106', 'enum_107', 'enum_108', 'enum_109', 'enum_110', 'enum_111', 'enum_112', 'enum_113', 'enum_114', 'enum_115', 'enum_116', 'enum_117', 'enum_118', 'enum_119', 'enum_120', 'enum_121', 'enum_122', 'enum_123', 'enum_124', 'enum_125', 'enum_126', 'enum_127', 'enum_128', 'enum_129', 'enum_130', 'enum_131', 'enum_132', 'enum_133', 'enum_134', 'enum_135', 'enum_136', 'enum_137', 'enum_138', 'enum_139', 'enum_140', 'enum_141', 'enum_142', 'enum_143', 'enum_144', 'enum_145', 'enum_146', 'enum_147', 'enum_148', 'enum_149', 'enum_150', 'enum_151', 'enum_152', 'enum_153', 'enum_154', 'enum_155', 'enum_156', 'enum_157', 'enum_158', 'enum_159', 'enum_160', 'enum_161', 'enum_162', 'enum_163', 'enum_164', 'enum_165', 'enum_166', 'enum_167', 'enum_168', 'enum_169', 'enum_170', 'enum_171', 'enum_172', 'enum_173', 'enum_174', 'enum_175', 'enum_176', 'enum_177', 'enum_178', 'enum_179', 'enum_180', 'enum_181', 'enum_182', 'enum_183', 'enum_184', 'enum_185', 'enum_186', 'enum_187', 'enum_188', 'enum_189', 'enum_190', 'enum_191', 'enum_192', 'enum_193', 'enum_194', 'enum_195', 'enum_196', 'enum_197', 'enum_198', 'enum_199', 'enum_200', 'enum_201', 'enum_202', 'enum_203', 'enum_204', 'enum_205', 'enum_206', 'enum_207', 'enum_208', 'enum_209', 'enum_210', 'enum_211', 'enum_212', 'enum_213', 'enum_214', 'enum_215', 'enum_216', 'enum_217', 'enum_218', 'enum_219', 'enum_220', 'enum_221', 'enum_222', 'enum_223', 'enum_224', 'enum_225', 'enum_226', 'enum_227', 'enum_228', 'enum_229', 'enum_230', 'enum_231', 'enum_232', 'enum_233', 'enum_234', 'enum_235', 'enum_236', 'enum_237', 'enum_238', 'enum_239', 'enum_240', 'enum_241', 'enum_242', 'enum_243', 'enum_244', 'enum_245', 'enum_246', 'enum_247', 'enum_248', 'enum_249', 'enum_250', 'enum_251', 'enum_252', 'enum_253', 'enum_254', 'enum_255', 'enum_256', 
'enum_257', 'enum_258', 'enum_259', 'enum_260', 'enum_261', 'enum_262', 'enum_263', 'enum_264', 'enum_265', 'enum_266', 'enum_267', 'enum_268', 'enum_269', 'enum_270', 'enum_271', 'enum_272', 'enum_273', 'enum_274', 'enum_275', 'enum_276', 'enum_277', 'enum_278', 'enum_279', 'enum_280', 'enum_281', 'enum_282', 'enum_283', 'enum_284', 'enum_285', 'enum_286', 'enum_287', 'enum_288', 'enum_289', 'enum_290', 'enum_291', 'enum_292', 'enum_293', 'enum_294', 'enum_295', 'enum_296', 'enum_297', 'enum_298', 'enum_299')", + }, + {"name": "large_enum", "type": "ENUM('enum_0', 'enum_69999')"}, + {"name": "int_array", "type": "INTEGER[]"}, + {"name": "double_array", "type": "DOUBLE[]"}, + {"name": "date_array", "type": "DATE[]"}, + {"name": "timestamp_array", "type": "TIMESTAMP[]"}, + {"name": "timestamptz_array", "type": "TIMESTAMP WITH TIME ZONE[]"}, + {"name": "varchar_array", "type": "VARCHAR[]"}, + {"name": "nested_int_array", "type": "INTEGER[][]"}, + {"name": "struct", "type": "STRUCT(a INTEGER, b VARCHAR)"}, + {"name": "struct_of_arrays", "type": "STRUCT(a INTEGER[], b VARCHAR[])"}, + {"name": "array_of_structs", "type": "STRUCT(a INTEGER, b VARCHAR)[]"}, + {"name": "map", "type": "MAP(VARCHAR, VARCHAR)"}, + {"name": "union", "type": 'UNION("name" VARCHAR, age SMALLINT)'}, + {"name": "fixed_int_array", "type": "INTEGER[3]"}, + {"name": "fixed_varchar_array", "type": "VARCHAR[3]"}, + {"name": "fixed_nested_int_array", "type": "INTEGER[3][3]"}, + {"name": "fixed_nested_varchar_array", "type": "VARCHAR[3][3]"}, + {"name": "fixed_struct_array", "type": "STRUCT(a INTEGER, b VARCHAR)[3]"}, + {"name": "struct_of_fixed_array", "type": "STRUCT(a INTEGER[3], b VARCHAR[3])"}, + {"name": "fixed_array_of_int_list", "type": "INTEGER[][3]"}, + {"name": "list_of_fixed_int_array", "type": "INTEGER[3][]"}, + ], + "data": [ + [ + False, + -128, + -32768, + -2147483648, + -9223372036854775808, + "-170141183460469231731687303715884105728", + "0", + 0, + 0, + 0, + 0, + 
"-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368", + "5877642-06-25 (BC)", + "00:00:00", + "290309-12-22 (BC) 00:00:00", + "290309-12-22 (BC) 00:00:00", + "290309-12-22 (BC) 00:00:00", + "1677-09-22 00:00:00", + "00:00:00+15:59:59", + "290309-12-22 (BC) 00:00:00+00", + -3.4028234663852886e38, + -1.7976931348623157e308, + -999.9, + -99999.9999, + -1000000000000.0, + -1e28, + "00000000-0000-0000-0000-000000000000", + "00:00:00", + "🦆🦆🦆🦆🦆🦆", + "thisisalongblob\\x00withnullbytes", + "0010001001011100010101011010111", + "DUCK_DUCK_ENUM", + "enum_0", + "enum_0", + [], + [], + [], + [], + [], + [], + [], + {"a": None, "b": None}, + {"a": None, "b": None}, + [], + {}, + "Frank", + [None, 2, 3], + ["a", None, "c"], + [[None, 2, 3], None, [None, 2, 3]], + [["a", None, "c"], None, ["a", None, "c"]], + [{"a": None, "b": None}, {"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}, {"a": None, "b": None}], + {"a": [None, 2, 3], "b": ["a", None, "c"]}, + [[], [42, 999, None, None, -42], []], + [[None, 2, 3], [4, 5, 6], [None, 2, 3]], + ], + [ + True, + 127, + 32767, + 2147483647, + 9223372036854775807, + "170141183460469231731687303715884105727", + "340282366920938463463374607431768211455", + 255, + 65535, + 4294967295, + 18446744073709551615, + "179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368", + "5881580-07-10", + "24:00:00", + "294247-01-10 04:00:54.775806", + "294247-01-10 04:00:54", + "294247-01-10 04:00:54.775", + "2262-04-11 23:47:16.854775806", + "24:00:00-15:59:59", + "294247-01-10 
04:00:54.775806+00", + 3.4028234663852886e38, + 1.7976931348623157e308, + 999.9, + 99999.9999, + 1000000000000.0, + 1e28, + "ffffffff-ffff-ffff-ffff-ffffffffffff", + "83 years 3 months 999 days 00:16:39.999999", + "goo", + "\\x00\\x00\\x00a", + "10101", + "GOOSE", + "enum_299", + "enum_69999", + [42, 999, None, None, -42], + [42.0, "nan", "inf", "-inf", None, -42.0], + ["1970-01-01", "infinity", "-infinity", None, "2022-05-12"], + ["1970-01-01 00:00:00", "infinity", "-infinity", None, "2022-05-12 16:23:45"], + ["1970-01-01 00:00:00+00", "infinity", "-infinity", None, "2022-05-12 23:23:45+00"], + ["🦆🦆🦆🦆🦆🦆", "goose", None, ""], + [[], [42, 999, None, None, -42], None, [], [42, 999, None, None, -42]], + {"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}, + {"a": [42, 999, None, None, -42], "b": ["🦆🦆🦆🦆🦆🦆", "goose", None, ""]}, + [{"a": None, "b": None}, {"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}, None], + {"key1": "🦆🦆🦆🦆🦆🦆", "key2": "goose"}, + 5, + [4, 5, 6], + ["d", "e", "f"], + [[4, 5, 6], [None, 2, 3], [4, 5, 6]], + [["d", "e", "f"], ["a", None, "c"], ["d", "e", "f"]], + [{"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}, {"a": None, "b": None}, {"a": 42, "b": "🦆🦆🦆🦆🦆🦆"}], + {"a": [4, 5, 6], "b": ["d", "e", "f"]}, + [[42, 999, None, None, -42], [], [42, 999, None, None, -42]], + [[4, 5, 6], [None, 2, 3], [4, 5, 6]], + ], + [ + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ], + ], + "rows": 3, + "statistics": {"elapsed": 0.06300000101327896, "rows_read": 0, "bytes_read": 0}, +} diff --git a/test_http_api/test_json_compact_all_types.py b/test_http_api/test_json_compact_all_types.py new file mode 100644 index 0000000..b2783e9 --- /dev/null +++ 
b/test_http_api/test_json_compact_all_types.py @@ -0,0 +1,10 @@ +from .client import Client, ResponseFormat +from .responses.all_types_compact import ALL_TYPES_COMPACT + + +def test_json_compact_all_types(http_duck_with_token: Client): + res = http_duck_with_token.execute_query("FROM test_all_types()", response_format=ResponseFormat.COMPACT_JSON) + + assert res["meta"] == ALL_TYPES_COMPACT["meta"] + assert res["data"] == ALL_TYPES_COMPACT["data"] + assert res["rows"] == ALL_TYPES_COMPACT["rows"]