From e0d90b4a7d07a2ec8394819e00c0e0ba6c24e5a3 Mon Sep 17 00:00:00 2001
From: auxten
Date: Fri, 12 Apr 2024 17:36:48 +0800
Subject: [PATCH] Add simple StoragePython to fix compile flags

---
 src/CMakeLists.txt             | 54 ++++++++++++++++++++++
 src/Storages/StoragePython.cpp | 86 ++++++++++++++++++++++++++++++++++
 src/Storages/StoragePython.h   | 39 +++++++++++++++
 3 files changed, 179 insertions(+)
 create mode 100644 src/Storages/StoragePython.cpp
 create mode 100644 src/Storages/StoragePython.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5c4d38ff662..3fba236d378 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -264,6 +264,60 @@ target_link_libraries (dbms PRIVATE ch_contrib::libdivide)
 if (TARGET ch_contrib::jemalloc)
     target_link_libraries (dbms PRIVATE ch_contrib::jemalloc)
 endif()
+
+# StoragePython.cpp embeds Python through pybind11. Ask python3 for the
+# required include flags (shell cmd "python3 -m pybind11 --includes").
+execute_process(COMMAND python3 -m pybind11 --includes
+    OUTPUT_VARIABLE PYBIND11_INCLUDES
+    RESULT_VARIABLE PYBIND11_INCLUDES_RESULT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+if (NOT PYBIND11_INCLUDES_RESULT EQUAL 0 OR "${PYBIND11_INCLUDES}" STREQUAL "")
+    message(FATAL_ERROR "Cannot get pybind11 include flags: 'python3 -m pybind11 --includes' failed or printed nothing")
+endif()
+
+# Extract the include directories from the "-I<dir>" flags.
+# The input is quoted so that it is passed to string() as a single argument.
+string(REGEX MATCHALL "-I([^ ]+)" INCLUDE_DIRS_MATCHES "${PYBIND11_INCLUDES}")
+set(PYTHON_INCLUDE_DIRS "")
+foreach(INCLUDE_DIR_MATCH IN LISTS INCLUDE_DIRS_MATCHES)
+    # Strip only the leading "-I", not a "-I" that occurs inside the path
+    string(REGEX REPLACE "^-I" "" INCLUDE_DIR_MATCH "${INCLUDE_DIR_MATCH}")
+    # Accumulate all include directories (list(APPEND) adds no empty leading element)
+    list(APPEND PYTHON_INCLUDE_DIRS "${INCLUDE_DIR_MATCH}")
+endforeach()
+
+# Apply the include directories to StoragePython.cpp only
+set_source_files_properties(Storages/StoragePython.cpp PROPERTIES INCLUDE_DIRECTORIES "${PYTHON_INCLUDE_DIRS}")
+
+# Get the Python version, something like python3.x
+execute_process(COMMAND python3 -c "import sys; print('python3.'+str(sys.version_info[1]))"
+    OUTPUT_VARIABLE PYTHON_VERSION
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+# Disable all warnings (-w) for StoragePython.cpp, because pybind11 generates a lot of them
+if (OS_LINUX)
+    # pybind11 will try to find x86_64-linux-gnu/${PYTHON_VERSION}/pyconfig.h
+    # use -idirafter to make it find the right one and not pollute the include path
+    # set_source_files_properties(Storages/StoragePython.cpp PROPERTIES COMPILE_FLAGS
+    #     "-w -idirafter /usr/include -include x86_64-linux-gnu/${PYTHON_VERSION}/pyconfig.h"
+    # )
+    if (PYTHON_VERSION STREQUAL "python3.6" OR PYTHON_VERSION STREQUAL "python3.7" OR PYTHON_VERSION STREQUAL "python3.8")
+        set_source_files_properties(Storages/StoragePython.cpp PROPERTIES COMPILE_FLAGS
+            "-w -idirafter /usr/include -include crypt.h"
+        )
+    else()
+        set_source_files_properties(Storages/StoragePython.cpp PROPERTIES COMPILE_FLAGS
+            "-w"
+        )
+    endif()
+elseif (OS_DARWIN)
+    set_source_files_properties(Storages/StoragePython.cpp PROPERTIES COMPILE_FLAGS
+        "-w"
+    )
+endif()
+
 set (all_modules dbms)
 
 macro (dbms_target_include_directories)
diff --git a/src/Storages/StoragePython.cpp b/src/Storages/StoragePython.cpp
new file mode 100644
index 00000000000..e6f08cc4368
--- /dev/null
+++ b/src/Storages/StoragePython.cpp
@@ -0,0 +1,86 @@
+#include "StoragePython.h"
+
+#include <mutex>
+
+#include <Common/Exception.h>
+#include <Storages/StorageInMemoryMetadata.h>
+#include "pybind11/embed.h"
+
+namespace py = pybind11;
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+namespace
+{
+
+/// Starts the embedded Python interpreter unless Python reports that one is already
+/// initialized. The interpreter is deliberately not finalized here: StoragePython keeps
+/// a py::object member that is still used after the constructor has returned.
+void ensurePythonInterpreterIsRunning()
+{
+    static std::once_flag once;
+    std::call_once(once, []
+    {
+        if (!Py_IsInitialized())
+            py::initialize_interpreter();
+    });
+}
+
+}
+
+/// Registers the columns and instantiates the Python class `python_class_name_`
+/// (called without arguments) from the module "user_module_name".
+StoragePython::StoragePython(
+    const StorageID & table_id_, const String & python_class_name_, const ColumnsDescription & columns_, ContextPtr /*context_*/)
+    : IStorage(table_id_), python_class_name(python_class_name_)
+{
+    // Make the declared columns available through the storage metadata
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(columns_);
+    setInMemoryMetadata(storage_metadata);
+
+    // A py::scoped_interpreter local to this constructor would finalize Python on return
+    // and leave python_class_instance dangling, so the interpreter is started once and kept.
+    // NOTE(review): no GIL handling is done here - confirm which threads call into Python.
+    ensurePythonInterpreterIsRunning();
+
+    // Load the user's Python class and instantiate it
+    // TODO: "user_module_name" is a hard-coded placeholder for the module to import
+    py::module_ user_module = py::module_::import("user_module_name");
+    python_class_instance = user_module.attr(python_class_name.c_str())();
+}
+
+/// Calls the `read` method of the Python object with `max_block_size`.
+/// Converting the returned bytes into a Pipe is not implemented yet, so NOT_IMPLEMENTED
+/// is thrown instead of flowing off the end of a function that has to return a Pipe.
+Pipe StoragePython::read(
+    const Names & /*column_names*/,
+    const StorageSnapshotPtr & /*storage_snapshot*/,
+    SelectQueryInfo & /*query_info*/,
+    ContextPtr /*context*/,
+    QueryProcessingStage::Enum /*processed_stage*/,
+    size_t max_block_size,
+    size_t /*num_streams*/)
+{
+    [[maybe_unused]] py::bytes result = python_class_instance.attr("read")(max_block_size);
+
+    // TODO: transform `result` into a ClickHouse Pipe
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from storage {} is not implemented", getName());
+}
+
+/// Writing is not implemented yet: the call python_class_instance.attr("write")(data_to_write)
+/// and the conversion of the inserted data are still missing, so NOT_IMPLEMENTED is thrown
+/// instead of flowing off the end of a function that has to return a sink.
+SinkToStoragePtr StoragePython::write(
+    const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr /*context*/, bool /*async_insert*/)
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to storage {} is not implemented", getName());
+}
+
+}
diff --git a/src/Storages/StoragePython.h b/src/Storages/StoragePython.h
new file mode 100644
index 00000000000..d4a9698782d
--- /dev/null
+++ b/src/Storages/StoragePython.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <Storages/ColumnsDescription.h>
+#include <Storages/IStorage.h>
+#include "pybind11/pybind11.h"
+
+namespace py = pybind11;
+
+namespace DB
+{
+
+/// Storage "Python": holds an instance of a Python class and calls its methods through pybind11.
+class StoragePython final : public IStorage
+{
+public:
+    StoragePython(const StorageID & table_id_, const String & python_class_name_, const ColumnsDescription & columns_, ContextPtr context_);
+
+    std::string getName() const override { return "Python"; }
+
+    // Override the read method in IStorage
+    Pipe read(
+        const Names & column_names,
+        const StorageSnapshotPtr & storage_snapshot,
+        SelectQueryInfo & query_info,
+        ContextPtr context,
+        QueryProcessingStage::Enum processed_stage,
+        size_t max_block_size,
+        size_t num_streams) override;
+
+    // Override the write method in IStorage
+    SinkToStoragePtr
+    write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert) override;
+
+private:
+    String python_class_name;
+    py::object python_class_instance; // To store the instance of the Python class
+};
+
+}