Skip to content

Commit

Permalink
Add simple StoragePython to fix compile flags
Browse files Browse the repository at this point in the history
  • Loading branch information
auxten committed Apr 12, 2024
1 parent 5cd83a1 commit e0d90b4
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 0 deletions.
47 changes: 47 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,53 @@ target_link_libraries (dbms PRIVATE ch_contrib::libdivide)
# Link jemalloc into dbms only when the ch_contrib::jemalloc target has been defined.
if (TARGET ch_contrib::jemalloc)
target_link_libraries (dbms PRIVATE ch_contrib::jemalloc)
endif()

# Discover the pybind11 / Python header directories by asking the interpreter,
# i.e. the same thing as running `python3 -m pybind11 --includes` in a shell.
execute_process(
    COMMAND python3 -m pybind11 --includes
    RESULT_VARIABLE PYBIND11_INCLUDES_RESULT
    OUTPUT_VARIABLE PYBIND11_INCLUDES
    ERROR_VARIABLE PYBIND11_INCLUDES_ERROR
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_STRIP_TRAILING_WHITESPACE
)

# Stop with a readable message when python3 or the pybind11 module is missing.
# Without this check an empty output reaches the parsing step below and only
# produces an unrelated-looking argument-count error.
if (NOT "${PYBIND11_INCLUDES_RESULT}" STREQUAL "0" OR "${PYBIND11_INCLUDES}" STREQUAL "")
    message(FATAL_ERROR
        "Cannot determine pybind11 include directories: "
        "`python3 -m pybind11 --includes` failed "
        "(result: ${PYBIND11_INCLUDES_RESULT}): ${PYBIND11_INCLUDES_ERROR}")
endif()

# Split the output like a shell would, then keep the directory part of every
# argument that starts with -I. Anchoring on the start of the argument avoids
# mangling paths that merely contain the characters "-I".
separate_arguments(PYBIND11_INCLUDE_FLAGS UNIX_COMMAND "${PYBIND11_INCLUDES}")
set(PYTHON_INCLUDE_DIRS "")
foreach(PYBIND11_INCLUDE_FLAG IN LISTS PYBIND11_INCLUDE_FLAGS)
    if ("${PYBIND11_INCLUDE_FLAG}" MATCHES "^-I(.+)$")
        list(APPEND PYTHON_INCLUDE_DIRS "${CMAKE_MATCH_1}")
    endif()
endforeach()

if ("${PYTHON_INCLUDE_DIRS}" STREQUAL "")
    message(WARNING
        "No -I entries found in pybind11 output: '${PYBIND11_INCLUDES}'; "
        "Storages/StoragePython.cpp may fail to find pybind11 headers")
else()
    list(REMOVE_DUPLICATES PYTHON_INCLUDE_DIRS)
endif()

# Apply the include directories to StoragePython.cpp only, so the Python
# headers do not leak into the include path of every other source file.
set_source_files_properties(Storages/StoragePython.cpp PROPERTIES INCLUDE_DIRECTORIES "${PYTHON_INCLUDE_DIRS}")

# Ask the interpreter for its "python3.<minor>" tag, e.g. python3.8.
execute_process(
    COMMAND python3 -c "import sys; print('python3.'+str(sys.version_info[1]))"
    RESULT_VARIABLE PYTHON_VERSION_RESULT
    OUTPUT_VARIABLE PYTHON_VERSION
    OUTPUT_STRIP_TRAILING_WHITESPACE
)

# A failed probe must not go unnoticed: code that compares PYTHON_VERSION
# against specific releases would otherwise silently take the default branch.
if (NOT "${PYTHON_VERSION_RESULT}" STREQUAL "0")
    message(WARNING
        "Could not query the python3 version (result: ${PYTHON_VERSION_RESULT}); "
        "version-specific compile flags for Storages/StoragePython.cpp will not be applied")
    set(PYTHON_VERSION "")
endif()

# Per-file compile flags for StoragePython.cpp.
#
# -w suppresses every compiler warning for this one file; the original author
# notes that pybind11 generates a lot of warnings. Flags are only set on Linux
# and Darwin; other platforms are left untouched.
#
# On Linux with Python 3.6, 3.7 or 3.8, crypt.h is additionally force-included
# and /usr/include is appended to the end of the header search order with
# -idirafter so that it does not pollute the regular include path.
# NOTE(review): presumably these Python releases need crypt.h from the system
# headers -- confirm. An earlier variant force-included
# x86_64-linux-gnu/<python version>/pyconfig.h instead.
if (OS_LINUX OR OS_DARWIN)
    set(storage_python_compile_flags "-w")

    if (OS_LINUX)
        if (PYTHON_VERSION STREQUAL "python3.6"
            OR PYTHON_VERSION STREQUAL "python3.7"
            OR PYTHON_VERSION STREQUAL "python3.8")
            string(APPEND storage_python_compile_flags " -idirafter /usr/include -include crypt.h")
        endif()
    endif()

    set_source_files_properties(
        Storages/StoragePython.cpp
        PROPERTIES COMPILE_FLAGS "${storage_python_compile_flags}"
    )
endif()

# Initialise all_modules so that it contains just the dbms target.
set (all_modules dbms)

macro (dbms_target_include_directories)
Expand Down
48 changes: 48 additions & 0 deletions src/Storages/StoragePython.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include "StoragePython.h"
#include "pybind11/embed.h"

namespace py = pybind11;

namespace DB
{

/// Creates the storage and instantiates the user's Python class.
///
/// @param table_id_          Identifier forwarded to the IStorage base.
/// @param python_class_name_ Name of the attribute looked up in the imported
///                           Python module and called to create the instance.
///
/// NOTE(review): the columns description and the context are accepted but not
/// used here, so no column metadata is registered for the table -- confirm
/// whether that is intended for this skeleton.
/// NOTE(review): the module name "user_module_name" is a hard-coded placeholder.
StoragePython::StoragePython(
    const StorageID & table_id_, const String & python_class_name_, const ColumnsDescription & /*columns_*/, ContextPtr /*context_*/)
    : IStorage(table_id_), python_class_name(python_class_name_)
{
    // Start the embedded interpreter once and keep it alive afterwards.
    // A py::scoped_interpreter local to this constructor would finalize the
    // interpreter on return while python_class_instance still refers to a
    // Python object, making any later use or destruction of it undefined.
    // Initialization of a function-local static is thread-safe and runs once.
    static const bool interpreter_started = []
    {
        if (!Py_IsInitialized())
            py::initialize_interpreter();
        return true;
    }();
    (void)interpreter_started;

    // NOTE(review): no GIL management is done here; calls into Python from
    // threads other than the one that started the interpreter need
    // py::gil_scoped_acquire (after the GIL has been released) -- to be
    // designed together with read()/write().

    // Load the user's Python module and call the class to create an instance.
    py::module_ user_module = py::module_::import("user_module_name");
    python_class_instance = user_module.attr(python_class_name.c_str())();
}

/// Reads from the Python object by calling its `read` attribute.
///
/// Only max_block_size is passed to Python; every other argument is currently
/// ignored. The bytes returned by Python are not yet converted into ClickHouse
/// blocks, so an empty Pipe is returned.
///
/// @param max_block_size Passed as the single argument to the Python `read`.
/// @return An empty Pipe (placeholder until data conversion is implemented).
Pipe StoragePython::read(
    const Names & /*column_names*/,
    const StorageSnapshotPtr & /*storage_snapshot*/,
    SelectQueryInfo & /*query_info*/,
    ContextPtr /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    size_t /*num_streams*/)
{
    // Call the user's `read` method; how the payload is laid out is up to the
    // Python side and is not interpreted here yet.
    py::bytes result = python_class_instance.attr("read")(max_block_size);
    (void)result; // TODO: transform `result` into sources feeding the returned Pipe.

    // Flowing off the end of a function that returns a value is undefined
    // behaviour, so return a well-defined empty Pipe until conversion exists.
    return Pipe();
}

/// Writing through the Python object is not implemented yet.
///
/// A real implementation would call the Python `write` method, e.g.
/// `python_class_instance.attr("write")(data_to_write)`, after converting the
/// inserted data to suitable Python types and adding error handling.
///
/// @return Whatever the IStorage base implementation returns or throws.
SinkToStoragePtr StoragePython::write(
    const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert)
{
    // Flowing off the end of a function that returns a value is undefined
    // behaviour. Defer to the base class instead of returning a null sink that
    // callers might dereference.
    // NOTE(review): the base implementation is expected to report that writing
    // is unsupported for this storage -- confirm against IStorage.
    return IStorage::write(query, metadata_snapshot, context, async_insert);
}

}
38 changes: 38 additions & 0 deletions src/Storages/StoragePython.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#include <memory>
#include <Storages/IStorage.h>
#include "pybind11/pybind11.h"

namespace py = pybind11;

namespace DB
{

/// Storage whose data access is delegated to an instance of a Python class,
/// held through pybind11.
class StoragePython final : public IStorage
{
public:
    /// @param table_id_          Identifier of the table this storage represents.
    /// @param python_class_name_ Name of the Python class backing the table.
    /// @param columns_           Column description supplied for the table.
    /// @param context_           Context the storage is created in.
    StoragePython(
        const StorageID & table_id_,
        const String & python_class_name_,
        const ColumnsDescription & columns_,
        ContextPtr context_);

    /// Name under which this storage type is reported.
    std::string getName() const override { return "Python"; }

    /// IStorage::read override.
    Pipe read(
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        size_t num_streams) override;

    /// IStorage::write override.
    SinkToStoragePtr write(
        const ASTPtr & query,
        const StorageMetadataPtr & metadata_snapshot,
        ContextPtr context,
        bool async_insert) override;

private:
    /// Name of the Python class, as passed to the constructor.
    String python_class_name;

    /// Handle to the instance of the Python class.
    py::object python_class_instance;
};

}

0 comments on commit e0d90b4

Please sign in to comment.