Skip to content

Commit

Permalink
GIL less scanDataToChunk
Browse files Browse the repository at this point in the history
  • Loading branch information
auxten committed May 31, 2024
1 parent c136178 commit 2f2397a
Show file tree
Hide file tree
Showing 7 changed files with 298 additions and 107 deletions.
33 changes: 18 additions & 15 deletions src/Common/PythonUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,32 +79,32 @@ const char * ConvertPyUnicodeToUtf8(const void * input, int kind, size_t codepoi
return output_buffer;
}

const char * GetPyUtf8StrData(const py::handle & obj, size_t & buf_len)
const char * GetPyUtf8StrData(PyObject * obj, size_t & buf_len)
{
// See: https://github.com/python/cpython/blob/3.9/Include/cpython/unicodeobject.h#L81
if (PyUnicode_IS_COMPACT_ASCII(obj.ptr()))
if (PyUnicode_IS_COMPACT_ASCII(obj))
{
const char * data = reinterpret_cast<const char *>(PyUnicode_1BYTE_DATA(obj.ptr()));
buf_len = PyUnicode_GET_LENGTH(obj.ptr());
const char * data = reinterpret_cast<const char *>(PyUnicode_1BYTE_DATA(obj));
buf_len = PyUnicode_GET_LENGTH(obj);
return data;
}
else
{
PyCompactUnicodeObject * unicode = reinterpret_cast<PyCompactUnicodeObject *>(obj.ptr());
PyCompactUnicodeObject * unicode = reinterpret_cast<PyCompactUnicodeObject *>(obj);
if (unicode->utf8 != nullptr)
{
// It's utf8 string, treat it like ASCII
const char * data = reinterpret_cast<const char *>(unicode->utf8);
buf_len = unicode->utf8_length;
return data;
}
else if (PyUnicode_IS_COMPACT(obj.ptr()))
else if (PyUnicode_IS_COMPACT(obj))
{
auto kind = PyUnicode_KIND(obj.ptr());
auto kind = PyUnicode_KIND(obj);
// if (kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND)
// {
// // always convert it to utf8
// const char * data = PyUnicode_AsUTF8AndSize(obj.ptr(), &unicode->utf8_length);
// const char * data = PyUnicode_AsUTF8AndSize(obj, &unicode->utf8_length);
// buf_len = unicode->utf8_length;
// // set the utf8 buffer back
// unicode->utf8 = const_cast<char *>(data);
Expand All @@ -114,16 +114,16 @@ const char * GetPyUtf8StrData(const py::handle & obj, size_t & buf_len)
size_t codepoint_cnt;

if (kind == PyUnicode_1BYTE_KIND)
data = reinterpret_cast<const char *>(PyUnicode_1BYTE_DATA(obj.ptr()));
data = reinterpret_cast<const char *>(PyUnicode_1BYTE_DATA(obj));
else if (kind == PyUnicode_2BYTE_KIND)
data = reinterpret_cast<const char *>(PyUnicode_2BYTE_DATA(obj.ptr()));
data = reinterpret_cast<const char *>(PyUnicode_2BYTE_DATA(obj));
else if (kind == PyUnicode_4BYTE_KIND)
data = reinterpret_cast<const char *>(PyUnicode_4BYTE_DATA(obj.ptr()));
data = reinterpret_cast<const char *>(PyUnicode_4BYTE_DATA(obj));
else
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported unicode kind {}", kind);
// Always convert it to UTF-8 here. We can't use the conversion functions provided by CPython
// (e.g. PyUnicode_AsUTF8AndSize) because they require the GIL to be held by the caller, so we do it manually with libicu.
codepoint_cnt = PyUnicode_GET_LENGTH(obj.ptr());
codepoint_cnt = PyUnicode_GET_LENGTH(obj);
data = ConvertPyUnicodeToUtf8(data, kind, codepoint_cnt, buf_len);
unicode->utf8 = const_cast<char *>(data);
unicode->utf8_length = buf_len;
Expand All @@ -133,7 +133,7 @@ const char * GetPyUtf8StrData(const py::handle & obj, size_t & buf_len)
{
// always convert it to utf8, but this case is rare, here goes the slow path
py::gil_scoped_acquire acquire;
const char * data = PyUnicode_AsUTF8AndSize(obj.ptr(), &unicode->utf8_length);
const char * data = PyUnicode_AsUTF8AndSize(obj, &unicode->utf8_length);
buf_len = unicode->utf8_length;
// set the utf8 buffer back
unicode->utf8 = const_cast<char *>(data);
Expand Down Expand Up @@ -167,8 +167,9 @@ const void * tryGetPyArray(const py::object & obj, py::handle & result, std::str
{
// Return the handle of py::array directly
row_count = py::len(obj);
result = obj;
return obj.cast<py::array>().data();
py::array array = obj.cast<py::array>();
result = array;
return array.data();
}
else if (type_name == "Series")
{
Expand All @@ -187,6 +188,8 @@ const void * tryGetPyArray(const py::object & obj, py::handle & result, std::str
return array.data();
}

// chdb todo: maybe convert list to py::array?

return nullptr;
}
}
4 changes: 2 additions & 2 deletions src/Common/PythonUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ auto execWithGIL(Func func, Args &&... args) -> decltype(func(std::forward<Args>
// 4 for 4-byte characters (Assume UCS-4/UTF-32)
const char * ConvertPyUnicodeToUtf8(const void * input, int kind, size_t codepoint_cnt, size_t & output_size);

const char * GetPyUtf8StrData(const py::handle & obj, size_t & buf_len);
const char * GetPyUtf8StrData(PyObject * obj, size_t & buf_len);


// Convenience wrapper around GetPyUtf8StrData: forwards `obj` and `buf_len` (captured by reference)
// to GetPyUtf8StrData via execWithGIL and returns whatever that call returns.
// `buf_len` is an out-parameter that GetPyUtf8StrData fills with the byte length of the returned buffer.
// NOTE(review): execWithGIL presumably holds the Python GIL while the lambda runs — confirm against its definition.
inline const char * GetPyUtf8StrDataWithGIL(const py::handle & obj, size_t & buf_len)
inline const char * GetPyUtf8StrDataWithGIL(PyObject * obj, size_t & buf_len)
{
return execWithGIL([&]() { return GetPyUtf8StrData(obj, buf_len); });
}
Expand Down
Loading

0 comments on commit 2f2397a

Please sign in to comment.