Skip to content

Commit 2f2397a

Browse files
committed
GIL less scanDataToChunk
1 parent c136178 commit 2f2397a

File tree

7 files changed

+298
-107
lines changed

7 files changed

+298
-107
lines changed

src/Common/PythonUtils.cpp

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -79,32 +79,32 @@ const char * ConvertPyUnicodeToUtf8(const void * input, int kind, size_t codepoi
7979
return output_buffer;
8080
}
8181

82-
const char * GetPyUtf8StrData(const py::handle & obj, size_t & buf_len)
82+
const char * GetPyUtf8StrData(PyObject * obj, size_t & buf_len)
8383
{
8484
// See: https://github.com/python/cpython/blob/3.9/Include/cpython/unicodeobject.h#L81
85-
if (PyUnicode_IS_COMPACT_ASCII(obj.ptr()))
85+
if (PyUnicode_IS_COMPACT_ASCII(obj))
8686
{
87-
const char * data = reinterpret_cast<const char *>(PyUnicode_1BYTE_DATA(obj.ptr()));
88-
buf_len = PyUnicode_GET_LENGTH(obj.ptr());
87+
const char * data = reinterpret_cast<const char *>(PyUnicode_1BYTE_DATA(obj));
88+
buf_len = PyUnicode_GET_LENGTH(obj);
8989
return data;
9090
}
9191
else
9292
{
93-
PyCompactUnicodeObject * unicode = reinterpret_cast<PyCompactUnicodeObject *>(obj.ptr());
93+
PyCompactUnicodeObject * unicode = reinterpret_cast<PyCompactUnicodeObject *>(obj);
9494
if (unicode->utf8 != nullptr)
9595
{
9696
// It's a utf8 string, treat it like ASCII
9797
const char * data = reinterpret_cast<const char *>(unicode->utf8);
9898
buf_len = unicode->utf8_length;
9999
return data;
100100
}
101-
else if (PyUnicode_IS_COMPACT(obj.ptr()))
101+
else if (PyUnicode_IS_COMPACT(obj))
102102
{
103-
auto kind = PyUnicode_KIND(obj.ptr());
103+
auto kind = PyUnicode_KIND(obj);
104104
// if (kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND)
105105
// {
106106
// // always convert it to utf8
107-
// const char * data = PyUnicode_AsUTF8AndSize(obj.ptr(), &unicode->utf8_length);
107+
// const char * data = PyUnicode_AsUTF8AndSize(obj, &unicode->utf8_length);
108108
// buf_len = unicode->utf8_length;
109109
// // set the utf8 buffer back
110110
// unicode->utf8 = const_cast<char *>(data);
@@ -114,16 +114,16 @@ const char * GetPyUtf8StrData(const py::handle & obj, size_t & buf_len)
114114
size_t codepoint_cnt;
115115

116116
if (kind == PyUnicode_1BYTE_KIND)
117-
data = reinterpret_cast<const char *>(PyUnicode_1BYTE_DATA(obj.ptr()));
117+
data = reinterpret_cast<const char *>(PyUnicode_1BYTE_DATA(obj));
118118
else if (kind == PyUnicode_2BYTE_KIND)
119-
data = reinterpret_cast<const char *>(PyUnicode_2BYTE_DATA(obj.ptr()));
119+
data = reinterpret_cast<const char *>(PyUnicode_2BYTE_DATA(obj));
120120
else if (kind == PyUnicode_4BYTE_KIND)
121-
data = reinterpret_cast<const char *>(PyUnicode_4BYTE_DATA(obj.ptr()));
121+
data = reinterpret_cast<const char *>(PyUnicode_4BYTE_DATA(obj));
122122
else
123123
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported unicode kind {}", kind);
124124
// always convert it to utf8, and we can't use the PyUnicode_As* functions provided by CPython because they require the GIL
125125
// held by the caller. So we have to do it manually with libicu
126-
codepoint_cnt = PyUnicode_GET_LENGTH(obj.ptr());
126+
codepoint_cnt = PyUnicode_GET_LENGTH(obj);
127127
data = ConvertPyUnicodeToUtf8(data, kind, codepoint_cnt, buf_len);
128128
unicode->utf8 = const_cast<char *>(data);
129129
unicode->utf8_length = buf_len;
@@ -133,7 +133,7 @@ const char * GetPyUtf8StrData(const py::handle & obj, size_t & buf_len)
133133
{
134134
// always convert it to utf8, but this case is rare, here goes the slow path
135135
py::gil_scoped_acquire acquire;
136-
const char * data = PyUnicode_AsUTF8AndSize(obj.ptr(), &unicode->utf8_length);
136+
const char * data = PyUnicode_AsUTF8AndSize(obj, &unicode->utf8_length);
137137
buf_len = unicode->utf8_length;
138138
// set the utf8 buffer back
139139
unicode->utf8 = const_cast<char *>(data);
@@ -167,8 +167,9 @@ const void * tryGetPyArray(const py::object & obj, py::handle & result, std::str
167167
{
168168
// Return the handle of py::array directly
169169
row_count = py::len(obj);
170-
result = obj;
171-
return obj.cast<py::array>().data();
170+
py::array array = obj.cast<py::array>();
171+
result = array;
172+
return array.data();
172173
}
173174
else if (type_name == "Series")
174175
{
@@ -187,6 +188,8 @@ const void * tryGetPyArray(const py::object & obj, py::handle & result, std::str
187188
return array.data();
188189
}
189190

191+
// chdb todo: maybe convert list to py::array?
192+
190193
return nullptr;
191194
}
192195
}

src/Common/PythonUtils.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ auto execWithGIL(Func func, Args &&... args) -> decltype(func(std::forward<Args>
3939
// 4 for 4-byte characters (Assume UCS-4/UTF-32)
4040
const char * ConvertPyUnicodeToUtf8(const void * input, int kind, size_t codepoint_cnt, size_t & output_size);
4141

42-
const char * GetPyUtf8StrData(const py::handle & obj, size_t & buf_len);
42+
const char * GetPyUtf8StrData(PyObject * obj, size_t & buf_len);
4343

4444

45-
inline const char * GetPyUtf8StrDataWithGIL(const py::handle & obj, size_t & buf_len)
45+
inline const char * GetPyUtf8StrDataWithGIL(PyObject * obj, size_t & buf_len)
4646
{
4747
return execWithGIL([&]() { return GetPyUtf8StrData(obj, buf_len); });
4848
}

0 commit comments

Comments
 (0)