1
+ #include < Columns/ColumnDecimal.h>
1
2
#include < Columns/ColumnString.h>
2
3
#include < Columns/IColumn.h>
4
+ #include < DataTypes/DataTypeDecimalBase.h>
3
5
#include < DataTypes/DataTypeString.h>
6
+ #include < DataTypes/DataTypesDecimal.h>
4
7
#include < DataTypes/DataTypesNumber.h>
8
+ #include < DataTypes/IDataType.h>
5
9
#include < Processors/Sources/PythonSource.h>
6
10
#include < Storages/StoragePython.h>
11
+ #include < base/Decimal.h>
7
12
#include < pybind11/gil.h>
8
13
#include < pybind11/pytypes.h>
9
14
#include < Common/Exception.h>
10
15
#include < Common/logger_useful.h>
16
+ #include < base/Decimal_fwd.h>
17
+ #include < base/types.h>
11
18
12
19
namespace DB
13
20
{
@@ -18,70 +25,50 @@ PythonSource::PythonSource(std::shared_ptr<PyReader> reader_, const Block & samp
18
25
}
19
26
20
27
template <typename T>
21
- ColumnPtr convert_and_insert (py::object obj)
28
+ ColumnPtr convert_and_insert (py::object obj, UInt32 scale = 0 )
22
29
{
23
- auto column = ColumnVector<T>::create ();
24
- // if obj is a list
25
- if (py::isinstance<py::list>(obj))
26
- {
27
- py::list list = obj.cast <py::list>();
28
- for (auto && i : list)
29
- column->insert (i.cast <T>());
30
- // free the list
31
- list.dec_ref ();
32
- }
33
- else if (py::isinstance<py::array>(obj)) // if obj is a numpy array
34
- {
35
- py::array array = obj.cast <py::array>();
36
- // chdb: array is a numpy array, so we can directly cast it to a vector?
37
- for (auto && i : array)
38
- column->insert (i.cast <T>());
39
- // free the array, until we implement with zero copy
40
- array.dec_ref ();
41
- }
30
+ MutableColumnPtr column;
31
+ if constexpr (std::is_same_v<T, DateTime64> || std::is_same_v<T, Decimal128> || std::is_same_v<T, Decimal256>)
32
+ column = ColumnDecimal<T>::create (0 , scale);
33
+ else if constexpr (std::is_same_v<T, String>)
34
+ column = ColumnString::create ();
42
35
else
43
- {
44
- throw Exception (ErrorCodes::BAD_TYPE_OF_FIELD, " Unsupported type {}" , obj.get_type ().attr (" __name__" ).cast <std::string>());
45
- }
46
- return column;
47
- }
36
+ column = ColumnVector<T>::create ();
48
37
49
- template <>
50
- ColumnPtr convert_and_insert<String>(py::object obj)
51
- {
52
- auto column = ColumnString::create ();
53
38
if (py::isinstance<py::list>(obj))
54
39
{
55
40
py::list list = obj.cast <py::list>();
56
41
for (auto && i : list)
57
- column->insert (i.cast <String>());
58
- // free the list
42
+ column->insert (i.cast <T>());
59
43
list.dec_ref ();
60
44
}
61
45
else if (py::isinstance<py::array>(obj))
62
46
{
63
47
py::array array = obj.cast <py::array>();
64
48
for (auto && i : array)
65
- column->insert (i.cast <String>());
66
- // free the array, until we implement with zero copy
49
+ column->insert (i.cast <T>());
67
50
array.dec_ref ();
68
51
}
69
52
else
70
53
{
71
- throw Exception (ErrorCodes::BAD_TYPE_OF_FIELD, " Unsupported type {}" , obj.get_type ().attr (" __name__" ).cast <std::string>());
54
+ throw Exception (
55
+ ErrorCodes::BAD_TYPE_OF_FIELD,
56
+ " Unsupported type {} for value {}" ,
57
+ obj.get_type ().attr (" __name__" ).cast <std::string>(),
58
+ py::str (obj).cast <std::string>());
72
59
}
73
60
return column;
74
61
}
75
62
76
63
Chunk PythonSource::generate ()
77
64
{
78
65
size_t num_rows = 0 ;
79
-
66
+ std::vector<py::object> data;
80
67
try
81
68
{
82
69
// GIL is held when called from Python code. Release it to avoid deadlock
83
70
py::gil_scoped_release release;
84
- std::vector<py::object> data = reader->read (description.sample_block .getNames (), max_block_size);
71
+ data = reader->read (description.sample_block .getNames (), max_block_size);
85
72
86
73
LOG_DEBUG (logger, " Read {} columns" , data.size ());
87
74
LOG_DEBUG (logger, " Need {} columns" , description.sample_block .columns ());
@@ -122,31 +109,58 @@ Chunk PythonSource::generate()
122
109
num_rows = py::len (data[i]);
123
110
const auto & column = data[i];
124
111
const auto & type = description.sample_block .getByPosition (i).type ;
112
+ WhichDataType which (type);
125
113
126
- if (type-> equals (*std::make_shared<DataTypeUInt8>() ))
114
+ if (which. isUInt8 ( ))
127
115
columns[i] = convert_and_insert<UInt8>(column);
128
- else if (type-> equals (*std::make_shared<DataTypeUInt16>() ))
116
+ else if (which. isUInt16 ( ))
129
117
columns[i] = convert_and_insert<UInt16>(column);
130
- else if (type-> equals (*std::make_shared<DataTypeUInt32>() ))
118
+ else if (which. isUInt32 ( ))
131
119
columns[i] = convert_and_insert<UInt32>(column);
132
- else if (type-> equals (*std::make_shared<DataTypeUInt64>() ))
120
+ else if (which. isUInt64 ( ))
133
121
columns[i] = convert_and_insert<UInt64>(column);
134
- else if (type->equals (*std::make_shared<DataTypeInt8>()))
122
+ else if (which.isUInt128 ())
123
+ columns[i] = convert_and_insert<UInt128>(column);
124
+ else if (which.isUInt256 ())
125
+ columns[i] = convert_and_insert<UInt256>(column);
126
+ else if (which.isInt8 ())
135
127
columns[i] = convert_and_insert<Int8>(column);
136
- else if (type-> equals (*std::make_shared<DataTypeInt16>() ))
128
+ else if (which. isInt16 ( ))
137
129
columns[i] = convert_and_insert<Int16>(column);
138
- else if (type-> equals (*std::make_shared<DataTypeInt32>() ))
130
+ else if (which. isInt32 ( ))
139
131
columns[i] = convert_and_insert<Int32>(column);
140
- else if (type-> equals (*std::make_shared<DataTypeInt64>() ))
132
+ else if (which. isInt64 ( ))
141
133
columns[i] = convert_and_insert<Int64>(column);
142
- else if (type->equals (*std::make_shared<DataTypeFloat32>()))
134
+ else if (which.isInt128 ())
135
+ columns[i] = convert_and_insert<Int128>(column);
136
+ else if (which.isInt256 ())
137
+ columns[i] = convert_and_insert<Int256>(column);
138
+ else if (which.isFloat32 ())
143
139
columns[i] = convert_and_insert<Float32>(column);
144
- else if (type-> equals (*std::make_shared<DataTypeFloat64>() ))
140
+ else if (which. isFloat64 ( ))
145
141
columns[i] = convert_and_insert<Float64>(column);
146
- else if (type->equals (*std::make_shared<DataTypeString>()))
142
+ else if (which.isDecimal128 ())
143
+ {
144
+ const auto & dtype = typeid_cast<const DataTypeDecimal<Decimal128> *>(type.get ());
145
+ columns[i] = convert_and_insert<Decimal128>(column, dtype->getScale ());
146
+ }
147
+ else if (which.isDecimal256 ())
148
+ {
149
+ const auto & dtype = typeid_cast<const DataTypeDecimal<Decimal256> *>(type.get ());
150
+ columns[i] = convert_and_insert<Decimal256>(column, dtype->getScale ());
151
+ }
152
+ else if (which.isDateTime ())
153
+ columns[i] = convert_and_insert<UInt32>(column);
154
+ else if (which.isDateTime64 ())
155
+ columns[i] = convert_and_insert<DateTime64>(column);
156
+ else if (which.isString ())
147
157
columns[i] = convert_and_insert<String>(column);
148
158
else
149
- throw Exception (ErrorCodes::BAD_TYPE_OF_FIELD, " Unsupported type {}" , type->getName ());
159
+ throw Exception (
160
+ ErrorCodes::BAD_TYPE_OF_FIELD,
161
+ " Unsupported type {} for column {}" ,
162
+ type->getName (),
163
+ description.sample_block .getByPosition (i).name );
150
164
}
151
165
// Set data vector to empty to avoid trigger py::object destructor without GIL
152
166
// Note: we have already manually decremented the reference count of the list or array in `convert_and_insert` function
0 commit comments