27
27
import org .apache .arrow .c .CometSchemaImporter ;
28
28
import org .apache .arrow .memory .BufferAllocator ;
29
29
import org .apache .arrow .memory .RootAllocator ;
30
- import org .apache .arrow .vector .FieldVector ;
31
- import org .apache .arrow .vector .dictionary .Dictionary ;
32
- import org .apache .arrow .vector .types .pojo .DictionaryEncoding ;
33
30
import org .apache .parquet .column .ColumnDescriptor ;
34
- import org .apache .parquet .column .page .*;
35
- import org .apache .parquet .schema .LogicalTypeAnnotation ;
31
+ import org .apache .parquet .schema .Type ;
36
32
import org .apache .spark .sql .types .DataType ;
37
33
38
34
import org .apache .comet .vector .*;
@@ -65,6 +61,7 @@ public class NativeColumnReader extends AbstractColumnReader {
65
61
boolean hadNull ;
66
62
67
63
private final CometSchemaImporter importer ;
64
+ private final NativeUtil nativeUtil ;
68
65
69
66
private ArrowArray array = null ;
70
67
private ArrowSchema schema = null ;
@@ -76,14 +73,17 @@ public NativeColumnReader(
76
73
long nativeBatchHandle ,
77
74
int columnNum ,
78
75
DataType type ,
76
+ Type fieldType ,
79
77
ColumnDescriptor descriptor ,
80
78
CometSchemaImporter importer ,
79
+ NativeUtil nativeUtil ,
81
80
int batchSize ,
82
81
boolean useDecimal128 ,
83
82
boolean useLegacyDateTimestamp ) {
84
- super (type , descriptor , useDecimal128 , useLegacyDateTimestamp );
83
+ super (type , fieldType , descriptor , useDecimal128 , useLegacyDateTimestamp );
85
84
assert batchSize > 0 : "Batch size must be positive, found " + batchSize ;
86
85
this .batchSize = batchSize ;
86
+ this .nativeUtil = nativeUtil ;
87
87
this .importer = importer ;
88
88
this .nativeBatchHandle = nativeBatchHandle ;
89
89
this .columnNum = columnNum ;
@@ -94,13 +94,13 @@ public NativeColumnReader(
94
94
// Override in order to avoid creation of JVM side column readers
95
95
protected void initNative () {
96
96
LOG .debug (
97
- "Native column reader " + String .join ("." , this .descriptor . getPath ()) + " is initialized" );
97
+ "Native column reader {} is initialized" , String .join ("." , this .type . catalogString ()));
98
98
nativeHandle = 0 ;
99
99
}
100
100
101
101
@ Override
102
102
public void readBatch (int total ) {
103
- LOG .debug ("Reading column batch of size = " + total );
103
+ LOG .debug ("Reading column batch of size = {}" , total );
104
104
105
105
this .currentNumValues = total ;
106
106
}
@@ -131,10 +131,7 @@ public CometDecodedVector loadVector() {
131
131
currentVector .close ();
132
132
}
133
133
134
- LogicalTypeAnnotation logicalTypeAnnotation =
135
- descriptor .getPrimitiveType ().getLogicalTypeAnnotation ();
136
- boolean isUuid =
137
- logicalTypeAnnotation instanceof LogicalTypeAnnotation .UUIDLogicalTypeAnnotation ;
134
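+ // TODO(ARROW NATIVE): UUID columns are no longer flagged here (the isUuid check on the logical type annotation was removed); decide whether the imported vector needs UUID-specific handling.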
138
135
139
136
array = ArrowArray .allocateNew (ALLOCATOR );
140
137
schema = ArrowSchema .allocateNew (ALLOCATOR );
@@ -144,47 +141,19 @@ public CometDecodedVector loadVector() {
144
141
145
142
Native .currentColumnBatch (nativeBatchHandle , columnNum , arrayAddr , schemaAddr );
146
143
147
- FieldVector vector = importer .importVector (array , schema );
144
+ ArrowArray [] arrays = {array };
145
+ ArrowSchema [] schemas = {schema };
148
146
149
- DictionaryEncoding dictionaryEncoding = vector .getField ().getDictionary ();
150
-
151
- CometPlainVector cometVector = new CometPlainVector (vector , useDecimal128 );
147
+ CometDecodedVector cometVector =
148
+ (CometDecodedVector )
149
+ scala .collection .JavaConverters .seqAsJavaList (nativeUtil .importVector (arrays , schemas ))
150
+ .get (0 );
152
151
153
152
// Update whether the current vector contains any null values. This is used in the following
154
153
// batch(s) to determine whether we can skip loading the native vector.
155
154
hadNull = cometVector .hasNull ();
156
155
157
- if (dictionaryEncoding == null ) {
158
- if (dictionary != null ) {
159
- // This means the column was using dictionary encoding but now has fall-back to plain
160
- // encoding, on the native side. Setting 'dictionary' to null here, so we can use it as
161
- // a condition to check if we can re-use vector later.
162
- dictionary = null ;
163
- }
164
- // Either the column is not dictionary encoded, or it was using dictionary encoding but
165
- // a new data page has switched back to use plain encoding. For both cases we should
166
- // return plain vector.
167
- currentVector = cometVector ;
168
- return currentVector ;
169
- }
170
-
171
- // We should already re-initiate `CometDictionary` here because `Data.importVector` API will
172
- // release the previous dictionary vector and create a new one.
173
- Dictionary arrowDictionary = importer .getProvider ().lookup (dictionaryEncoding .getId ());
174
- CometPlainVector dictionaryVector =
175
- new CometPlainVector (arrowDictionary .getVector (), useDecimal128 , isUuid );
176
- if (dictionary != null ) {
177
- dictionary .setDictionaryVector (dictionaryVector );
178
- } else {
179
- dictionary = new CometDictionary (dictionaryVector );
180
- }
181
-
182
- currentVector =
183
- new CometDictionaryVector (
184
- cometVector , dictionary , importer .getProvider (), useDecimal128 , false , isUuid );
185
-
186
- currentVector =
187
- new CometDictionaryVector (cometVector , dictionary , importer .getProvider (), useDecimal128 );
156
+ currentVector = cometVector ;
188
157
return currentVector ;
189
158
}
190
159
}
0 commit comments