@@ -26,7 +26,7 @@ from math import isnan
26
26
# Python imports
27
27
import bson
28
28
import numpy as np
29
- from pyarrow import timestamp, struct , field, scalar, FixedSizeBinaryScalar, StructScalar, array
29
+ from pyarrow import timestamp, struct , field
30
30
from pyarrow.lib import (
31
31
tobytes, StructType, int32, int64, float64, string, bool_, list_
32
32
)
@@ -485,15 +485,11 @@ cdef class ObjectIdBuilder(_ArrayBuilderBase):
485
485
cdef shared_ptr[CArray] out
486
486
with nogil:
487
487
self .builder.get().Finish(& out)
488
- result = pyarrow_wrap_array(out)
489
- for x in result:
490
- print (" CORRECT: " , result.type, type (result), x.type, type (x))
491
488
return pyarrow_wrap_array(out).cast(ObjectIdType())
492
489
493
490
# Return the shared_ptr stored in self.builder, typed as CFixedSizeBinaryBuilder.
# This is the same handle that finish() calls Finish() on.
# Declared with cdef, so it is callable from Cython code only, not from Python.
cdef shared_ptr[CFixedSizeBinaryBuilder] unwrap(self ):
494
491
return self .builder
495
492
496
-
497
493
cdef class Int32Builder(_ArrayBuilderBase):
498
494
cdef:
499
495
shared_ptr[CInt32Builder] builder
@@ -725,6 +721,8 @@ cdef object get_field_builder(object field, object tzinfo):
725
721
field_builder = Decimal128Builder()
726
722
elif getattr (field_type, ' _type_marker' ) == _BsonArrowTypes.binary:
727
723
field_builder = BinaryBuilder(field_type.subtype)
724
+ elif getattr (field_type, ' _type_marker' ) == _BsonArrowTypes.code:
725
+ field_builder = CodeBuilder()
728
726
else :
729
727
field_builder = StringBuilder()
730
728
return field_builder
@@ -735,6 +733,7 @@ cdef class DocumentBuilder(_ArrayBuilderBase):
735
733
shared_ptr[CStructBuilder] builder
736
734
object dtype
737
735
object context
736
+ object builder_map
738
737
739
738
def __cinit__ (self , StructType dtype , tzinfo = None , MemoryPool memory_pool = None ):
740
739
cdef StringBuilder field_builder
@@ -747,11 +746,11 @@ cdef class DocumentBuilder(_ArrayBuilderBase):
747
746
748
747
self .context = context = PyMongoArrowContext(None , {})
749
748
context.tzinfo = tzinfo
750
- builder_map = context.builder_map
749
+ self . builder_map = context.builder_map
751
750
752
751
for field in dtype:
753
752
field_builder = < StringBuilder> get_field_builder(field, tzinfo)
754
- builder_map[field.name.encode(' utf-8' )] = field_builder
753
+ self . builder_map[field.name.encode(' utf-8' )] = field_builder
755
754
c_field_builders.push_back(< shared_ptr[CArrayBuilder]> field_builder.builder)
756
755
757
756
self .builder.reset(new CStructBuilder(pyarrow_unwrap_data_type(dtype), pool, c_field_builders))
@@ -784,26 +783,29 @@ cdef class DocumentBuilder(_ArrayBuilderBase):
784
783
cdef shared_ptr[CArray] out
785
784
with nogil:
786
785
self .builder.get().Finish(& out)
787
- wrapped = pyarrow_wrap_array(out)
788
- python_out = []
789
- for original in wrapped :
786
+
787
+ struct_array = pyarrow_wrap_array(out)
788
+ for struct_def in struct_array :
790
789
new_types = []
791
- new_names = list (original.keys())
792
- for fname, ftype in original.items():
793
- # new_names.append(fname)
794
- if isinstance (ftype, FixedSizeBinaryScalar) and ftype.type.byte_width == 12 : # ObjectIdType
795
- print (" TYPE: " , ftype, ftype.type, type (ftype))
790
+ new_names = list (struct_def.keys())
791
+ for fname, ftype in struct_def.items():
792
+ if type (self .builder_map[fname.encode(' utf-8' )]).__name__ == ObjectIdBuilder.__name__ : # ObjectIdType
796
793
new_ftype = ObjectIdType()
797
- # print("TYPE: ", new_ftype, new_ftype.storage_type, type(new_ftype))
798
- # print("ARRAY: ", array([(fname, ftype)]))
794
+ new_types.append(new_ftype)
795
+ elif type (self .builder_map[fname.encode(' utf-8' )]).__name__ == Decimal128Builder.__name__ : # Decimal128Type
796
+ new_ftype = Decimal128Type_()
797
+ new_types.append(new_ftype)
798
+ elif type (self .builder_map[fname.encode(' utf-8' )]).__name__ == BinaryBuilder.__name__ : # BinaryType
799
+ new_ftype = BinaryType(self .dtype.field(fname).type.subtype)
800
+ new_types.append(new_ftype)
801
+ elif type (self .builder_map[fname.encode(' utf-8' )]).__name__ == CodeBuilder.__name__ : # CodeType
802
+ new_ftype = CodeType()
799
803
new_types.append(new_ftype)
800
804
else :
801
805
new_types.append(ftype.type)
802
- python_out.append(struct (zip (new_names, new_types)))
803
- print (" BEFORE: " , wrapped, wrapped.type, type (wrapped))
804
- print (" AFTER: " , python_out[0 ], python_out, type (python_out[0 ]))
805
- print (" AFTER AFTER: " , array([], type = python_out[0 ]))
806
- return wrapped
806
+
807
+ new_dtype = struct (dict (zip (new_names, new_types)))
808
+ return struct_array.cast(new_dtype)
807
809
808
810
# Return the shared_ptr[CStructBuilder] held in self.builder.
# __cinit__ points it at a new CStructBuilder built from the struct dtype and
# the per-field child builders, and finish() calls Finish() on it.
# Declared with cdef, so it is callable from Cython code only, not from Python.
cdef shared_ptr[CStructBuilder] unwrap(self ):
809
811
return self .builder
0 commit comments