Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arrow batches fields #1925

Open
wants to merge 41 commits into
base: SNOW-873466-arrow-batches
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
4d64dab
Initial version
sfc-gh-astachowski Aug 23, 2024
986967d
Initial tests
sfc-gh-astachowski Aug 26, 2024
c9f6287
Formatting
sfc-gh-astachowski Aug 26, 2024
0fc7b5a
Import formatting
sfc-gh-astachowski Aug 26, 2024
b409510
Added missing interface definitions
sfc-gh-astachowski Aug 26, 2024
1de2c39
Implemented review feedback
sfc-gh-astachowski Aug 27, 2024
969c59c
Further review feedback
sfc-gh-astachowski Aug 28, 2024
f12fdeb
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-all-simpl…
sfc-gh-astachowski Aug 30, 2024
a1cfe08
Added handling of remaining types
sfc-gh-astachowski Sep 3, 2024
53638df
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-all-simpl…
sfc-gh-astachowski Sep 3, 2024
0fd7d0f
Add null time zone check
sfc-gh-astachowski Sep 3, 2024
73d6b4d
Removed timestamp support
sfc-gh-astachowski Sep 3, 2024
7424acf
Added timestamp support
sfc-gh-astachowski Sep 3, 2024
3dd20a9
Formatting
sfc-gh-astachowski Sep 3, 2024
28fb57a
Removed old comments
sfc-gh-astachowski Sep 3, 2024
afa1142
Fixed memory leak and added assertion of no leaks in tests.
sfc-gh-astachowski Sep 3, 2024
7c9ab7a
Fixed memory leaks and added assertions of no memory leaks.
sfc-gh-astachowski Sep 3, 2024
31bb11b
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-all-simpl…
sfc-gh-astachowski Sep 6, 2024
ec255ee
Merge branch 'arrow-batches-all-simple-types' into arrow-batches-time…
sfc-gh-astachowski Sep 6, 2024
69cfe59
Merge fixes
sfc-gh-astachowski Sep 6, 2024
e26e56c
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-all-simpl…
sfc-gh-astachowski Sep 6, 2024
1c34bb9
Added null check
sfc-gh-astachowski Sep 6, 2024
de52645
Merge branch 'arrow-batches-all-simple-types' into arrow-batches-time…
sfc-gh-astachowski Sep 6, 2024
ef5238c
Formatting
sfc-gh-astachowski Sep 6, 2024
f4ace2d
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-all-simpl…
sfc-gh-astachowski Sep 9, 2024
660c0de
Merge branch 'arrow-batches-all-simple-types' into arrow-batches-time…
sfc-gh-astachowski Sep 9, 2024
45de13f
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-all-simpl…
sfc-gh-astachowski Sep 20, 2024
44e608d
Merge fixes
sfc-gh-astachowski Sep 20, 2024
400fd13
Added try-with-resources statements
sfc-gh-astachowski Sep 20, 2024
56eb83e
Merge branch 'arrow-batches-all-simple-types' into arrow-batches-time…
sfc-gh-astachowski Sep 20, 2024
318bc4f
Merge fixes and introduced constants
sfc-gh-astachowski Sep 24, 2024
c5af779
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-timestamps
sfc-gh-astachowski Sep 25, 2024
82a68b7
Added closing vector in finally block and try-with-resources
sfc-gh-astachowski Sep 25, 2024
5cd55a7
Added extra tests and related fixes
sfc-gh-astachowski Sep 25, 2024
23b79d3
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-timestamps
sfc-gh-astachowski Oct 16, 2024
52e641d
Added proper field handling
sfc-gh-astachowski Oct 17, 2024
66f4bc7
Merge branch 'SNOW-873466-arrow-batches' into arrow-batches-fields
sfc-gh-astachowski Oct 17, 2024
113ffc2
Added FieldType to VarCharVector init
sfc-gh-astachowski Oct 22, 2024
245d420
Added a test for correct field types in map conversion
sfc-gh-astachowski Oct 22, 2024
39ecf66
Fix offset buffer issue in ListConverter
sfc-gh-astachowski Oct 22, 2024
c55f461
Formatting
sfc-gh-astachowski Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class BigIntVectorConverter extends SimpleArrowFullVectorConverter<BigIntVector> {
Expand All @@ -21,14 +23,20 @@ public BigIntVectorConverter(
super(allocator, vector, context, session, idx);
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.Int(64, true), null);
}

@Override
protected boolean matchingType() {
return (vector instanceof BigIntVector);
}

@Override
protected BigIntVector initVector() {
BigIntVector resultVector = new BigIntVector(vector.getName(), allocator);
boolean nullable = vector.getField().isNullable();
BigIntVector resultVector =
new BigIntVector(vector.getName(), getFieldType(nullable), allocator);
resultVector.allocateNew(vector.getValueCount());
return resultVector;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class BinaryVectorConverter extends SimpleArrowFullVectorConverter<VarBinaryVector> {
Expand All @@ -20,14 +22,20 @@ public BinaryVectorConverter(
super(allocator, vector, context, session, idx);
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.Binary(), null);
}

@Override
protected boolean matchingType() {
return vector instanceof VarBinaryVector;
}

@Override
protected VarBinaryVector initVector() {
VarBinaryVector resultVector = new VarBinaryVector(vector.getName(), allocator);
boolean nullable = vector.getField().isNullable();
VarBinaryVector resultVector =
new VarBinaryVector(vector.getName(), getFieldType(nullable), allocator);
resultVector.allocateNew(vector.getValueCount());
return resultVector;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class BitVectorConverter extends SimpleArrowFullVectorConverter<BitVector> {
Expand All @@ -21,14 +23,19 @@ public BitVectorConverter(
super(allocator, vector, context, session, idx);
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.Bool(), null);
}

@Override
protected boolean matchingType() {
return vector instanceof BitVector;
}

@Override
protected BitVector initVector() {
BitVector resultVector = new BitVector(vector.getName(), allocator);
boolean nullable = vector.getField().isNullable();
BitVector resultVector = new BitVector(vector.getName(), getFieldType(nullable), allocator);
resultVector.allocateNew(vector.getValueCount());
return resultVector;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.DateUnit;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class DateVectorConverter extends SimpleArrowFullVectorConverter<DateDayVector> {
Expand All @@ -25,14 +28,20 @@ public DateVectorConverter(
this.timeZone = timeZone;
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.Date(DateUnit.DAY), null);
}

@Override
protected boolean matchingType() {
return vector instanceof DateDayVector;
}

@Override
protected DateDayVector initVector() {
DateDayVector resultVector = new DateDayVector(vector.getName(), allocator);
boolean nullable = vector.getField().isNullable();
DateDayVector resultVector =
new DateDayVector(vector.getName(), getFieldType(nullable), allocator);
resultVector.allocateNew(vector.getValueCount());
return resultVector;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class DecimalVectorConverter extends SimpleArrowFullVectorConverter<DecimalVector> {
Expand All @@ -21,6 +23,10 @@ public DecimalVectorConverter(
super(allocator, vector, context, session, idx);
}

private static FieldType getFieldType(boolean nullable, int scale, int precision) {
return new FieldType(nullable, new ArrowType.Decimal(precision, scale, 128), null);
}

@Override
protected boolean matchingType() {
return (vector instanceof DecimalVector);
Expand All @@ -32,7 +38,9 @@ protected DecimalVector initVector() {
String precisionString = vector.getField().getMetadata().get("precision");
int scale = Integer.parseInt(scaleString);
int precision = Integer.parseInt(precisionString);
DecimalVector resultVector = new DecimalVector(vector.getName(), allocator, precision, scale);
boolean nullable = vector.getField().isNullable();
DecimalVector resultVector =
new DecimalVector(vector.getName(), getFieldType(nullable, scale, precision), allocator);
resultVector.allocateNew(vector.getValueCount());
return resultVector;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class FixedSizeListVectorConverter extends AbstractFullVectorConverter {
Expand Down Expand Up @@ -41,6 +43,10 @@ public class FixedSizeListVectorConverter extends AbstractFullVectorConverter {
this.valueTargetType = valueTargetType;
}

private static FieldType getFieldType(boolean nullable, int size) {
return new FieldType(nullable, new ArrowType.FixedSizeList(size), null);
}

@Override
protected FieldVector convertVector()
throws SFException, SnowflakeSQLException, SFArrowException {
Expand All @@ -50,8 +56,12 @@ protected FieldVector convertVector()
FieldVector convertedDataVector =
ArrowFullVectorConverterUtil.convert(
allocator, dataVector, context, session, timeZoneToUse, 0, valueTargetType);

boolean nullable = vector.getField().isNullable();
int listSize = listVector.getListSize();
FixedSizeListVector convertedListVector =
FixedSizeListVector.empty(listVector.getName(), listVector.getListSize(), allocator);
new FixedSizeListVector(
listVector.getName(), allocator, getFieldType(nullable, listSize), null);
ArrayList<Field> fields = new ArrayList<>();
fields.add(convertedDataVector.getField());
convertedListVector.initializeChildrenFromFields(fields);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class FloatVectorConverter extends SimpleArrowFullVectorConverter<Float8Vector> {
Expand All @@ -26,9 +29,16 @@ protected boolean matchingType() {
return vector instanceof Float8Vector;
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(
nullable, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
}

@Override
protected Float8Vector initVector() {
Float8Vector resultVector = new Float8Vector(vector.getName(), allocator);
boolean nullable = vector.getField().isNullable();
Float8Vector resultVector =
new Float8Vector(vector.getName(), getFieldType(nullable), allocator);
resultVector.allocateNew(vector.getValueCount());
return resultVector;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class IntVectorConverter extends SimpleArrowFullVectorConverter<IntVector> {
Expand All @@ -26,9 +28,14 @@ protected boolean matchingType() {
return (vector instanceof IntVector);
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.Int(32, true), null);
}

@Override
protected IntVector initVector() {
IntVector resultVector = new IntVector(vector.getName(), allocator);
boolean nullable = vector.getField().isNullable();
IntVector resultVector = new IntVector(vector.getName(), getFieldType(nullable), allocator);
resultVector.allocateNew(vector.getValueCount());
return resultVector;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class ListVectorConverter extends AbstractFullVectorConverter {
Expand Down Expand Up @@ -41,8 +43,13 @@ public class ListVectorConverter extends AbstractFullVectorConverter {
this.valueTargetType = valueTargetType;
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.List(), null);
}

protected ListVector initVector(String name, Field field) {
ListVector convertedListVector = ListVector.empty(name, allocator);
boolean nullable = vector.getField().isNullable();
ListVector convertedListVector = new ListVector(name, allocator, getFieldType(nullable), null);
ArrayList<Field> fields = new ArrayList<>();
fields.add(field);
convertedListVector.initializeChildrenFromFields(fields);
Expand All @@ -58,8 +65,7 @@ protected FieldVector convertVector()
FieldVector convertedDataVector =
ArrowFullVectorConverterUtil.convert(
allocator, dataVector, context, session, timeZoneToUse, 0, valueTargetType);
// TODO: change to convertedDataVector and make all necessary changes to make it work
ListVector convertedListVector = initVector(vector.getName(), dataVector.getField());
ListVector convertedListVector = initVector(vector.getName(), convertedDataVector.getField());
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the line that required the whole change - if we don't keep nullability properly, mapVector will complain about it's grandchildren being nullable. As the grandchildren may be of any type, all converters were affected.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's add some tests to show the improvement

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a unit test checking if FieldTypes behave as expected when converting a MapVector

convertedListVector.allocateNew();
convertedListVector.setValueCount(listVector.getValueCount());
convertedListVector.getOffsetBuffer().setBytes(0, listVector.getOffsetBuffer());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class MapVectorConverter extends ListVectorConverter {
Expand All @@ -25,9 +27,15 @@ public class MapVectorConverter extends ListVectorConverter {
super(allocator, vector, context, session, timeZoneToUse, idx, valueTargetType);
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.Map(false), null);
}

@Override
protected ListVector initVector(String name, Field field) {
MapVector convertedMapVector = MapVector.empty(name, allocator, false);
boolean nullable = vector.getField().isNullable();
MapVector convertedMapVector =
new MapVector(vector.getName(), allocator, getFieldType(nullable), null);
ArrayList<Field> fields = new ArrayList<>();
fields.add(field);
convertedMapVector.initializeChildrenFromFields(fields);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.FieldType;

@SnowflakeJdbcInternalApi
public class SmallIntVectorConverter extends SimpleArrowFullVectorConverter<SmallIntVector> {
Expand All @@ -21,14 +23,20 @@ public SmallIntVectorConverter(
super(allocator, vector, context, session, idx);
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.Int(16, true), null);
}

@Override
protected boolean matchingType() {
return (vector instanceof SmallIntVector);
}

@Override
protected SmallIntVector initVector() {
SmallIntVector resultVector = new SmallIntVector(vector.getName(), allocator);
boolean nullable = vector.getField().isNullable();
SmallIntVector resultVector =
new SmallIntVector(vector.getName(), getFieldType(nullable), allocator);
resultVector.allocateNew(vector.getValueCount());
return resultVector;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.TransferPair;

@SnowflakeJdbcInternalApi
Expand Down Expand Up @@ -45,6 +47,10 @@ public class StructVectorConverter extends AbstractFullVectorConverter {
this.targetTypes = targetTypes;
}

private static FieldType getFieldType(boolean nullable) {
return new FieldType(nullable, new ArrowType.Struct(), null);
}

protected FieldVector convertVector()
throws SFException, SnowflakeSQLException, SFArrowException {
try {
Expand All @@ -63,7 +69,10 @@ protected FieldVector convertVector()

List<Field> convertedFields =
convertedVectors.stream().map(ValueVector::getField).collect(Collectors.toList());
StructVector converted = StructVector.empty(vector.getName(), allocator);

boolean nullable = vector.getField().isNullable();
StructVector converted =
new StructVector(vector.getName(), allocator, getFieldType(nullable), null);
converted.allocateNew();
converted.initializeChildrenFromFields(convertedFields);
for (FieldVector convertedVector : convertedVectors) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.TimeMicroVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.TimeUnit;

@SnowflakeJdbcInternalApi
public class TimeMicroVectorConverter extends TimeVectorConverter<TimeMicroVector> {
Expand All @@ -12,9 +13,20 @@ public TimeMicroVectorConverter(RootAllocator allocator, ValueVector vector) {
super(allocator, vector);
}

@Override
protected TimeUnit getTimeUnit() {
return TimeUnit.MICROSECOND;
}

@Override
protected int getWidth() {
return 64;
}

@Override
protected TimeMicroVector initVector() {
return new TimeMicroVector(vector.getName(), allocator);
return new TimeMicroVector(
vector.getName(), getFieldType(vector.getField().isNullable()), allocator);
}

@Override
Expand Down
Loading