From 5d55874959c45c5c23e6f25f6cae30a589662509 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 30 Oct 2023 21:56:37 -0400 Subject: [PATCH] Add test with null bitmap --- tests/ffi.test.ts | 41 ++++++++++++++++++++++++++++++++- tests/pyarrow_generate_data.py | 10 ++++++++ tests/table.arrow | Bin 3250 -> 3426 bytes tests/utils.ts | 29 +++++++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) diff --git a/tests/ffi.test.ts b/tests/ffi.test.ts index a5818f4..c3be397 100644 --- a/tests/ffi.test.ts +++ b/tests/ffi.test.ts @@ -2,7 +2,12 @@ import { readFileSync } from "fs"; import { describe, it, expect } from "vitest"; import * as arrow from "apache-arrow"; import * as wasm from "rust-arrow-ffi"; -import { arrowTableToFFI, arraysEqual, loadIPCTableFromDisk } from "./utils"; +import { + arrowTableToFFI, + arraysEqual, + loadIPCTableFromDisk, + validityEqual, +} from "./utils"; import { parseField, parseVector } from "../src"; import { Type } from "../src/types"; @@ -622,3 +627,37 @@ describe("date32", (t) => { // expect(originalVector.get(i), wasmVector.get(i)); // } // }); + +describe("nullable int", (t) => { + function test(copy: boolean) { + let columnIndex = TEST_TABLE.schema.fields.findIndex( + (field) => field.name == "nullable_int" + ); + + const originalField = TEST_TABLE.schema.fields[columnIndex]; + // declare it's not null + const originalVector = TEST_TABLE.getChildAt(columnIndex) as arrow.Vector; + const fieldPtr = FFI_TABLE.schemaAddr(columnIndex); + const field = parseField(WASM_MEMORY.buffer, fieldPtr); + + expect(field.name).toStrictEqual(originalField.name); + expect(field.typeId).toStrictEqual(originalField.typeId); + expect(field.nullable).toStrictEqual(originalField.nullable); + + const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex); + const wasmVector = parseVector( + WASM_MEMORY.buffer, + arrayPtr, + field.type, + copy + ); + + expect( + validityEqual(originalVector, wasmVector), + "validity should be equal" + ).toBeTruthy(); + } + + it("copy=false", () => test(false)); + it("copy=true", () => test(true)); +}); diff --git a/tests/pyarrow_generate_data.py b/tests/pyarrow_generate_data.py index 03c6606..a5613f6 100644 --- a/tests/pyarrow_generate_data.py +++ b/tests/pyarrow_generate_data.py @@ -115,6 +115,15 @@ def timestamp_array() -> pa.Array: return arr +def nullable_int() -> pa.Array: + # True means null + mask = [True, False, True] + arr = pa.array([1, 2, 3], type=pa.uint8(), mask=mask) + assert isinstance(arr, pa.UInt8Array) + assert not arr[0].is_valid + return arr + + class MyExtensionType(pa.ExtensionType): """ Refer to https://arrow.apache.org/docs/python/extending_types.html for @@ -160,6 +169,7 @@ def table() -> pa.Table: "date32": date32_array(), "date64": date64_array(), "timestamp": timestamp_array(), + "nullable_int": nullable_int(), } ) diff --git a/tests/table.arrow b/tests/table.arrow index 2326863470bd956f100a7594e0728ee073d23070..ec0d119b13d9b433ce67c158e5a642483a96e375 100644 GIT binary patch delta 476 zcmdla`ACY_F(}AC+>n9cKM;6K>ff0nlBL2n7I}N>QEw delta 285 zcmaDPwMmlKF(}AC+>n9cKM)8^0SO_~lkaetL@I!s1_TaJvp}vu29&v; K%K~UTBt!t{syQ( return true; } + +export function validityEqual(v1: arrow.Vector, v2: arrow.Vector): boolean { + if (v1.length !== v2.length) { + return false; + } + + if (v1.data.length !== v2.data.length) { + console.log("todo: support different data lengths"); + return false; + } + for (let i = 0; i < v1.data.length; i++) { + const d1 = v1.data[i]; + const d2 = v2.data[i]; + // Check that null bitmaps have same length + if (d1 !== null && d2 !== null) { + if (d1.nullBitmap.length !== d2.nullBitmap.length) { + return false; + } + } + } + + for (let i = 0; i < v1.length; i++) { + if (v1.isValid(i) !== v2.isValid(i)) { + return false; + } + } + + return true; +}