From a072c38a8a1624c9b8f5dd1a19c46c537a42f0e5 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 3 Dec 2023 22:39:30 -0500 Subject: [PATCH] Set copy to true by default (#71) --- README.md | 46 ++++++++++++++++++++++++++++++++++++++------- src/index.ts | 2 +- src/record-batch.ts | 38 ++++++++++++++++++++++++------------- src/vector.ts | 31 +++++++++++++++++++++++++++--- 4 files changed, 93 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index df6accb..9ccfa40 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,24 @@ const WASM_MEMORY: WebAssembly.Memory = ... const field = parseField(WASM_MEMORY.buffer, fieldPtr); ``` +### `parseData` + +Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure) C FFI struct into an [`arrow.Data`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Data.html) instance. Multiple `Data` instances can be joined to make an [`arrow.Vector`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Vector.html). + +- `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from. +- `ptr` (`number`): The numeric pointer in `buffer` where the C struct is located. +- `dataType` (`arrow.DataType`): The type of the vector to parse. This is retrieved from `field.type` on the result of `parseField`. +- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Data` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. + +#### Example + +```ts +const WASM_MEMORY: WebAssembly.Memory = ... 
+const copiedData = parseData(WASM_MEMORY.buffer, arrayPtr, field.type); +// Make zero-copy views instead of copying array contents +const viewedData = parseData(WASM_MEMORY.buffer, arrayPtr, field.type, false); +``` + ### `parseVector` Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure) C FFI struct into an [`arrow.Vector`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Vector.html) instance. Multiple `Vector` instances can be joined to make an [`arrow.Table`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Table.html). @@ -31,13 +49,15 @@ Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html - `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from. - `ptr` (`number`): The numeric pointer in `buffer` where the C struct is located. - `dataType` (`arrow.DataType`): The type of the vector to parse. This is retrieved from `field.type` on the result of `parseField`. -- `copy` (`boolean`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. +- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. + +#### Example ```ts const WASM_MEMORY: WebAssembly.Memory = ... 
-const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type); -// Copy arrays into JS instead of creating views -const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type, true); +const copiedVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type); +// Make zero-copy views instead of copying array contents +const viewedVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type, false); ``` ### `parseRecordBatch` @@ -47,12 +67,24 @@ Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html - `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from. - `arrayPtr` (`number`): The numeric pointer in `buffer` where the _array_ C struct is located. - `schemaPtr` (`number`): The numeric pointer in `buffer` where the _field_ C struct is located. -- `copy` (`boolean`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. +- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. + +#### Example ```ts const WASM_MEMORY: WebAssembly.Memory = ... -// Pass `true` to copy arrays across the boundary instead of creating views. -const recordBatch = parseRecordBatch(WASM_MEMORY.buffer, arrayPtr, fieldPtr, true); +const copiedRecordBatch = parseRecordBatch( + WASM_MEMORY.buffer, + arrayPtr, + fieldPtr +); +// Pass `false` to view arrays across the boundary instead of creating copies. 
+const viewedRecordBatch = parseRecordBatch( + WASM_MEMORY.buffer, + arrayPtr, + fieldPtr, + false +); ``` ## Type Support diff --git a/src/index.ts b/src/index.ts index 97964c7..92c71ea 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,3 +1,3 @@ -export { parseVector } from "./vector"; +export { parseVector, parseData } from "./vector"; export { parseField } from "./field"; export { parseRecordBatch } from "./record-batch"; diff --git a/src/record-batch.ts b/src/record-batch.ts index 02278a6..a677196 100644 --- a/src/record-batch.ts +++ b/src/record-batch.ts @@ -3,24 +3,36 @@ import { parseField } from "./field"; import { parseData } from "./vector"; /** - * Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure) C FFI struct _plus_ an [`ArrowSchema`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowschema-structure) C FFI struct into an `arrow.RecordBatch` instance. Note that the underlying array and field **must** be a `Struct` type. In essence a `Struct` array is used to mimic a `RecordBatch` while only being one array. - * - * - `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from. - * - `arrayPtr` (`number`): The numeric pointer in `buffer` where the _array_ C struct is located. - * - `schemaPtr` (`number`): The numeric pointer in `buffer` where the _field_ C struct is located. - * - `copy` (`boolean`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. - * - * ```ts - * const WASM_MEMORY: WebAssembly.Memory = ... - * // Pass `true` to copy arrays across the boundary instead of creating views. 
- * const recordBatch = parseRecordBatch(WASM_MEMORY.buffer, arrayPtr, fieldPtr, true); - * ``` +Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure) C FFI struct _plus_ an [`ArrowSchema`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowschema-structure) C FFI struct into an `arrow.RecordBatch` instance. Note that the underlying array and field **must** be a `Struct` type. In essence a `Struct` array is used to mimic a `RecordBatch` while only being one array. + +- `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from. +- `arrayPtr` (`number`): The numeric pointer in `buffer` where the _array_ C struct is located. +- `schemaPtr` (`number`): The numeric pointer in `buffer` where the _field_ C struct is located. +- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. + +#### Example + +```ts +const WASM_MEMORY: WebAssembly.Memory = ... +const copiedRecordBatch = parseRecordBatch( + WASM_MEMORY.buffer, + arrayPtr, + fieldPtr +); +// Pass `false` to view arrays across the boundary instead of creating copies. 
+const viewedRecordBatch = parseRecordBatch( + WASM_MEMORY.buffer, + arrayPtr, + fieldPtr, + false +); +``` */ export function parseRecordBatch( buffer: ArrayBuffer, arrayPtr: number, schemaPtr: number, - copy: boolean = false, + copy: boolean = true, ): arrow.RecordBatch { const field = parseField(buffer, schemaPtr); if (!isStructField(field)) { diff --git a/src/vector.ts b/src/vector.ts index 44a30e1..f7b7c15 100644 --- a/src/vector.ts +++ b/src/vector.ts @@ -10,23 +10,48 @@ Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html - `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from. - `ptr` (`number`): The numeric pointer in `buffer` where the C struct is located. - `dataType` (`arrow.DataType`): The type of the vector to parse. This is retrieved from `field.type` on the result of `parseField`. -- `copy` (`boolean`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. +- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. + +#### Example + +```ts +const WASM_MEMORY: WebAssembly.Memory = ... 
+const copiedVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type); +// Make zero-copy views instead of copying array contents +const viewedVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type, false); +``` */ export function parseVector( buffer: ArrayBuffer, ptr: number, dataType: T, - copy: boolean = false, + copy: boolean = true, ): arrow.Vector { const data = parseData(buffer, ptr, dataType, copy); return arrow.makeVector(data); } +/** +Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure) C FFI struct into an [`arrow.Data`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Data.html) instance. Multiple `Data` instances can be joined to make an [`arrow.Vector`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Vector.html). + +- `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from. +- `ptr` (`number`): The numeric pointer in `buffer` where the C struct is located. +- `dataType` (`arrow.DataType`): The type of the vector to parse. This is retrieved from `field.type` on the result of `parseField`. +- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Data` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes. + +#### Example + +```ts +const WASM_MEMORY: WebAssembly.Memory = ... +const copiedData = parseData(WASM_MEMORY.buffer, arrayPtr, field.type); +// Make zero-copy views instead of copying array contents +const viewedData = parseData(WASM_MEMORY.buffer, arrayPtr, field.type, false); +``` + */ export function parseData( buffer: ArrayBuffer, ptr: number, dataType: T, - copy: boolean = false, + copy: boolean = true, ): arrow.Data { const dataView = new DataView(buffer);