Skip to content

Commit ded97ef

Browse files
authored
PyVortex array subclasses (#2194)
Downcast a PyArray to an encoding-specific array when we return it.
1 parent 947a464 commit ded97ef

File tree

20 files changed

+371
-194
lines changed

20 files changed

+371
-194
lines changed

docs/python/api/arrays.rst

+47-4
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ functions, serialized, and otherwise manipulated as a generic array.
77

88
There are two ways of "downcasting" an array for more specific access patterns:
99

10-
1. Into an encoding-specific array, like `vortex.encoding.BitPackedArray`.vortex.
10+
1. Into an encoding-specific array, like `vortex.BitPackedArray`.
1111
2. Into a type-specific array, like `vortex.array.BoolTypeArray`.
1212

13-
Be careful to note that :class:`vortex.encoding.BoolArray` represents an array that stores physical data
13+
Be careful to note that :class:`vortex.BoolArray` represents an array that stores physical data
1414
as a bit-buffer of booleans, vs `vortex.array.BoolTypeArray` which represents any array that has a logical
1515
type of boolean.
1616

@@ -20,9 +20,52 @@ Factory Functions
2020
.. autofunction:: vortex.array
2121

2222

23-
Type Classes
24-
------------
23+
Base Class
24+
----------
2525

2626
.. autoclass:: vortex.Array
2727
:members:
2828
:special-members: __len__
29+
30+
31+
Builtin Encodings
32+
-----------------
33+
34+
.. autoclass:: vortex.ChunkedArray
35+
:members:
36+
37+
38+
.. autoclass:: vortex.ConstantArray
39+
:members:
40+
41+
42+
.. autoclass:: vortex.NullArray
43+
:members:
44+
45+
46+
.. autoclass:: vortex.BoolArray
47+
:members:
48+
49+
50+
.. autoclass:: vortex.PrimitiveArray
51+
:members:
52+
53+
54+
.. autoclass:: vortex.VarBinArray
55+
:members:
56+
57+
58+
.. autoclass:: vortex.VarBinViewArray
59+
:members:
60+
61+
62+
.. autoclass:: vortex.StructArray
63+
:members:
64+
65+
66+
.. autoclass:: vortex.ListArray
67+
:members:
68+
69+
70+
.. autoclass:: vortex.ExtensionArray
71+
:members:

docs/python/api/compress.rst

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Compression
2+
===========
3+
4+
.. autofunction:: vortex.compress

docs/python/api/encoding.rst

-14
This file was deleted.

docs/python/api/index.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ Python API
66

77
arrays
88
dtypes
9-
encoding
9+
scalars
10+
expr
11+
compress
1012
io
1113
dataset
12-
expr
13-
scalars

pyvortex/python/vortex/__init__.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,16 @@
11
from . import _lib
2+
from ._lib.arrays import (
3+
BoolArray,
4+
ChunkedArray,
5+
ConstantArray,
6+
ExtensionArray,
7+
ListArray,
8+
NullArray,
9+
PrimitiveArray,
10+
StructArray,
11+
VarBinArray,
12+
VarBinViewArray,
13+
)
214
from ._lib.compress import compress
315
from ._lib.dtype import (
416
BinaryDType,
@@ -37,9 +49,20 @@
3749
assert _lib, "Ensure we eagerly import the Vortex native library"
3850

3951
__all__ = [
40-
"Array",
4152
"array",
4253
"compress",
54+
# Arrays and builtin encodings
55+
"Array",
56+
"ConstantArray",
57+
"ChunkedArray",
58+
"NullArray",
59+
"BoolArray",
60+
"PrimitiveArray",
61+
"VarBinArray",
62+
"VarBinViewArray",
63+
"StructArray",
64+
"ListArray",
65+
"ExtensionArray",
4366
# DTypes
4467
"DType",
4568
"NullDType",

pyvortex/python/vortex/_lib/arrays.pyi

+34-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any
1+
from typing import Any, final
22

33
import numpy as np
44
import pandas as pd
@@ -32,3 +32,36 @@ class Array:
3232
def to_polars_dataframe(self) -> pl.DataFrame: ...
3333
def to_polars_series(self) -> pl.Series: ...
3434
def to_pylist(self) -> list: ...
35+
36+
@final
37+
class NullArray(Array): ...
38+
39+
@final
40+
class BoolArray(Array): ...
41+
42+
@final
43+
class PrimitiveArray(Array): ...
44+
45+
@final
46+
class VarBinArray(Array): ...
47+
48+
@final
49+
class VarBinViewArray(Array): ...
50+
51+
@final
52+
class StructArray(Array):
53+
def field(self, name: str) -> Array: ...
54+
55+
@final
56+
class ListArray(Array): ...
57+
58+
@final
59+
class ExtensionArray(Array): ...
60+
61+
@final
62+
class ConstantArray(Array):
63+
def scalar(self) -> vx.Scalar: ...
64+
65+
@final
66+
class ChunkedArray(Array):
67+
def chunks(self) -> list[Array]: ...
+1-7
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,5 @@
1-
from typing import Any, final
1+
from typing import Any
22

33
import pyarrow as pa
44

5-
import vortex as vx
6-
75
def _encode(obj: Any) -> pa.Array: ...
8-
@final
9-
class BoolArray(vx.Array):
10-
def __new__(cls, array: vx.Array) -> BoolArray: ...
11-
def true_count(self) -> int: ...

pyvortex/python/vortex/encoding.py

-3
This file was deleted.
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
use itertools::Itertools;
2+
use pyo3::{pyclass, pymethods, Bound, PyRef, PyResult};
3+
use vortex::array::ChunkedEncoding;
4+
5+
use crate::arrays::{ArraySubclass, AsArrayRef, PyArray};
6+
7+
/// Concrete class for arrays with `vortex.chunked` encoding.
8+
#[pyclass(name = "ChunkedArray", module = "vortex", extends=PyArray, frozen)]
9+
pub(crate) struct PyChunkedArray;
10+
11+
impl ArraySubclass for PyChunkedArray {
12+
type Encoding = ChunkedEncoding;
13+
}
14+
15+
#[pymethods]
16+
impl PyChunkedArray {
17+
pub fn chunks(self_: PyRef<'_, Self>) -> PyResult<Vec<Bound<'_, PyArray>>> {
18+
self_
19+
.as_array_ref()
20+
.chunks()
21+
.map(|chunk| PyArray::init(self_.py(), chunk))
22+
.try_collect()
23+
}
24+
}
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
use pyo3::{pyclass, pymethods, Bound, PyRef, PyResult};
2+
use vortex::array::ConstantEncoding;
3+
4+
use crate::arrays::{ArraySubclass, AsArrayRef, PyArray};
5+
use crate::scalar::PyScalar;
6+
7+
/// Concrete class for arrays with `vortex.constant` encoding.
8+
#[pyclass(name = "ConstantArray", module = "vortex", extends=PyArray, frozen)]
9+
pub(crate) struct PyConstantArray;
10+
11+
impl ArraySubclass for PyConstantArray {
12+
type Encoding = ConstantEncoding;
13+
}
14+
15+
#[pymethods]
16+
impl PyConstantArray {
17+
/// Return the scalar value of the constant array.
18+
pub fn scalar(self_: PyRef<'_, Self>) -> PyResult<Bound<PyScalar>> {
19+
PyScalar::init(self_.py(), self_.as_array_ref().scalar())
20+
}
21+
}

pyvortex/src/arrays/builtins/mod.rs

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
mod chunked;
2+
mod constant;
3+
mod struct_;
4+
5+
pub(crate) use chunked::*;
6+
pub(crate) use constant::*;
7+
use pyo3::prelude::*;
8+
pub(crate) use struct_::*;
9+
10+
use crate::arrays::PyArray;
11+
12+
/// Concrete class for arrays with `vortex.null` encoding.
13+
#[pyclass(name = "NullArray", module = "vortex", extends=PyArray, frozen)]
14+
pub(crate) struct PyNullArray;
15+
16+
/// Concrete class for arrays with `vortex.bool` encoding.
17+
#[pyclass(name = "BoolArray", module = "vortex", extends=PyArray, frozen)]
18+
pub(crate) struct PyBoolArray;
19+
20+
/// Concrete class for arrays with `vortex.primitive` encoding.
21+
#[pyclass(name = "PrimitiveArray", module = "vortex", extends=PyArray, frozen)]
22+
pub(crate) struct PyPrimitiveArray;
23+
24+
/// Concrete class for arrays with `vortex.varbin` encoding.
25+
#[pyclass(name = "VarBinArray", module = "vortex", extends=PyArray, frozen)]
26+
pub(crate) struct PyVarBinArray;
27+
28+
/// Concrete class for arrays with `vortex.varbinview` encoding.
29+
#[pyclass(name = "VarBinViewArray", module = "vortex", extends=PyArray, frozen)]
30+
pub(crate) struct PyVarBinViewArray;
31+
32+
/// Concrete class for arrays with `vortex.list` encoding.
33+
#[pyclass(name = "ListArray", module = "vortex", extends=PyArray, frozen)]
34+
pub(crate) struct PyListArray;
35+
36+
/// Concrete class for arrays with `vortex.ext` encoding.
37+
#[pyclass(name = "ExtensionArray", module = "vortex", extends=PyArray, frozen)]
38+
pub(crate) struct PyExtensionArray;
+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
use pyo3::exceptions::PyKeyError;
2+
use pyo3::{pyclass, pymethods, Bound, PyRef, PyResult};
3+
use vortex::array::StructEncoding;
4+
use vortex::variants::StructArrayTrait;
5+
6+
use crate::arrays::{ArraySubclass, AsArrayRef, PyArray};
7+
8+
/// Concrete class for arrays with `vortex.struct` encoding.
9+
#[pyclass(name = "StructArray", module = "vortex", extends=PyArray, frozen)]
10+
pub(crate) struct PyStructArray;
11+
12+
impl ArraySubclass for PyStructArray {
13+
type Encoding = StructEncoding;
14+
}
15+
16+
#[pymethods]
17+
impl PyStructArray {
18+
/// Returns the given field of the struct array.
19+
pub fn field<'py>(self_: PyRef<'py, Self>, name: &str) -> PyResult<Bound<'py, PyArray>> {
20+
let field = self_
21+
.as_array_ref()
22+
.maybe_null_field_by_name(name)
23+
.ok_or_else(|| PyKeyError::new_err(format!("Field name not found: {}", name)))?;
24+
PyArray::init(self_.py(), field)
25+
}
26+
}

0 commit comments

Comments
 (0)