Skip to content

Commit dabe5af

Browse files
authored
Basic Python encoding (#2551)
Make a few changes to support runtime-defined Python Vortex arrays. In the next PR, I plan to rename "encoding" back to "array" in PyVortex for consistency with the Rust APIs.
1 parent 165ce7a commit dabe5af

File tree

50 files changed

+439
-132
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+439
-132
lines changed

docs/api/python/encodings.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,5 @@ Compressed Encodings
108108
:members:
109109

110110

111-
.. autoclass:: vortex.FastLanesForEncoding
111+
.. autoclass:: vortex.FastLanesFoREncoding
112112
:members:

encodings/alp/src/alp/array.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use vortex_array::arrays::PrimitiveArray;
44
use vortex_array::patches::Patches;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::variants::PrimitiveArrayTrait;
7-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
7+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
88
use vortex_array::{
99
Array, ArrayCanonicalImpl, ArrayExt, ArrayImpl, ArrayRef, ArrayStatisticsImpl,
1010
ArrayValidityImpl, ArrayVariantsImpl, Canonical, Encoding, EncodingId, SerdeMetadata,
@@ -27,11 +27,16 @@ pub struct ALPArray {
2727

2828
pub struct ALPEncoding;
2929
impl Encoding for ALPEncoding {
30-
const ID: EncodingId = EncodingId::new_ref("vortex.alp");
3130
type Array = ALPArray;
3231
type Metadata = SerdeMetadata<ALPMetadata>;
3332
}
3433

34+
impl EncodingVTable for ALPEncoding {
35+
fn id(&self) -> EncodingId {
36+
EncodingId::new_ref("vortex.alp")
37+
}
38+
}
39+
3540
impl ALPArray {
3641
// TODO(ngates): remove try_new and panic on wrong DType?
3742
pub fn try_new(

encodings/alp/src/alp_rd/array.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use vortex_array::arrays::PrimitiveArray;
44
use vortex_array::patches::Patches;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::validity::Validity;
7-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
7+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
88
use vortex_array::{
99
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
1010
Canonical, Encoding, EncodingId, SerdeMetadata, ToCanonical,
@@ -30,11 +30,16 @@ pub struct ALPRDArray {
3030

3131
pub struct ALPRDEncoding;
3232
impl Encoding for ALPRDEncoding {
33-
const ID: EncodingId = EncodingId::new_ref("vortex.alprd");
3433
type Array = ALPRDArray;
3534
type Metadata = SerdeMetadata<ALPRDMetadata>;
3635
}
3736

37+
impl EncodingVTable for ALPRDEncoding {
38+
fn id(&self) -> EncodingId {
39+
EncodingId::new_ref("vortex.alprd")
40+
}
41+
}
42+
3843
impl ALPRDArray {
3944
pub fn try_new(
4045
dtype: DType,

encodings/bytebool/src/array.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::arrays::BoolArray;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::validity::Validity;
77
use vortex_array::variants::BoolArrayTrait;
8-
use vortex_array::vtable::VTableRef;
8+
use vortex_array::vtable::{EncodingVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayCanonicalImpl, ArrayImpl, ArrayStatisticsImpl, ArrayValidityImpl,
1111
ArrayVariantsImpl, Canonical, EmptyMetadata, Encoding, EncodingId, try_from_array_ref,
@@ -27,11 +27,16 @@ try_from_array_ref!(ByteBoolArray);
2727

2828
pub struct ByteBoolEncoding;
2929
impl Encoding for ByteBoolEncoding {
30-
const ID: EncodingId = EncodingId::new_ref("vortex.bytebool");
3130
type Array = ByteBoolArray;
3231
type Metadata = EmptyMetadata;
3332
}
3433

34+
impl EncodingVTable for ByteBoolEncoding {
35+
fn id(&self) -> EncodingId {
36+
EncodingId::new_ref("vortex.bytebool")
37+
}
38+
}
39+
3540
impl ByteBoolArray {
3641
pub fn new(buffer: ByteBuffer, validity: Validity) -> Self {
3742
let length = buffer.len();

encodings/datetime-parts/src/array.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::compute::try_cast;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::validity::Validity;
77
use vortex_array::variants::ExtensionArrayTrait;
8-
use vortex_array::vtable::VTableRef;
8+
use vortex_array::vtable::{EncodingVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl, ArrayVariantsImpl,
1111
Encoding, EncodingId, RkyvMetadata,
@@ -27,11 +27,16 @@ pub struct DateTimePartsArray {
2727

2828
pub struct DateTimePartsEncoding;
2929
impl Encoding for DateTimePartsEncoding {
30-
const ID: EncodingId = EncodingId::new_ref("vortex.datetimeparts");
3130
type Array = DateTimePartsArray;
3231
type Metadata = RkyvMetadata<DateTimePartsMetadata>;
3332
}
3433

34+
impl EncodingVTable for DateTimePartsEncoding {
35+
fn id(&self) -> EncodingId {
36+
EncodingId::new_ref("vortex.datetimeparts")
37+
}
38+
}
39+
3540
impl DateTimePartsArray {
3641
pub fn try_new(
3742
dtype: DType,

encodings/dict/src/array.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::builders::ArrayBuilder;
55
use vortex_array::compute::{scalar_at, take, take_into, try_cast};
66
use vortex_array::stats::{ArrayStats, StatsSetRef};
77
use vortex_array::variants::PrimitiveArrayTrait;
8-
use vortex_array::vtable::VTableRef;
8+
use vortex_array::vtable::{EncodingVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
1111
Canonical, Encoding, EncodingId, IntoArray, RkyvMetadata, ToCanonical,
@@ -25,11 +25,16 @@ pub struct DictArray {
2525

2626
pub struct DictEncoding;
2727
impl Encoding for DictEncoding {
28-
const ID: EncodingId = EncodingId::new_ref("vortex.dict");
2928
type Array = DictArray;
3029
type Metadata = RkyvMetadata<DictMetadata>;
3130
}
3231

32+
impl EncodingVTable for DictEncoding {
33+
fn id(&self) -> EncodingId {
34+
EncodingId::new_ref("vortex.dict")
35+
}
36+
}
37+
3338
impl DictArray {
3439
pub fn try_new(mut codes: ArrayRef, values: ArrayRef) -> VortexResult<Self> {
3540
if !codes.dtype().is_unsigned_int() {

encodings/fastlanes/src/bitpacking/mod.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use vortex_array::patches::Patches;
88
use vortex_array::stats::{ArrayStats, StatsSetRef};
99
use vortex_array::validity::Validity;
1010
use vortex_array::variants::PrimitiveArrayTrait;
11-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
11+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
1212
use vortex_array::{
1313
Array, ArrayCanonicalImpl, ArrayExt, ArrayImpl, ArrayStatisticsImpl, ArrayValidityImpl,
1414
ArrayVariantsImpl, Canonical, Encoding, EncodingId, RkyvMetadata, try_from_array_ref,
@@ -40,11 +40,16 @@ try_from_array_ref!(BitPackedArray);
4040

4141
pub struct BitPackedEncoding;
4242
impl Encoding for BitPackedEncoding {
43-
const ID: EncodingId = EncodingId::new_ref("fastlanes.bitpacked");
4443
type Array = BitPackedArray;
4544
type Metadata = RkyvMetadata<BitPackedMetadata>;
4645
}
4746

47+
impl EncodingVTable for BitPackedEncoding {
48+
fn id(&self) -> EncodingId {
49+
EncodingId::new_ref("fastlanes.bitpacked")
50+
}
51+
}
52+
4853
/// NB: All non-null values in the patches array are considered patches
4954
impl BitPackedArray {
5055
/// Create a new bitpacked array using a buffer of packed data.

encodings/fastlanes/src/delta/mod.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::arrays::PrimitiveArray;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::validity::Validity;
77
use vortex_array::variants::PrimitiveArrayTrait;
8-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
8+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
1111
ArrayVariantsImpl, Canonical, Encoding, EncodingId, RkyvMetadata,
@@ -34,11 +34,16 @@ pub struct DeltaArray {
3434

3535
pub struct DeltaEncoding;
3636
impl Encoding for DeltaEncoding {
37-
const ID: EncodingId = EncodingId::new_ref("fastlanes.delta");
3837
type Array = DeltaArray;
3938
type Metadata = RkyvMetadata<DeltaMetadata>;
4039
}
4140

41+
impl EncodingVTable for DeltaEncoding {
42+
fn id(&self) -> EncodingId {
43+
EncodingId::new_ref("fastlanes.delta")
44+
}
45+
}
46+
4247
/// A FastLanes-style delta-encoded array of primitive values.
4348
///
4449
/// A [`DeltaArray`] comprises a sequence of _chunks_ each representing 1,024 delta-encoded values,

encodings/fastlanes/src/for/mod.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::fmt::Debug;
33
pub use compress::*;
44
use vortex_array::stats::{ArrayStats, StatsSetRef};
55
use vortex_array::variants::PrimitiveArrayTrait;
6-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
6+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
77
use vortex_array::{
88
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
99
ArrayVariantsImpl, Canonical, Encoding, EncodingId,
@@ -28,11 +28,16 @@ pub struct FoRArray {
2828

2929
pub struct FoREncoding;
3030
impl Encoding for FoREncoding {
31-
const ID: EncodingId = EncodingId::new_ref("fastlanes.for");
3231
type Array = FoRArray;
3332
type Metadata = ScalarValueMetadata;
3433
}
3534

35+
impl EncodingVTable for FoREncoding {
36+
fn id(&self) -> EncodingId {
37+
EncodingId::new_ref("fastlanes.for")
38+
}
39+
}
40+
3641
impl FoRArray {
3742
pub fn try_new(encoded: ArrayRef, reference: Scalar) -> VortexResult<Self> {
3843
if reference.is_null() {

encodings/fsst/src/array.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use fsst::{Decompressor, Symbol};
22
use vortex_array::arrays::VarBinEncoding;
33
use vortex_array::stats::{ArrayStats, StatsSetRef};
44
use vortex_array::variants::{BinaryArrayTrait, Utf8ArrayTrait};
5-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
5+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
66
use vortex_array::{
77
Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl, ArrayVariantsImpl,
88
Encoding, EncodingId, SerdeMetadata, ToCanonical,
@@ -25,11 +25,16 @@ pub struct FSSTArray {
2525

2626
pub struct FSSTEncoding;
2727
impl Encoding for FSSTEncoding {
28-
const ID: EncodingId = EncodingId::new_ref("vortex.fsst");
2928
type Array = FSSTArray;
3029
type Metadata = SerdeMetadata<FSSTMetadata>;
3130
}
3231

32+
impl EncodingVTable for FSSTEncoding {
33+
fn id(&self) -> EncodingId {
34+
EncodingId::new_ref("vortex.fsst")
35+
}
36+
}
37+
3338
pub(crate) static SYMBOLS_DTYPE: DType = DType::Primitive(PType::U64, Nullability::NonNullable);
3439
pub(crate) static SYMBOL_LENS_DTYPE: DType = DType::Primitive(PType::U8, Nullability::NonNullable);
3540

@@ -74,7 +79,7 @@ impl FSSTArray {
7479
vortex_bail!(InvalidArgument: "uncompressed_lengths must have integer type and cannot be nullable, found {}", uncompressed_lengths.dtype());
7580
}
7681

77-
if codes.encoding() != VarBinEncoding::ID {
82+
if codes.encoding() != VarBinEncoding.id() {
7883
vortex_bail!(
7984
InvalidArgument: "codes must have varbin encoding, was {}",
8085
codes.encoding()

encodings/fsst/tests/fsst_tests.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
use vortex_array::arrays::builder::VarBinBuilder;
44
use vortex_array::compute::{filter, scalar_at, slice, take};
5-
use vortex_array::{Array, ArrayRef, Encoding, IntoArray, ToCanonical};
5+
use vortex_array::vtable::EncodingVTable;
6+
use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical};
67
use vortex_buffer::buffer;
78
use vortex_dtype::{DType, Nullability};
89
use vortex_fsst::{FSSTEncoding, fsst_compress, fsst_train_compressor};
@@ -52,7 +53,7 @@ fn test_fsst_array_ops() {
5253

5354
// test slice
5455
let fsst_sliced = slice(&fsst_array, 1, 3).unwrap();
55-
assert_eq!(fsst_sliced.encoding(), FSSTEncoding::ID);
56+
assert_eq!(fsst_sliced.encoding(), FSSTEncoding.id());
5657
assert_eq!(fsst_sliced.len(), 2);
5758
assert_nth_scalar!(
5859
fsst_sliced,
@@ -84,7 +85,7 @@ fn test_fsst_array_ops() {
8485
let mask = Mask::from_iter([false, true, false]);
8586

8687
let fsst_filtered = filter(&fsst_array, &mask).unwrap();
87-
assert_eq!(fsst_filtered.encoding(), FSSTEncoding::ID);
88+
assert_eq!(fsst_filtered.encoding(), FSSTEncoding.id());
8889
assert_eq!(fsst_filtered.len(), 1);
8990
assert_nth_scalar!(
9091
fsst_filtered,

encodings/runend/src/array.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use vortex_array::compute::{
66
};
77
use vortex_array::stats::{ArrayStats, StatsSetRef};
88
use vortex_array::variants::{BoolArrayTrait, PrimitiveArrayTrait};
9-
use vortex_array::vtable::VTableRef;
9+
use vortex_array::vtable::{EncodingVTable, VTableRef};
1010
use vortex_array::{
1111
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
1212
ArrayVariantsImpl, Canonical, Encoding, EncodingId, IntoArray, SerdeMetadata, ToCanonical,
@@ -33,11 +33,16 @@ try_from_array_ref!(RunEndArray);
3333

3434
pub struct RunEndEncoding;
3535
impl Encoding for RunEndEncoding {
36-
const ID: EncodingId = EncodingId::new_ref("vortex.runend");
3736
type Array = RunEndArray;
3837
type Metadata = SerdeMetadata<RunEndMetadata>;
3938
}
4039

40+
impl EncodingVTable for RunEndEncoding {
41+
fn id(&self) -> EncodingId {
42+
EncodingId::new_ref("vortex.runend")
43+
}
44+
}
45+
4146
impl RunEndArray {
4247
pub fn try_new(ends: ArrayRef, values: ArrayRef) -> VortexResult<Self> {
4348
let length = if ends.is_empty() {

encodings/sparse/src/lib.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::compute::{scalar_at, sub_scalar};
55
use vortex_array::patches::Patches;
66
use vortex_array::stats::{ArrayStats, Stat, StatsSet, StatsSetRef};
77
use vortex_array::variants::PrimitiveArrayTrait;
8-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
8+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl, Encoding, EncodingId,
1111
RkyvMetadata, ToCanonical, try_from_array_ref,
@@ -33,11 +33,16 @@ try_from_array_ref!(SparseArray);
3333

3434
pub struct SparseEncoding;
3535
impl Encoding for SparseEncoding {
36-
const ID: EncodingId = EncodingId::new_ref("vortex.sparse");
3736
type Array = SparseArray;
3837
type Metadata = RkyvMetadata<SparseMetadata>;
3938
}
4039

40+
impl EncodingVTable for SparseEncoding {
41+
fn id(&self) -> EncodingId {
42+
EncodingId::new_ref("vortex.sparse")
43+
}
44+
}
45+
4146
impl SparseArray {
4247
pub fn try_new(
4348
indices: ArrayRef,

encodings/zigzag/src/array.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use vortex_array::arrays::PrimitiveArray;
22
use vortex_array::stats::{ArrayStats, Precision, Stat, StatsSet, StatsSetRef};
33
use vortex_array::variants::PrimitiveArrayTrait;
4-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
4+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
55
use vortex_array::{
66
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
77
ArrayVariantsImpl, Canonical, EmptyMetadata, Encoding, EncodingId, ToCanonical,
@@ -26,11 +26,16 @@ try_from_array_ref!(ZigZagArray);
2626

2727
pub struct ZigZagEncoding;
2828
impl Encoding for ZigZagEncoding {
29-
const ID: EncodingId = EncodingId::new_ref("vortex.zigzag");
3029
type Array = ZigZagArray;
3130
type Metadata = EmptyMetadata;
3231
}
3332

33+
impl EncodingVTable for ZigZagEncoding {
34+
fn id(&self) -> EncodingId {
35+
EncodingId::new_ref("vortex.zigzag")
36+
}
37+
}
38+
3439
impl ZigZagArray {
3540
pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
3641
let encoded_dtype = encoded.dtype().clone();

fuzz/fuzz_targets/array_ops.rs

+8-7
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ use vortex_array::arrays::{
99
use vortex_array::compute::{
1010
SearchResult, SearchSortedSide, filter, scalar_at, search_sorted, slice, take,
1111
};
12-
use vortex_array::{Array, ArrayRef, Encoding};
12+
use vortex_array::vtable::EncodingVTable;
13+
use vortex_array::{Array, ArrayRef};
1314
use vortex_btrblocks::BtrBlocksCompressor;
1415
use vortex_fuzz::{Action, FuzzArrayAction, sort_canonical_array};
1516
use vortex_scalar::Scalar;
@@ -40,12 +41,12 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
4041
// TODO(robert): Ideally we'd preserve the encoding perfectly but this is close enough
4142
let mut sorted = sort_canonical_array(&current_array).unwrap();
4243
if !HashSet::from([
43-
PrimitiveEncoding::ID,
44-
VarBinEncoding::ID,
45-
VarBinViewEncoding::ID,
46-
BoolEncoding::ID,
47-
StructEncoding::ID,
48-
ListEncoding::ID,
44+
PrimitiveEncoding.id(),
45+
VarBinEncoding.id(),
46+
VarBinViewEncoding.id(),
47+
BoolEncoding.id(),
48+
StructEncoding.id(),
49+
ListEncoding.id(),
4950
])
5051
.contains(&current_array.encoding())
5152
{

0 commit comments

Comments
 (0)