From a2efb4d6381b66a568b0cac15c17b120f0c3ef4f Mon Sep 17 00:00:00 2001 From: ritchie Date: Wed, 11 Oct 2023 16:50:14 +0200 Subject: [PATCH] add arrow-rs --- Cargo.toml | 4 ++++ crates/nano-arrow/Cargo.toml | 8 ++++---- crates/nano-arrow/src/array/binary/mod.rs | 2 +- crates/nano-arrow/src/array/boolean/mod.rs | 2 +- crates/nano-arrow/src/array/dictionary/mod.rs | 2 +- .../src/array/fixed_size_binary/mod.rs | 2 +- .../nano-arrow/src/array/fixed_size_list/mod.rs | 2 +- crates/nano-arrow/src/array/list/mod.rs | 2 +- crates/nano-arrow/src/array/map/mod.rs | 2 +- crates/nano-arrow/src/array/mod.rs | 16 ++++++++-------- crates/nano-arrow/src/array/null.rs | 2 +- crates/nano-arrow/src/array/primitive/mod.rs | 2 +- crates/nano-arrow/src/array/struct_/mod.rs | 2 +- crates/nano-arrow/src/array/union/mod.rs | 2 +- crates/nano-arrow/src/array/utf8/mod.rs | 2 +- crates/nano-arrow/src/bitmap/immutable.rs | 4 ++-- crates/nano-arrow/src/buffer/immutable.rs | 4 ++-- crates/nano-arrow/src/buffer/mod.rs | 6 +++--- crates/nano-arrow/src/datatypes/field.rs | 10 +++++----- crates/nano-arrow/src/datatypes/mod.rs | 16 ++++++++-------- crates/polars-core/Cargo.toml | 2 ++ crates/polars-core/src/series/mod.rs | 9 +++++++++ 22 files changed, 59 insertions(+), 44 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 32ff9f44fae5..278cff6816de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,6 +82,10 @@ polars-json = { version = "0.33.2", path = "crates/polars-json", default-feature polars = { version = "0.33.2", path = "crates/polars", default-features = false } rand_distr = "0.4" reqwest = { version = "0.11", default-features = false } +arrow-array = {version = ">=41", default-features = false} +arrow-buffer = { version = ">=41", default-features = false } +arrow-data = { version = ">=41", default-features = false } +arrow-schema = { version = ">=41", default-features = false } [workspace.dependencies.arrow] package = "nano-arrow" diff --git a/crates/nano-arrow/Cargo.toml 
b/crates/nano-arrow/Cargo.toml index 659b16d9c8b6..641f569a86c5 100644 --- a/crates/nano-arrow/Cargo.toml +++ b/crates/nano-arrow/Cargo.toml @@ -68,10 +68,10 @@ multiversion = { workspace = true, optional = true } ahash = { workspace = true } # Support conversion to/from arrow-rs -arrow-array = { version = ">=40", optional = true } -arrow-buffer = { version = ">=40", optional = true } -arrow-data = { version = ">=40", optional = true } -arrow-schema = { version = ">=40", optional = true } +arrow-array = { workspace = true, optional = true } +arrow-buffer = { workspace = true, optional = true } +arrow-data = { workspace = true, optional = true } +arrow-schema = { workspace = true, optional = true } # parquet support [dependencies.parquet2] diff --git a/crates/nano-arrow/src/array/binary/mod.rs b/crates/nano-arrow/src/array/binary/mod.rs index 22432e059635..94cebf85ca4a 100644 --- a/crates/nano-arrow/src/array/binary/mod.rs +++ b/crates/nano-arrow/src/array/binary/mod.rs @@ -20,7 +20,7 @@ pub use mutable_values::*; mod mutable; pub use mutable::*; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; /// A [`BinaryArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<Vec<u8>>>`. 
diff --git a/crates/nano-arrow/src/array/boolean/mod.rs b/crates/nano-arrow/src/array/boolean/mod.rs index 4a13b1893ab1..c1dd4785231e 100644 --- a/crates/nano-arrow/src/array/boolean/mod.rs +++ b/crates/nano-arrow/src/array/boolean/mod.rs @@ -7,7 +7,7 @@ use crate::datatypes::{DataType, PhysicalType}; use crate::error::Error; use crate::trusted_len::TrustedLen; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/dictionary/mod.rs b/crates/nano-arrow/src/array/dictionary/mod.rs index 056621461734..48d2334509e0 100644 --- a/crates/nano-arrow/src/array/dictionary/mod.rs +++ b/crates/nano-arrow/src/array/dictionary/mod.rs @@ -9,7 +9,7 @@ use crate::scalar::{new_scalar, Scalar}; use crate::trusted_len::TrustedLen; use crate::types::NativeType; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/fixed_size_binary/mod.rs b/crates/nano-arrow/src/array/fixed_size_binary/mod.rs index 40c7bce26a93..14bd2aa1e512 100644 --- a/crates/nano-arrow/src/array/fixed_size_binary/mod.rs +++ b/crates/nano-arrow/src/array/fixed_size_binary/mod.rs @@ -4,7 +4,7 @@ use crate::buffer::Buffer; use crate::datatypes::DataType; use crate::error::Error; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/fixed_size_list/mod.rs b/crates/nano-arrow/src/array/fixed_size_list/mod.rs index c4e401549f74..40eb5016b9b7 100644 --- a/crates/nano-arrow/src/array/fixed_size_list/mod.rs +++ b/crates/nano-arrow/src/array/fixed_size_list/mod.rs @@ -3,7 +3,7 @@ use crate::bitmap::Bitmap; use crate::datatypes::{DataType, Field}; use crate::error::Error; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/list/mod.rs b/crates/nano-arrow/src/array/list/mod.rs index 
69d0bca10beb..f021deb4d7da 100644 --- a/crates/nano-arrow/src/array/list/mod.rs +++ b/crates/nano-arrow/src/array/list/mod.rs @@ -5,7 +5,7 @@ use crate::datatypes::{DataType, Field}; use crate::error::Error; use crate::offset::{Offset, Offsets, OffsetsBuffer}; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/map/mod.rs b/crates/nano-arrow/src/array/map/mod.rs index 7f8d6b02e100..abc7993fd7d4 100644 --- a/crates/nano-arrow/src/array/map/mod.rs +++ b/crates/nano-arrow/src/array/map/mod.rs @@ -5,7 +5,7 @@ use crate::datatypes::{DataType, Field}; use crate::error::Error; use crate::offset::OffsetsBuffer; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/mod.rs b/crates/nano-arrow/src/array/mod.rs index 17b611b85092..4e8e9a2177f2 100644 --- a/crates/nano-arrow/src/array/mod.rs +++ b/crates/nano-arrow/src/array/mod.rs @@ -399,7 +399,7 @@ pub fn new_null_array(data_type: DataType, length: usize) -> Box { /// Trait providing bi-directional conversion between arrow2 [`Array`] and arrow-rs [`ArrayData`] /// /// [`ArrayData`]: arrow_data::ArrayData -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] pub trait Arrow2Arrow: Array { /// Convert this [`Array`] into [`ArrayData`] fn to_data(&self) -> arrow_data::ArrayData; @@ -408,7 +408,7 @@ pub trait Arrow2Arrow: Array { fn from_data(data: &arrow_data::ArrayData) -> Self; } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] macro_rules! to_data_dyn { ($array:expr, $ty:ty) => {{ let f = |x: &$ty| x.to_data(); @@ -416,28 +416,28 @@ macro_rules! 
to_data_dyn { }}; } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From> for arrow_array::ArrayRef { fn from(value: Box) -> Self { value.as_ref().into() } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From<&dyn Array> for arrow_array::ArrayRef { fn from(value: &dyn Array) -> Self { arrow_array::make_array(to_data(value)) } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for Box { fn from(value: arrow_array::ArrayRef) -> Self { value.as_ref().into() } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From<&dyn arrow_array::Array> for Box { fn from(value: &dyn arrow_array::Array) -> Self { from_data(&value.to_data()) @@ -445,7 +445,7 @@ impl From<&dyn arrow_array::Array> for Box { } /// Convert an arrow2 [`Array`] to [`arrow_data::ArrayData`] -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData { use crate::datatypes::PhysicalType::*; match array.data_type().to_physical_type() { @@ -474,7 +474,7 @@ pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData { } /// Convert an [`arrow_data::ArrayData`] to arrow2 [`Array`] -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] pub fn from_data(data: &arrow_data::ArrayData) -> Box { use crate::datatypes::PhysicalType::*; let data_type: DataType = data.data_type().clone().into(); diff --git a/crates/nano-arrow/src/array/null.rs b/crates/nano-arrow/src/array/null.rs index e7e14426efbe..4bbd11e8805d 100644 --- a/crates/nano-arrow/src/array/null.rs +++ b/crates/nano-arrow/src/array/null.rs @@ -178,7 +178,7 @@ impl FromFfi for NullArray { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod arrow { use arrow_data::{ArrayData, ArrayDataBuilder}; diff --git a/crates/nano-arrow/src/array/primitive/mod.rs b/crates/nano-arrow/src/array/primitive/mod.rs index c487d41eec96..b3d649a670be 100644 --- a/crates/nano-arrow/src/array/primitive/mod.rs +++ 
b/crates/nano-arrow/src/array/primitive/mod.rs @@ -9,7 +9,7 @@ use crate::error::Error; use crate::trusted_len::TrustedLen; use crate::types::{days_ms, f16, i256, months_days_ns, NativeType}; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/struct_/mod.rs b/crates/nano-arrow/src/array/struct_/mod.rs index 73a0c5741976..8107c885e4ef 100644 --- a/crates/nano-arrow/src/array/struct_/mod.rs +++ b/crates/nano-arrow/src/array/struct_/mod.rs @@ -3,7 +3,7 @@ use crate::bitmap::Bitmap; use crate::datatypes::{DataType, Field}; use crate::error::Error; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/union/mod.rs b/crates/nano-arrow/src/array/union/mod.rs index 91917cf7725d..9150920ea021 100644 --- a/crates/nano-arrow/src/array/union/mod.rs +++ b/crates/nano-arrow/src/array/union/mod.rs @@ -5,7 +5,7 @@ use crate::datatypes::{DataType, Field, UnionMode}; use crate::error::Error; use crate::scalar::{new_scalar, Scalar}; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/array/utf8/mod.rs b/crates/nano-arrow/src/array/utf8/mod.rs index e5946eae0f65..bae0169224e9 100644 --- a/crates/nano-arrow/src/array/utf8/mod.rs +++ b/crates/nano-arrow/src/array/utf8/mod.rs @@ -10,7 +10,7 @@ use crate::error::{Error, Result}; use crate::offset::{Offset, Offsets, OffsetsBuffer}; use crate::trusted_len::TrustedLen; -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] mod data; mod ffi; pub(super) mod fmt; diff --git a/crates/nano-arrow/src/bitmap/immutable.rs b/crates/nano-arrow/src/bitmap/immutable.rs index 24a6539b082c..c29ac7a41314 100644 --- a/crates/nano-arrow/src/bitmap/immutable.rs +++ b/crates/nano-arrow/src/bitmap/immutable.rs @@ -427,7 +427,7 @@ impl Bitmap { /// Create a new [`Bitmap`] from an arrow [`NullBuffer`] /// 
/// [`NullBuffer`]: arrow_buffer::buffer::NullBuffer - #[cfg(feature = "arrow-rs")] + #[cfg(feature = "arrow_rs")] pub fn from_null_buffer(value: arrow_buffer::buffer::NullBuffer) -> Self { let offset = value.offset(); let length = value.len(); @@ -459,7 +459,7 @@ impl IntoIterator for Bitmap { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for arrow_buffer::buffer::NullBuffer { fn from(value: Bitmap) -> Self { let null_count = value.unset_bits; diff --git a/crates/nano-arrow/src/buffer/immutable.rs b/crates/nano-arrow/src/buffer/immutable.rs index 5fcad63bef1a..4093734a1114 100644 --- a/crates/nano-arrow/src/buffer/immutable.rs +++ b/crates/nano-arrow/src/buffer/immutable.rs @@ -310,14 +310,14 @@ impl IntoIterator for Buffer { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for Buffer { fn from(value: arrow_buffer::Buffer) -> Self { Self::from_bytes(crate::buffer::to_bytes(value)) } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From> for arrow_buffer::Buffer { fn from(value: Buffer) -> Self { crate::buffer::to_buffer(value.data).slice_with_length( diff --git a/crates/nano-arrow/src/buffer/mod.rs b/crates/nano-arrow/src/buffer/mod.rs index 10269978345f..ef78d5a26e6c 100644 --- a/crates/nano-arrow/src/buffer/mod.rs +++ b/crates/nano-arrow/src/buffer/mod.rs @@ -10,7 +10,7 @@ use crate::ffi::InternalArrowArray; pub(crate) enum BytesAllocator { InternalArrowArray(InternalArrowArray), - #[cfg(feature = "arrow-rs")] + #[cfg(feature = "arrow_rs")] Arrow(arrow_buffer::Buffer), } pub(crate) type BytesInner = foreign_vec::ForeignVec; @@ -65,7 +65,7 @@ impl From> for Bytes { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] pub(crate) fn to_buffer( value: std::sync::Arc>, ) -> arrow_buffer::Buffer { @@ -76,7 +76,7 @@ pub(crate) fn to_buffer( unsafe { arrow_buffer::Buffer::from_custom_allocation(ptr, len, value) } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] pub(crate) fn 
to_bytes(value: arrow_buffer::Buffer) -> Bytes { let ptr = value.as_ptr(); let align = ptr.align_offset(std::mem::align_of::()); diff --git a/crates/nano-arrow/src/datatypes/field.rs b/crates/nano-arrow/src/datatypes/field.rs index 79035480d550..a32396780cdf 100644 --- a/crates/nano-arrow/src/datatypes/field.rs +++ b/crates/nano-arrow/src/datatypes/field.rs @@ -53,7 +53,7 @@ impl Field { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for arrow_schema::Field { fn from(value: Field) -> Self { Self::new(value.name, value.data_type.into(), value.is_nullable) @@ -61,14 +61,14 @@ impl From for arrow_schema::Field { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for Field { fn from(value: arrow_schema::Field) -> Self { (&value).into() } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From<&arrow_schema::Field> for Field { fn from(value: &arrow_schema::Field) -> Self { let data_type = value.data_type().clone().into(); @@ -81,14 +81,14 @@ impl From<&arrow_schema::Field> for Field { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for Field { fn from(value: arrow_schema::FieldRef) -> Self { value.as_ref().into() } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From<&arrow_schema::FieldRef> for Field { fn from(value: &arrow_schema::FieldRef) -> Self { value.as_ref().into() diff --git a/crates/nano-arrow/src/datatypes/mod.rs b/crates/nano-arrow/src/datatypes/mod.rs index 4cfb9ad63ada..95ba5e69bff8 100644 --- a/crates/nano-arrow/src/datatypes/mod.rs +++ b/crates/nano-arrow/src/datatypes/mod.rs @@ -159,7 +159,7 @@ pub enum DataType { Extension(String, Box, Option), } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for arrow_schema::DataType { fn from(value: DataType) -> Self { use arrow_schema::{Field as ArrowField, UnionFields}; @@ -218,7 +218,7 @@ impl From for arrow_schema::DataType { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = 
"arrow_rs")] impl From for DataType { fn from(value: arrow_schema::DataType) -> Self { use arrow_schema::DataType; @@ -291,7 +291,7 @@ pub enum UnionMode { Sparse, } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for arrow_schema::UnionMode { fn from(value: UnionMode) -> Self { match value { @@ -301,7 +301,7 @@ impl From for arrow_schema::UnionMode { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for UnionMode { fn from(value: arrow_schema::UnionMode) -> Self { match value { @@ -347,7 +347,7 @@ pub enum TimeUnit { Nanosecond, } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for arrow_schema::TimeUnit { fn from(value: TimeUnit) -> Self { match value { @@ -359,7 +359,7 @@ impl From for arrow_schema::TimeUnit { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for TimeUnit { fn from(value: arrow_schema::TimeUnit) -> Self { match value { @@ -384,7 +384,7 @@ pub enum IntervalUnit { MonthDayNano, } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for arrow_schema::IntervalUnit { fn from(value: IntervalUnit) -> Self { match value { @@ -395,7 +395,7 @@ impl From for arrow_schema::IntervalUnit { } } -#[cfg(feature = "arrow-rs")] +#[cfg(feature = "arrow_rs")] impl From for IntervalUnit { fn from(value: arrow_schema::IntervalUnit) -> Self { match value { diff --git a/crates/polars-core/Cargo.toml b/crates/polars-core/Cargo.toml index 2b9984fa2a2c..5579749de621 100644 --- a/crates/polars-core/Cargo.toml +++ b/crates/polars-core/Cargo.toml @@ -16,6 +16,7 @@ polars-utils = { workspace = true } ahash = { workspace = true } arrow = { workspace = true } +arrow-array = { workspace = true, optional = true } bitflags = { workspace = true } bytemuck = { workspace = true } chrono = { workspace = true, optional = true } @@ -108,6 +109,7 @@ chunked_ids = [] describe = [] timezones = ["chrono-tz", "arrow/chrono-tz", "polars-arrow/timezones"] dynamic_group_by = 
["dtype-datetime", "dtype-date"] +arrow_rs = ["arrow-array", "arrow/arrow_rs"] # opt-in datatypes for Series dtype-date = ["temporal"] diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 67a13368839a..2ec370820c67 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -232,6 +232,15 @@ impl Series { self } + pub fn from_arrow(name: &str, array: ArrayRef) -> PolarsResult { + Self::try_from((name, array)) + } + + #[cfg(feature = "arrow_rs")] + pub fn from_arrow_rs(name: &str, array: &dyn arrow_array::Array) -> PolarsResult { + Self::from_arrow(name, array.into()) + } + /// Shrink the capacity of this array to fit its length. pub fn shrink_to_fit(&mut self) { self._get_inner_mut().shrink_to_fit()