Skip to content

Commit

Permalink
add arrow-rs
Browse files (browse the repository at this point in the history)
  • Loading branch information
ritchie46 committed Oct 11, 2023
1 parent 385b0ac commit a2efb4d
Show file tree
Hide file tree
Showing 22 changed files with 59 additions and 44 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ polars-json = { version = "0.33.2", path = "crates/polars-json", default-feature
polars = { version = "0.33.2", path = "crates/polars", default-features = false }
rand_distr = "0.4"
reqwest = { version = "0.11", default-features = false }
arrow-array = {version = ">=41", default-features = false}
arrow-buffer = { version = ">=41", default-features = false }
arrow-data = { version = ">=41", default-features = false }
arrow-schema = { version = ">=41", default-features = false }

[workspace.dependencies.arrow]
package = "nano-arrow"
Expand Down
8 changes: 4 additions & 4 deletions crates/nano-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ multiversion = { workspace = true, optional = true }
ahash = { workspace = true }

# Support conversion to/from arrow-rs
arrow-array = { version = ">=40", optional = true }
arrow-buffer = { version = ">=40", optional = true }
arrow-data = { version = ">=40", optional = true }
arrow-schema = { version = ">=40", optional = true }
arrow-array = { workspace = true, optional = true }
arrow-buffer = { workspace = true, optional = true }
arrow-data = { workspace = true, optional = true }
arrow-schema = { workspace = true, optional = true }

# parquet support
[dependencies.parquet2]
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub use mutable_values::*;
mod mutable;
pub use mutable::*;

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;

/// A [`BinaryArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<Vec<u8>>>`.
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::datatypes::{DataType, PhysicalType};
use crate::error::Error;
use crate::trusted_len::TrustedLen;

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::scalar::{new_scalar, Scalar};
use crate::trusted_len::TrustedLen;
use crate::types::NativeType;

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::buffer::Buffer;
use crate::datatypes::DataType;
use crate::error::Error;

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::bitmap::Bitmap;
use crate::datatypes::{DataType, Field};
use crate::error::Error;

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::datatypes::{DataType, Field};
use crate::error::Error;
use crate::offset::{Offset, Offsets, OffsetsBuffer};

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/map/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::datatypes::{DataType, Field};
use crate::error::Error;
use crate::offset::OffsetsBuffer;

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
16 changes: 8 additions & 8 deletions crates/nano-arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ pub fn new_null_array(data_type: DataType, length: usize) -> Box<dyn Array> {
/// Trait providing bi-directional conversion between arrow2 [`Array`] and arrow-rs [`ArrayData`]
///
/// [`ArrayData`]: arrow_data::ArrayData
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
pub trait Arrow2Arrow: Array {
/// Convert this [`Array`] into [`ArrayData`]
fn to_data(&self) -> arrow_data::ArrayData;
Expand All @@ -408,44 +408,44 @@ pub trait Arrow2Arrow: Array {
fn from_data(data: &arrow_data::ArrayData) -> Self;
}

/// Expands to a `general_dyn!` invocation that receives `$array`, `$ty`, and a closure
/// calling `to_data()` on a `&$ty`.
///
/// NOTE(review): `general_dyn!` is defined outside this view; presumably it downcasts
/// `$array` to `$ty` before applying the closure — confirm against its definition.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
macro_rules! to_data_dyn {
($array:expr, $ty:ty) => {{
// The explicit `&$ty` annotation pins the concrete type `to_data` is called on.
let f = |x: &$ty| x.to_data();
general_dyn!($array, $ty, f)
}};
}

/// Converts an owned, boxed [`Array`] into an arrow-rs `ArrayRef`.
///
/// The box is only borrowed: `as_ref()` yields a reference to the boxed array and the
/// actual conversion is performed by `into()` on that reference.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<Box<dyn Array>> for arrow_array::ArrayRef {
fn from(value: Box<dyn Array>) -> Self {
value.as_ref().into()
}
}

/// Converts a borrowed [`Array`] into an arrow-rs `ArrayRef`.
///
/// The array is first turned into array data with `to_data`, and that data is then
/// passed to `arrow_array::make_array` to build the resulting `ArrayRef`.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<&dyn Array> for arrow_array::ArrayRef {
fn from(value: &dyn Array) -> Self {
arrow_array::make_array(to_data(value))
}
}

/// Converts an owned arrow-rs `ArrayRef` into a boxed [`Array`].
///
/// The `ArrayRef` is only borrowed: `as_ref()` yields a reference to the underlying
/// arrow-rs array and the actual conversion is performed by `into()` on that reference.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<arrow_array::ArrayRef> for Box<dyn Array> {
fn from(value: arrow_array::ArrayRef) -> Self {
value.as_ref().into()
}
}

/// Converts a borrowed arrow-rs array into a boxed [`Array`].
///
/// The arrow-rs array is asked for its data via `to_data()`, and a reference to that
/// data is handed to `from_data` to build the boxed array.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<&dyn arrow_array::Array> for Box<dyn Array> {
fn from(value: &dyn arrow_array::Array) -> Self {
from_data(&value.to_data())
}
}

/// Convert an arrow2 [`Array`] to [`arrow_data::ArrayData`]
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData {
use crate::datatypes::PhysicalType::*;
match array.data_type().to_physical_type() {
Expand Down Expand Up @@ -474,7 +474,7 @@ pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData {
}

/// Convert an [`arrow_data::ArrayData`] to arrow2 [`Array`]
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
pub fn from_data(data: &arrow_data::ArrayData) -> Box<dyn Array> {
use crate::datatypes::PhysicalType::*;
let data_type: DataType = data.data_type().clone().into();
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ impl<A: ffi::ArrowArrayRef> FromFfi<A> for NullArray {
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod arrow {
use arrow_data::{ArrayData, ArrayDataBuilder};

Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::error::Error;
use crate::trusted_len::TrustedLen;
use crate::types::{days_ms, f16, i256, months_days_ns, NativeType};

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::bitmap::Bitmap;
use crate::datatypes::{DataType, Field};
use crate::error::Error;

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/union/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::datatypes::{DataType, Field, UnionMode};
use crate::error::Error;
use crate::scalar::{new_scalar, Scalar};

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
2 changes: 1 addition & 1 deletion crates/nano-arrow/src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::error::{Error, Result};
use crate::offset::{Offset, Offsets, OffsetsBuffer};
use crate::trusted_len::TrustedLen;

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
Expand Down
4 changes: 2 additions & 2 deletions crates/nano-arrow/src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ impl Bitmap {
/// Create a new [`Bitmap`] from an arrow [`NullBuffer`]
///
/// [`NullBuffer`]: arrow_buffer::buffer::NullBuffer
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
pub fn from_null_buffer(value: arrow_buffer::buffer::NullBuffer) -> Self {
let offset = value.offset();
let length = value.len();
Expand Down Expand Up @@ -459,7 +459,7 @@ impl IntoIterator for Bitmap {
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<Bitmap> for arrow_buffer::buffer::NullBuffer {
fn from(value: Bitmap) -> Self {
let null_count = value.unset_bits;
Expand Down
4 changes: 2 additions & 2 deletions crates/nano-arrow/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,14 +310,14 @@ impl<T: Copy> IntoIterator for Buffer<T> {
}
}

/// Converts an arrow-rs `Buffer` into a [`Buffer<T>`] for any `NativeType` `T`.
///
/// The arrow-rs buffer is consumed by `crate::buffer::to_bytes`, and the value it
/// returns is wrapped with `Self::from_bytes`.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl<T: crate::types::NativeType> From<arrow_buffer::Buffer> for Buffer<T> {
fn from(value: arrow_buffer::Buffer) -> Self {
Self::from_bytes(crate::buffer::to_bytes(value))
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl<T: crate::types::NativeType> From<Buffer<T>> for arrow_buffer::Buffer {
fn from(value: Buffer<T>) -> Self {
crate::buffer::to_buffer(value.data).slice_with_length(
Expand Down
6 changes: 3 additions & 3 deletions crates/nano-arrow/src/buffer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::ffi::InternalArrowArray;
pub(crate) enum BytesAllocator {
InternalArrowArray(InternalArrowArray),

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
Arrow(arrow_buffer::Buffer),
}
pub(crate) type BytesInner<T> = foreign_vec::ForeignVec<BytesAllocator, T>;
Expand Down Expand Up @@ -65,7 +65,7 @@ impl<T> From<BytesInner<T>> for Bytes<T> {
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
pub(crate) fn to_buffer<T: crate::types::NativeType>(
value: std::sync::Arc<Bytes<T>>,
) -> arrow_buffer::Buffer {
Expand All @@ -76,7 +76,7 @@ pub(crate) fn to_buffer<T: crate::types::NativeType>(
unsafe { arrow_buffer::Buffer::from_custom_allocation(ptr, len, value) }
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
pub(crate) fn to_bytes<T: crate::types::NativeType>(value: arrow_buffer::Buffer) -> Bytes<T> {
let ptr = value.as_ptr();
let align = ptr.align_offset(std::mem::align_of::<T>());
Expand Down
10 changes: 5 additions & 5 deletions crates/nano-arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,22 +53,22 @@ impl Field {
}
}

/// Converts a [`Field`] into an arrow-rs `Field`.
///
/// The arrow-rs field is created from this field's name, its data type (converted with
/// `into()`), and its nullability flag; the metadata entries are then collected and
/// attached with `with_metadata`.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<Field> for arrow_schema::Field {
fn from(value: Field) -> Self {
Self::new(value.name, value.data_type.into(), value.is_nullable)
.with_metadata(value.metadata.into_iter().collect())
}
}

/// Converts an owned arrow-rs `Field` into a [`Field`].
///
/// This is a convenience wrapper: it borrows `value` and defers to the conversion from
/// a reference via `into()`.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<arrow_schema::Field> for Field {
fn from(value: arrow_schema::Field) -> Self {
(&value).into()
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<&arrow_schema::Field> for Field {
fn from(value: &arrow_schema::Field) -> Self {
let data_type = value.data_type().clone().into();
Expand All @@ -81,14 +81,14 @@ impl From<&arrow_schema::Field> for Field {
}
}

/// Converts an owned arrow-rs `FieldRef` into a [`Field`].
///
/// The `FieldRef` is only borrowed: `as_ref()` yields a reference to the field it
/// points to and the actual conversion is performed by `into()` on that reference.
#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<arrow_schema::FieldRef> for Field {
fn from(value: arrow_schema::FieldRef) -> Self {
value.as_ref().into()
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<&arrow_schema::FieldRef> for Field {
fn from(value: &arrow_schema::FieldRef) -> Self {
value.as_ref().into()
Expand Down
16 changes: 8 additions & 8 deletions crates/nano-arrow/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ pub enum DataType {
Extension(String, Box<DataType>, Option<String>),
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<DataType> for arrow_schema::DataType {
fn from(value: DataType) -> Self {
use arrow_schema::{Field as ArrowField, UnionFields};
Expand Down Expand Up @@ -218,7 +218,7 @@ impl From<DataType> for arrow_schema::DataType {
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<arrow_schema::DataType> for DataType {
fn from(value: arrow_schema::DataType) -> Self {
use arrow_schema::DataType;
Expand Down Expand Up @@ -291,7 +291,7 @@ pub enum UnionMode {
Sparse,
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<UnionMode> for arrow_schema::UnionMode {
fn from(value: UnionMode) -> Self {
match value {
Expand All @@ -301,7 +301,7 @@ impl From<UnionMode> for arrow_schema::UnionMode {
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<arrow_schema::UnionMode> for UnionMode {
fn from(value: arrow_schema::UnionMode) -> Self {
match value {
Expand Down Expand Up @@ -347,7 +347,7 @@ pub enum TimeUnit {
Nanosecond,
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<TimeUnit> for arrow_schema::TimeUnit {
fn from(value: TimeUnit) -> Self {
match value {
Expand All @@ -359,7 +359,7 @@ impl From<TimeUnit> for arrow_schema::TimeUnit {
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<arrow_schema::TimeUnit> for TimeUnit {
fn from(value: arrow_schema::TimeUnit) -> Self {
match value {
Expand All @@ -384,7 +384,7 @@ pub enum IntervalUnit {
MonthDayNano,
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<IntervalUnit> for arrow_schema::IntervalUnit {
fn from(value: IntervalUnit) -> Self {
match value {
Expand All @@ -395,7 +395,7 @@ impl From<IntervalUnit> for arrow_schema::IntervalUnit {
}
}

#[cfg(feature = "arrow-rs")]
#[cfg(feature = "arrow_rs")]
impl From<arrow_schema::IntervalUnit> for IntervalUnit {
fn from(value: arrow_schema::IntervalUnit) -> Self {
match value {
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ polars-utils = { workspace = true }

ahash = { workspace = true }
arrow = { workspace = true }
arrow-array = { workspace = true, optional = true }
bitflags = { workspace = true }
bytemuck = { workspace = true }
chrono = { workspace = true, optional = true }
Expand Down Expand Up @@ -108,6 +109,7 @@ chunked_ids = []
describe = []
timezones = ["chrono-tz", "arrow/chrono-tz", "polars-arrow/timezones"]
dynamic_group_by = ["dtype-datetime", "dtype-date"]
arrow_rs = ["arrow-array", "arrow/arrow_rs"]

# opt-in datatypes for Series
dtype-date = ["temporal"]
Expand Down
Loading

0 comments on commit a2efb4d

Please sign in to comment.