diff --git a/crates/nano-arrow/Cargo.toml b/crates/nano-arrow/Cargo.toml index 0812ff9eb347..a7ef6583e6dc 100644 --- a/crates/nano-arrow/Cargo.toml +++ b/crates/nano-arrow/Cargo.toml @@ -58,7 +58,7 @@ futures = { workspace = true, optional = true } async-stream = { version = "0.3.2", optional = true } # avro support -avro-schema = { version = "0.3", optional = true } +avro-schema = { workspace = true, optional = true } # for division/remainder optimization at runtime strength_reduce = { version = "0.2", optional = true } @@ -150,7 +150,7 @@ io_parquet_brotli = ["parquet2/brotli"] # parquet bloom filter functions io_parquet_bloom_filter = ["parquet2/bloom_filter"] -io_avro = ["avro-schema"] +io_avro = ["avro-schema", "polars-error/avro-schema"] io_avro_compression = [ "avro-schema/compression", ] diff --git a/crates/nano-arrow/src/io/avro/mod.rs b/crates/nano-arrow/src/io/avro/mod.rs index bf7bda85f197..f535f4ab68a1 100644 --- a/crates/nano-arrow/src/io/avro/mod.rs +++ b/crates/nano-arrow/src/io/avro/mod.rs @@ -2,12 +2,6 @@ pub use avro_schema; -impl From for crate::error::Error { - fn from(error: avro_schema::error::Error) -> Self { - Self::ExternalFormat(error.to_string()) - } -} - pub mod read; pub mod write; @@ -21,9 +15,7 @@ macro_rules! avro_decode { loop { if j > 9 { // if j * 7 > 64 - return Err(Error::ExternalFormat( - "zigzag decoding failed - corrupt avro file".to_string(), - )); + polars_error::polars_bail!(oos = "zigzag decoding failed - corrupt avro file") } $reader.read_exact(&mut buf[..])$($_await)*?; i |= (u64::from(buf[0] & 0x7F)) << (j * 7); diff --git a/crates/nano-arrow/src/io/avro/read/deserialize.rs b/crates/nano-arrow/src/io/avro/read/deserialize.rs index b81efdf831b8..4a1e8d14ebe6 100644 --- a/crates/nano-arrow/src/io/avro/read/deserialize.rs +++ b/crates/nano-arrow/src/io/avro/read/deserialize.rs @@ -2,13 +2,13 @@ use std::convert::TryInto; use avro_schema::file::Block; use avro_schema::schema::{Enum, Field as AvroField, Record, Schema as AvroSchema}; +use polars_error::{polars_bail, polars_err, PolarsResult}; use super::nested::*; use super::util; use crate::array::*; use crate::chunk::Chunk; use crate::datatypes::*; -use crate::error::{Error, Result}; use crate::types::months_days_ns; use crate::with_match_primitive_type; @@ -16,7 +16,7 @@ fn make_mutable( data_type: &DataType, avro_field: Option<&AvroSchema>, capacity: usize, -) -> Result> { +) -> PolarsResult> { Ok(match data_type.to_physical_type() { PhysicalType::Boolean => { Box::new(MutableBooleanArray::with_capacity(capacity)) as Box @@ -57,14 +57,14 @@ fn make_mutable( let values = fields .iter() .map(|field| make_mutable(field.data_type(), None, capacity)) - .collect::>>()?; + .collect::>>()?; Box::new(DynMutableStructArray::new(values, data_type.clone())) as Box }, other => { - return Err(Error::NotYetImplemented(format!( + polars_bail!(nyi = "Deserializing type {other:#?} is still not implemented" - ))) + ) }, }, }) @@ -83,7 +83,7 @@ fn deserialize_item<'a>( is_nullable: bool, avro_field: &AvroSchema, mut block: &'a [u8], -) -> Result<&'a [u8]> { +) -> PolarsResult<&'a [u8]> { if is_nullable { let variant = util::zigzag_i64(&mut block)?; let is_null_first = is_union_null_first(avro_field); @@ -99,7 +99,7 @@ fn deserialize_value<'a>( array: &mut dyn MutableArray, avro_field: &AvroSchema, mut block: &'a [u8], -) -> Result<&'a [u8]> { +) -> PolarsResult<&'a [u8]> { let data_type = array.data_type(); match data_type { DataType::List(inner) => { @@ -250,8 +250,8 @@ fn deserialize_value<'a>( let len = match avro_inner { AvroSchema::Bytes(_) => { util::zigzag_i64(&mut block)?.try_into().map_err(|_| { - Error::ExternalFormat( - "Avro format contains a non-usize number of bytes".to_string(), + polars_err!(oos = + "Avro format contains a non-usize number of bytes" ) })? }, @@ -259,9 +259,9 @@ fn deserialize_value<'a>( _ => unreachable!(), }; if len > 16 { - return Err(Error::ExternalFormat( - "Avro decimal bytes return more than 16 bytes".to_string(), - )); + polars_bail!(oos = + "Avro decimal bytes return more than 16 bytes" + ) } let mut bytes = [0u8; 16]; bytes[..len].copy_from_slice(&block[..len]); @@ -277,8 +277,8 @@ fn deserialize_value<'a>( }, PhysicalType::Utf8 => { let len: usize = util::zigzag_i64(&mut block)?.try_into().map_err(|_| { - Error::ExternalFormat( - "Avro format contains a non-usize number of bytes".to_string(), + polars_err!(oos = + "Avro format contains a non-usize number of bytes" ) })?; let data = simdutf8::basic::from_utf8(&block[..len])?; @@ -292,8 +292,8 @@ fn deserialize_value<'a>( }, PhysicalType::Binary => { let len: usize = util::zigzag_i64(&mut block)?.try_into().map_err(|_| { - Error::ExternalFormat( - "Avro format contains a non-usize number of bytes".to_string(), + polars_err!(oos = + "Avro format contains a non-usize number of bytes" ) })?; let data = &block[..len]; @@ -329,7 +329,7 @@ fn deserialize_value<'a>( Ok(block) } -fn skip_item<'a>(field: &Field, avro_field: &AvroSchema, mut block: &'a [u8]) -> Result<&'a [u8]> { +fn skip_item<'a>(field: &Field, avro_field: &AvroSchema, mut block: &'a [u8]) -> PolarsResult<&'a [u8]> { if field.is_nullable { let variant = util::zigzag_i64(&mut block)?; let is_null_first = is_union_null_first(avro_field); @@ -366,7 +366,7 @@ fn skip_item<'a>(field: &Field, avro_field: &AvroSchema, mut block: &'a [u8]) -> .map(|bytes| { bytes .try_into() - .map_err(|_| Error::oos("Avro block size negative or too large")) + .map_err(|_| polars_err!(oos = "Avro block size negative or too large")) }) .transpose()?; @@ -431,8 +431,8 @@ fn skip_item<'a>(field: &Field, avro_field: &AvroSchema, mut block: &'a [u8]) -> let len = match avro_inner { AvroSchema::Bytes(_) => { util::zigzag_i64(&mut block)?.try_into().map_err(|_| { - Error::ExternalFormat( - "Avro format contains a non-usize number of bytes".to_string(), + polars_err!(oos = + "Avro format contains a non-usize number of bytes" ) })? }, @@ -445,8 +445,8 @@ fn skip_item<'a>(field: &Field, avro_field: &AvroSchema, mut block: &'a [u8]) -> }, PhysicalType::Utf8 | PhysicalType::Binary => { let len: usize = util::zigzag_i64(&mut block)?.try_into().map_err(|_| { - Error::ExternalFormat( - "Avro format contains a non-usize number of bytes".to_string(), + polars_err!(oos = + "Avro format contains a non-usize number of bytes" ) })?; block = &block[len..]; @@ -478,7 +478,7 @@ pub fn deserialize( fields: &[Field], avro_fields: &[AvroField], projection: &[bool], -) -> Result>> { +) -> PolarsResult>> { assert_eq!(fields.len(), avro_fields.len()); assert_eq!(fields.len(), projection.len()); @@ -498,7 +498,7 @@ pub fn deserialize( make_mutable(&DataType::Int32, None, 0) } }) - .collect::>()?; + .collect::>()?; // this is _the_ expensive transpose (rows -> columns) for _ in 0..rows { diff --git a/crates/nano-arrow/src/io/avro/read/mod.rs b/crates/nano-arrow/src/io/avro/read/mod.rs index 5014499c12a6..eeef90b1d4af 100644 --- a/crates/nano-arrow/src/io/avro/read/mod.rs +++ b/crates/nano-arrow/src/io/avro/read/mod.rs @@ -8,6 +8,8 @@ use avro_schema::schema::Field as AvroField; mod deserialize; pub use deserialize::deserialize; +use polars_error::PolarsResult; + mod nested; mod schema; mod util; @@ -17,7 +19,6 @@ pub use schema::infer_schema; use crate::array::Array; use crate::chunk::Chunk; use crate::datatypes::Field; -use crate::error::Result; /// Single threaded, blocking reader of Avro; [`Iterator`] of [`Chunk`]. pub struct Reader { @@ -52,7 +53,7 @@ impl Reader { } impl Iterator for Reader { - type Item = Result>>; + type Item = PolarsResult>>; fn next(&mut self) -> Option { let fields = &self.fields[..]; diff --git a/crates/nano-arrow/src/io/avro/read/nested.rs b/crates/nano-arrow/src/io/avro/read/nested.rs index fd5bb6b7dbbd..516ccf7bf129 100644 --- a/crates/nano-arrow/src/io/avro/read/nested.rs +++ b/crates/nano-arrow/src/io/avro/read/nested.rs @@ -1,7 +1,7 @@ +use polars_error::{polars_err, PolarsResult}; use crate::array::*; use crate::bitmap::*; use crate::datatypes::*; -use crate::error::*; use crate::offset::{Offset, Offsets}; /// Auxiliary struct @@ -31,10 +31,10 @@ impl DynMutableListArray { } #[inline] - pub fn try_push_valid(&mut self) -> Result<()> { + pub fn try_push_valid(&mut self) -> PolarsResult<()> { let total_length = self.values.len(); let offset = self.offsets.last().to_usize(); - let length = total_length.checked_sub(offset).ok_or(Error::Overflow)?; + let length = total_length.checked_sub(offset).ok_or(polars_err!(ComputeError: "overflow"))?; self.offsets.try_push(length)?; if let Some(validity) = &mut self.validity { @@ -227,7 +227,7 @@ impl DynMutableStructArray { } #[inline] - pub fn try_push_valid(&mut self) -> Result<()> { + pub fn try_push_valid(&mut self) -> PolarsResult<()> { if let Some(validity) = &mut self.validity { validity.push(true) } diff --git a/crates/nano-arrow/src/io/avro/read/schema.rs b/crates/nano-arrow/src/io/avro/read/schema.rs index ca50c59ca9fa..40e678dc99ec 100644 --- a/crates/nano-arrow/src/io/avro/read/schema.rs +++ b/crates/nano-arrow/src/io/avro/read/schema.rs @@ -1,7 +1,7 @@ use avro_schema::schema::{Enum, Fixed, Record, Schema as AvroSchema}; +use polars_error::{polars_bail, PolarsResult}; use crate::datatypes::*; -use crate::error::{Error, Result}; fn external_props(schema: &AvroSchema) -> Metadata { let mut props = Metadata::new(); @@ -21,7 +21,7 @@ fn external_props(schema: &AvroSchema) -> Metadata { /// Infers an [`Schema`] from the root [`Record`]. /// This -pub fn infer_schema(record: &Record) -> Result { +pub fn infer_schema(record: &Record) -> PolarsResult { Ok(record .fields .iter() @@ -32,11 +32,11 @@ pub fn infer_schema(record: &Record) -> Result { external_props(&field.schema), ) }) - .collect::>>()? + .collect::>>()? .into()) } -fn schema_to_field(schema: &AvroSchema, name: Option<&str>, props: Metadata) -> Result { +fn schema_to_field(schema: &AvroSchema, name: Option<&str>, props: Metadata) -> PolarsResult { let mut nullable = false; let data_type = match schema { AvroSchema::Null => DataType::Null, @@ -94,15 +94,15 @@ fn schema_to_field(schema: &AvroSchema, name: Option<&str>, props: Metadata) -> { schema_to_field(schema, None, Metadata::default())?.data_type } else { - return Err(Error::NotYetImplemented(format!( + polars_bail!(nyi = "Can't read avro union {schema:?}" - ))); + ); } } else { let fields = schemas .iter() .map(|s| schema_to_field(s, None, Metadata::default())) - .collect::>>()?; + .collect::>>()?; DataType::Union(fields, None, UnionMode::Dense) } }, @@ -116,7 +116,7 @@ fn schema_to_field(schema: &AvroSchema, name: Option<&str>, props: Metadata) -> } schema_to_field(&field.schema, Some(&field.name), props) }) - .collect::>()?; + .collect::>()?; DataType::Struct(fields) }, AvroSchema::Enum { .. } => { diff --git a/crates/nano-arrow/src/io/avro/write/schema.rs b/crates/nano-arrow/src/io/avro/write/schema.rs index b81cdc77ce3a..044e7967083f 100644 --- a/crates/nano-arrow/src/io/avro/write/schema.rs +++ b/crates/nano-arrow/src/io/avro/write/schema.rs @@ -2,18 +2,18 @@ use avro_schema::schema::{ BytesLogical, Field as AvroField, Fixed, FixedLogical, IntLogical, LongLogical, Record, Schema as AvroSchema, }; +use polars_error::{polars_bail, PolarsResult}; use crate::datatypes::*; -use crate::error::{Error, Result}; /// Converts a [`Schema`] to an Avro [`Record`]. -pub fn to_record(schema: &Schema) -> Result { +pub fn to_record(schema: &Schema) -> PolarsResult { let mut name_counter: i32 = 0; let fields = schema .fields .iter() .map(|f| field_to_field(f, &mut name_counter)) - .collect::>()?; + .collect::>()?; Ok(Record { name: "".to_string(), namespace: None, @@ -23,7 +23,7 @@ pub fn to_record(schema: &Schema) -> Result { }) } -fn field_to_field(field: &Field, name_counter: &mut i32) -> Result { +fn field_to_field(field: &Field, name_counter: &mut i32) -> PolarsResult { let schema = type_to_schema(field.data_type(), field.is_nullable, name_counter)?; Ok(AvroField::new(&field.name, schema)) } @@ -32,7 +32,7 @@ fn type_to_schema( data_type: &DataType, is_nullable: bool, name_counter: &mut i32, -) -> Result { +) -> PolarsResult { Ok(if is_nullable { AvroSchema::Union(vec![ AvroSchema::Null, @@ -48,7 +48,7 @@ fn _get_field_name(name_counter: &mut i32) -> String { format!("r{name_counter}") } -fn _type_to_schema(data_type: &DataType, name_counter: &mut i32) -> Result { +fn _type_to_schema(data_type: &DataType, name_counter: &mut i32) -> PolarsResult { Ok(match data_type.to_logical_type() { DataType::Null => AvroSchema::Null, DataType::Boolean => AvroSchema::Boolean, @@ -68,7 +68,7 @@ fn _type_to_schema(data_type: &DataType, name_counter: &mut i32) -> Result>>()?, + .collect::>>()?, )), DataType::Date32 => AvroSchema::Int(Some(IntLogical::Date)), DataType::Time32(TimeUnit::Millisecond) => AvroSchema::Int(Some(IntLogical::Time)), @@ -86,6 +86,6 @@ fn _type_to_schema(data_type: &DataType, name_counter: &mut i32) -> Result AvroSchema::Fixed(Fixed::new("", *size)), DataType::Decimal(p, s) => AvroSchema::Bytes(Some(BytesLogical::Decimal(*p, *s))), - other => return Err(Error::NotYetImplemented(format!("write {other:?} to avro"))), + other => polars_bail!(nyi = "write {other:?} to avro") }) } diff --git a/crates/nano-arrow/src/io/flight/mod.rs b/crates/nano-arrow/src/io/flight/mod.rs index 0cce1774568f..e7b8086b71b4 100644 --- a/crates/nano-arrow/src/io/flight/mod.rs +++ b/crates/nano-arrow/src/io/flight/mod.rs @@ -3,6 +3,7 @@ use arrow_format::flight::data::{FlightData, SchemaResult}; use arrow_format::ipc; use arrow_format::ipc::planus::ReadAsRoot; +use polars_error::{polars_bail, polars_err, PolarsResult}; use super::ipc::read::Dictionaries; pub use super::ipc::write::default_ipc_fields; @@ -10,7 +11,6 @@ use super::ipc::{IpcField, IpcSchema}; use crate::array::Array; use crate::chunk::Chunk; use crate::datatypes::*; -use crate::error::{Error, Result}; pub use crate::io::ipc::write::common::WriteOptions; use crate::io::ipc::write::common::{encode_chunk, DictionaryTracker, EncodedData}; use crate::io::ipc::{read, write}; @@ -23,9 +23,9 @@ pub fn serialize_batch( chunk: &Chunk>, fields: &[IpcField], options: &WriteOptions, -) -> Result<(Vec, FlightData)> { +) -> PolarsResult<(Vec, FlightData)> { if fields.len() != chunk.arrays().len() { - return Err(Error::InvalidArgumentError("The argument `fields` must be consistent with the columns' schema. Use e.g. &arrow2::io::flight::default_ipc_fields(&schema.fields)".to_string())); + polars_bail!(oos = "The argument `fields` must be consistent with the columns' schema. Use e.g. &arrow2::io::flight::default_ipc_fields(&schema.fields)"); } let mut dictionary_tracker = DictionaryTracker { @@ -75,7 +75,7 @@ pub fn serialize_schema(schema: &Schema, ipc_fields: Option<&[IpcField]>) -> Fli pub fn serialize_schema_to_info( schema: &Schema, ipc_fields: Option<&[IpcField]>, -) -> Result> { +) -> PolarsResult> { let encoded_data = if let Some(ipc_fields) = ipc_fields { schema_as_encoded_data(schema, ipc_fields) } else { @@ -106,7 +106,7 @@ fn schema_as_encoded_data(schema: &Schema, ipc_fields: &[IpcField]) -> EncodedDa /// Deserialize an IPC message into [`Schema`], [`IpcSchema`]. /// Use to deserialize [`FlightData::data_header`] and [`SchemaResult::schema`]. -pub fn deserialize_schemas(bytes: &[u8]) -> Result<(Schema, IpcSchema)> { +pub fn deserialize_schemas(bytes: &[u8]) -> PolarsResult<(Schema, IpcSchema)> { read::deserialize_schema(bytes) } @@ -116,16 +116,16 @@ pub fn deserialize_batch( fields: &[Field], ipc_schema: &IpcSchema, dictionaries: &read::Dictionaries, -) -> Result>> { +) -> PolarsResult>> { // check that the data_header is a record batch message let message = arrow_format::ipc::MessageRef::read_as_root(&data.data_header) - .map_err(|err| Error::OutOfSpec(format!("Unable to get root as message: {err:?}")))?; + .map_err(|err| polars_err!(oos = "Unable to get root as message: {err:?}"))?; let length = data.data_body.len(); let mut reader = std::io::Cursor::new(&data.data_body); match message.header()?.ok_or_else(|| { - Error::oos("Unable to convert flight data header to a record batch".to_string()) + polars_err!(oos = "Unable to convert flight data header to a record batch".to_string()) })? { ipc::MessageHeaderRef::RecordBatch(batch) => read::read_record_batch( batch, @@ -140,9 +140,9 @@ pub fn deserialize_batch( length as u64, &mut Default::default(), ), - _ => Err(Error::nyi( - "flight currently only supports reading RecordBatch messages", - )), + _ => polars_bail!(oos = + "flight currently only supports reading RecordBatch messages" + ), } } @@ -152,12 +152,12 @@ pub fn deserialize_dictionary( fields: &[Field], ipc_schema: &IpcSchema, dictionaries: &mut read::Dictionaries, -) -> Result<()> { +) -> PolarsResult<()> { let message = ipc::MessageRef::read_as_root(&data.data_header)?; let chunk = if let ipc::MessageHeaderRef::DictionaryBatch(chunk) = message .header()? - .ok_or_else(|| Error::oos("Header is required"))? + .ok_or_else(|| polars_err!(oos ="Header is required"))? { chunk } else { @@ -187,7 +187,7 @@ pub fn deserialize_message( fields: &[Field], ipc_schema: &IpcSchema, dictionaries: &mut Dictionaries, -) -> Result>>> { +) -> PolarsResult>>> { let FlightData { data_header, data_body, @@ -197,7 +197,7 @@ pub fn deserialize_message( let message = arrow_format::ipc::MessageRef::read_as_root(data_header)?; let header = message .header()? - .ok_or_else(|| Error::oos("IPC Message must contain a header"))?; + .ok_or_else(|| polars_err!(oos = "IPC Message must contain a header"))?; match header { ipc::MessageHeaderRef::RecordBatch(batch) => { @@ -236,8 +236,8 @@ pub fn deserialize_message( )?; Ok(None) }, - t => Err(Error::nyi(format!( + t => polars_bail!(ComputeError: "Reading types other than record batches not yet supported, unable to read {t:?}" - ))), + ) } } diff --git a/crates/nano-arrow/src/io/parquet/read/deserialize/dictionary/mod.rs b/crates/nano-arrow/src/io/parquet/read/deserialize/dictionary/mod.rs index 7826f5856c0e..5dde90b7495f 100644 --- a/crates/nano-arrow/src/io/parquet/read/deserialize/dictionary/mod.rs +++ b/crates/nano-arrow/src/io/parquet/read/deserialize/dictionary/mod.rs @@ -16,7 +16,6 @@ use super::Pages; use crate::array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}; use crate::bitmap::MutableBitmap; use crate::datatypes::DataType; -use crate::error::{Error, Result}; // The state of a `DataPage` of `Primitive` parquet primitive type #[derive(Debug)] @@ -33,7 +32,7 @@ pub struct Required<'a> { } impl<'a> Required<'a> { - fn try_new(page: &'a DataPage) -> Result { + fn try_new(page: &'a DataPage) -> PolarsResult { let values = dict_indices_decoder(page)?; Ok(Self { values }) } @@ -45,7 +44,7 @@ pub struct FilteredRequired<'a> { } impl<'a> FilteredRequired<'a> { - fn try_new(page: &'a DataPage) -> Result { + fn try_new(page: &'a DataPage) -> PolarsResult { let values = dict_indices_decoder(page)?; let rows = get_selected_rows(page); @@ -62,7 +61,7 @@ pub struct Optional<'a> { } impl<'a> Optional<'a> { - fn try_new(page: &'a DataPage) -> Result { + fn try_new(page: &'a DataPage) -> PolarsResult { let values = dict_indices_decoder(page)?; Ok(Self { @@ -111,7 +110,7 @@ where type Dict = (); type DecodedState = (Vec, MutableBitmap); - fn build_state(&self, page: &'a DataPage, _: Option<&'a Self::Dict>) -> Result { + fn build_state(&self, page: &'a DataPage, _: Option<&'a Self::Dict>) -> PolarsResult { let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; let is_filtered = page.selected_rows().is_some(); @@ -241,7 +240,7 @@ pub(super) fn next_dict Box, read_dict: F, -) -> MaybeNext>> { +) -> MaybeNext>> { if items.len() > 1 { let (values, validity) = items.pop_front().unwrap(); let keys = finish_key(values, validity); @@ -256,8 +255,8 @@ pub(super) fn next_dict Box { let (page, dict) = match (&dict, page) { (None, Page::Data(_)) => { - return MaybeNext::Some(Err(Error::nyi( - "dictionary arrays from non-dict-encoded pages", + return MaybeNext::Some(Err(polars_err!(ComputeError: + "not implemented: dictionary arrays from non-dict-encoded pages", ))); }, (_, Page::Dict(dict_page)) => { @@ -312,3 +311,4 @@ pub(super) fn next_dict Box { } impl<'a> Required<'a> { - fn try_new(page: &'a DataPage) -> Result { + fn try_new(page: &'a DataPage) -> PolarsResult { let values = dict_indices_decoder(page)?; let length = page.num_values(); Ok(Self { values, length }) @@ -81,7 +81,7 @@ impl<'a, K: DictionaryKey> NestedDecoder<'a> for DictionaryDecoder { &self, page: &'a DataPage, _: Option<&'a Self::Dictionary>, - ) -> Result { + ) -> PolarsResult { let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; let is_filtered = page.selected_rows().is_some(); @@ -104,7 +104,7 @@ impl<'a, K: DictionaryKey> NestedDecoder<'a> for DictionaryDecoder { ) } - fn push_valid(&self, state: &mut Self::State, decoded: &mut Self::DecodedState) -> Result<()> { + fn push_valid(&self, state: &mut Self::State, decoded: &mut Self::DecodedState) -> PolarsResult<()> { let (values, validity) = decoded; match state { State::Optional(page_values) => { @@ -148,7 +148,7 @@ pub fn next_dict Box> data_type: DataType, chunk_size: Option, read_dict: F, -) -> MaybeNext)>> { +) -> MaybeNext)>> { if items.len() > 1 { let (nested, (values, validity)) = items.pop_front().unwrap(); let keys = finish_key(values, validity); @@ -160,9 +160,11 @@ pub fn next_dict Box> Ok(Some(page)) => { let (page, dict) = match (&dict, page) { (None, Page::Data(_)) => { - return MaybeNext::Some(Err(Error::nyi( - "dictionary arrays from non-dict-encoded pages", - ))); + return MaybeNext::Some(Err( + polars_err!(ComputeError: + "not implemented: dictionary arrays from non-dict-encoded pages", + ) + )); }, (_, Page::Dict(dict_page)) => { *dict = Some(read_dict(dict_page)); diff --git a/crates/nano-arrow/src/io/parquet/read/deserialize/nested.rs b/crates/nano-arrow/src/io/parquet/read/deserialize/nested.rs index 6a352753b9f1..603ecc819519 100644 --- a/crates/nano-arrow/src/io/parquet/read/deserialize/nested.rs +++ b/crates/nano-arrow/src/io/parquet/read/deserialize/nested.rs @@ -1,18 +1,18 @@ use ethnum::I256; use parquet2::schema::types::PrimitiveType; +use polars_error::polars_bail; use super::nested_utils::{InitNested, NestedArrayIter}; use super::*; use crate::array::PrimitiveArray; use crate::datatypes::{DataType, Field}; -use crate::error::{Error, Result}; use crate::match_integer_type; /// Converts an iterator of arrays to a trait object returning trait objects #[inline] fn remove_nested<'a, I>(iter: I) -> NestedArrayIter<'a> where - I: Iterator)>> + Send + Sync + 'a, + I: Iterator)>> + Send + Sync + 'a, { Box::new(iter.map(|x| { x.map(|(mut nested, array)| { @@ -27,7 +27,7 @@ where fn primitive<'a, A, I>(iter: I) -> NestedArrayIter<'a> where A: Array, - I: Iterator> + Send + Sync + 'a, + I: Iterator> + Send + Sync + 'a, { Box::new(iter.map(|x| { x.map(|(mut nested, array)| { @@ -44,7 +44,7 @@ pub fn columns_to_iter_recursive<'a, I: 'a>( mut init: Vec, num_rows: usize, chunk_size: Option, -) -> Result> +) -> PolarsResult> where I: Pages, { @@ -168,9 +168,9 @@ where |x: i64| x as u32, )), other => { - return Err(Error::nyi(format!( - "Deserializing UInt32 from {other:?}'s parquet" - ))) + polars_bail!(ComputeError: + "deserializing UInt32 from {other:?}'s parquet" + ) }, } }, @@ -282,9 +282,9 @@ where |x: i64| x as i128, )), PhysicalType::FixedLenByteArray(n) if n > 16 => { - return Err(Error::InvalidArgumentError(format!( - "Can't decode Decimal128 type from `FixedLenByteArray` of len {n}" - ))) + polars_bail!( + ComputeError: "Can't decode Decimal128 type from `FixedLenByteArray` of len {n}" + ) }, PhysicalType::FixedLenByteArray(n) => { let iter = fixed_size_binary::NestedIter::new( @@ -317,10 +317,10 @@ where Box::new(iter) }, _ => { - return Err(Error::nyi(format!( + polars_bail!(ComputeError: "Deserializing type for Decimal {:?} from parquet", type_.physical_type - ))) + ) }, } }, @@ -406,15 +406,15 @@ where Box::new(iter) as _ }, PhysicalType::FixedLenByteArray(n) => { - return Err(Error::InvalidArgumentError(format!( + polars_bail!(ComputeError: "Can't decode Decimal256 type from from `FixedLenByteArray` of len {n}" - ))) + ) }, _ => { - return Err(Error::nyi(format!( + polars_bail!(ComputeError: "Deserializing type for Decimal {:?} from parquet", type_.physical_type - ))) + ) }, } }, @@ -437,7 +437,7 @@ where chunk_size, ) }) - .collect::>>()?; + .collect::>>()?; let columns = columns.into_iter().rev().collect(); Box::new(struct_::StructIterator::new(columns, fields.clone())) }, @@ -459,9 +459,9 @@ where Box::new(iter) as _ }, other => { - return Err(Error::nyi(format!( + polars_bail!(ComputeError: "Deserializing type {other:?} from parquet" - ))) + ) }, }, }) @@ -474,7 +474,7 @@ fn dict_read<'a, K: DictionaryKey, I: 'a + Pages>( data_type: DataType, num_rows: usize, chunk_size: Option, -) -> Result> { +) -> PolarsResult> { use DataType::*; let values_data_type = if let Dictionary(_, v, _) = &data_type { v.as_ref() @@ -583,9 +583,9 @@ fn dict_read<'a, K: DictionaryKey, I: 'a + Pages>( } */ other => { - return Err(Error::nyi(format!( + polars_bail!(ComputeError: "Reading nested dictionaries of type {other:?}" - ))) + ) }, }) } diff --git a/crates/nano-arrow/src/io/parquet/read/deserialize/primitive/integer.rs b/crates/nano-arrow/src/io/parquet/read/deserialize/primitive/integer.rs index ac6c0bac0c1f..e300870c2682 100644 --- a/crates/nano-arrow/src/io/parquet/read/deserialize/primitive/integer.rs +++ b/crates/nano-arrow/src/io/parquet/read/deserialize/primitive/integer.rs @@ -7,13 +7,13 @@ use parquet2::encoding::Encoding; use parquet2::page::{split_buffer, DataPage, DictPage}; use parquet2::schema::Repetition; use parquet2::types::NativeType as ParquetNativeType; +use polars_error::{PolarsResult, to_compute_err}; use super::super::{utils, Pages}; use super::basic::{finish, PrimitiveDecoder, State as PrimitiveState}; use crate::array::MutablePrimitiveArray; use crate::bitmap::MutableBitmap; use crate::datatypes::DataType; -use crate::error::{Error, Result}; use crate::io::parquet::read::deserialize::utils::{ get_selected_rows, FilteredOptionalPageValidity, OptionalPageValidity, }; @@ -80,7 +80,7 @@ where type Dict = Vec; type DecodedState = (Vec, MutableBitmap); - fn build_state(&self, page: &'a DataPage, dict: Option<&'a Self::Dict>) -> Result { + fn build_state(&self, page: &'a DataPage, dict: Option<&'a Self::Dict>) -> PolarsResult { let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; let is_filtered = page.selected_rows().is_some(); @@ -90,7 +90,7 @@ where let (_, _, values) = split_buffer(page)?; Decoder::try_new(values) .map(State::DeltaBinaryPackedRequired) - .map_err(Error::from) + .map_err(to_compute_err) }, (Encoding::DeltaBinaryPacked, _, true, false) => { let (_, _, values) = split_buffer(page)?; @@ -239,7 +239,7 @@ where i64: num_traits::AsPrimitive

, F: Copy + Fn(P) -> T, { - type Item = Result>; + type Item = PolarsResult>; fn next(&mut self) -> Option { let maybe_state = utils::next( diff --git a/crates/nano-arrow/src/io/parquet/read/deserialize/simple.rs b/crates/nano-arrow/src/io/parquet/read/deserialize/simple.rs index d293865e2c34..7ab2b35e0735 100644 --- a/crates/nano-arrow/src/io/parquet/read/deserialize/simple.rs +++ b/crates/nano-arrow/src/io/parquet/read/deserialize/simple.rs @@ -3,12 +3,12 @@ use parquet2::schema::types::{ PhysicalType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit, }; use parquet2::types::int96_to_i64_ns; +use polars_error::{polars_bail, PolarsResult}; use super::super::{ArrayIter, Pages}; use super::{binary, boolean, fixed_size_binary, null, primitive}; use crate::array::{Array, DictionaryKey, MutablePrimitiveArray, PrimitiveArray}; use crate::datatypes::{DataType, IntervalUnit, TimeUnit}; -use crate::error::{Error, Result}; use crate::match_integer_type; use crate::types::{days_ms, i256, NativeType}; @@ -17,26 +17,26 @@ use crate::types::{days_ms, i256, NativeType}; fn dyn_iter<'a, A, I>(iter: I) -> ArrayIter<'a> where A: Array, - I: Iterator> + Send + Sync + 'a, + I: Iterator> + Send + Sync + 'a, { Box::new(iter.map(|x| x.map(|x| Box::new(x) as Box))) } /// Converts an iterator of [MutablePrimitiveArray] into an iterator of [PrimitiveArray] #[inline] -fn iden(iter: I) -> impl Iterator>> +fn iden(iter: I) -> impl Iterator>> where T: NativeType, - I: Iterator>>, + I: Iterator>>, { iter.map(|x| x.map(|x| x.into())) } #[inline] -fn op(iter: I, op: F) -> impl Iterator>> +fn op(iter: I, op: F) -> impl Iterator>> where T: NativeType, - I: Iterator>>, + I: Iterator>>, F: Fn(T) -> T + Copy, { iter.map(move |x| { @@ -55,7 +55,7 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( data_type: DataType, chunk_size: Option, num_rows: usize, -) -> Result> { +) -> PolarsResult> { use DataType::*; let physical_type = &type_.physical_type; @@ -191,9 +191,9 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( |x: i64| x as i128, ))), (PhysicalType::FixedLenByteArray(n), Decimal(_, _)) if *n > 16 => { - return Err(Error::NotYetImplemented(format!( - "Can't decode Decimal128 type from Fixed Size Byte Array of len {n:?}" - ))) + polars_bail!(ComputeError: + "not implemented: can't decode Decimal128 type from Fixed Size Byte Array of len {n:?}" + ) }, (PhysicalType::FixedLenByteArray(n), Decimal(_, _)) => { let n = *n; @@ -288,9 +288,9 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( Box::new(arrays) as _ }, (PhysicalType::FixedLenByteArray(n), Decimal256(_, _)) if *n > 32 => { - return Err(Error::NotYetImplemented(format!( + polars_bail!(ComputeError: "Can't decode Decimal256 type from Fixed Size Byte Array of len {n:?}" - ))) + ) }, (PhysicalType::Int32, Date64) => dyn_iter(iden(primitive::IntegerIter::new( pages, @@ -344,9 +344,9 @@ pub fn page_iter_to_arrays<'a, I: Pages + 'a>( }) }, (from, to) => { - return Err(Error::NotYetImplemented(format!( - "Reading parquet type {from:?} to {to:?} still not implemented" - ))) + polars_bail!(ComputeError: + "not implemented: reading parquet type {from:?} to {to:?} still not implemented" + ) }, }) } @@ -428,7 +428,7 @@ fn timestamp<'a, I: Pages + 'a>( num_rows: usize, chunk_size: Option, time_unit: TimeUnit, -) -> Result> { +) -> PolarsResult> { if physical_type == &PhysicalType::Int96 { return match time_unit { TimeUnit::Nanosecond => Ok(dyn_iter(iden(primitive::Iter::new( @@ -463,9 +463,9 @@ fn timestamp<'a, I: Pages + 'a>( }; if physical_type != &PhysicalType::Int64 { - return Err(Error::nyi( - "Can't decode a timestamp from a non-int64 parquet type", - )); + polars_bail!(ComputeError: + "not implemented: can't decode a timestamp from a non-int64 parquet type", + ); } let iter = primitive::IntegerIter::new(pages, data_type, num_rows, chunk_size, |x: i64| x); @@ -485,7 +485,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: Pages + 'a>( num_rows: usize, chunk_size: Option, time_unit: TimeUnit, -) -> Result> { +) -> PolarsResult> { if physical_type == &PhysicalType::Int96 { let logical_type = PrimitiveLogicalType::Timestamp { unit: ParquetTimeUnit::Nanoseconds, @@ -536,7 +536,7 @@ fn dict_read<'a, K: DictionaryKey, I: Pages + 'a>( data_type: DataType, num_rows: usize, chunk_size: Option, -) -> Result> { +) -> PolarsResult> { use DataType::*; let values_data_type = if let Dictionary(_, v, _) = &data_type { v.as_ref() @@ -644,9 +644,9 @@ fn dict_read<'a, K: DictionaryKey, I: Pages + 'a>( fixed_size_binary::DictIter::::new(iter, data_type, num_rows, chunk_size), ), other => { - return Err(Error::nyi(format!( - "Reading dictionaries of type {other:?}" - ))) + polars_bail!(ComputeError: + "not implemented: reading dictionaries of type {other:?}" + ) }, }) } diff --git a/crates/nano-arrow/src/io/parquet/read/deserialize/struct_.rs b/crates/nano-arrow/src/io/parquet/read/deserialize/struct_.rs index 947e7f1141e5..c6f7e6c373a2 100644 --- a/crates/nano-arrow/src/io/parquet/read/deserialize/struct_.rs +++ b/crates/nano-arrow/src/io/parquet/read/deserialize/struct_.rs @@ -1,7 +1,7 @@ +use polars_error::{PolarsError, PolarsResult}; use super::nested_utils::{NestedArrayIter, NestedState}; use crate::array::{Array, StructArray}; use crate::datatypes::{DataType, Field}; -use crate::error::Error; /// An iterator adapter over [`NestedArrayIter`] assumed to be encoded as Struct arrays pub struct StructIterator<'a> { @@ -18,7 +18,7 @@ impl<'a> StructIterator<'a> { } impl<'a> Iterator for StructIterator<'a> { - type Item = Result<(NestedState, Box), Error>; + type Item = PolarsResult<(NestedState, Box)>; fn next(&mut self) -> Option { let values = self diff --git a/crates/nano-arrow/src/io/parquet/read/schema/metadata.rs b/crates/nano-arrow/src/io/parquet/read/schema/metadata.rs index 574ff08d1fd5..ec1efe4f5376 100644 --- a/crates/nano-arrow/src/io/parquet/read/schema/metadata.rs +++ b/crates/nano-arrow/src/io/parquet/read/schema/metadata.rs @@ -1,16 +1,16 @@ use base64::engine::general_purpose; use base64::Engine as _; pub use parquet2::metadata::KeyValue; +use polars_error::{polars_bail, PolarsResult}; use super::super::super::ARROW_SCHEMA_META_KEY; use crate::datatypes::{Metadata, Schema}; -use crate::error::{Error, Result}; use crate::io::ipc::read::deserialize_schema; /// Reads an arrow schema from Parquet's file metadata. Returns `None` if no schema was found. /// # Errors /// Errors iff the schema cannot be correctly parsed. -pub fn read_schema_from_metadata(metadata: &mut Metadata) -> Result> { +pub fn read_schema_from_metadata(metadata: &mut Metadata) -> PolarsResult> { metadata .remove(ARROW_SCHEMA_META_KEY) .map(|encoded| get_arrow_schema_from_metadata(&encoded)) @@ -18,7 +18,7 @@ pub fn read_schema_from_metadata(metadata: &mut Metadata) -> Result Result { +fn get_arrow_schema_from_metadata(encoded_meta: &str) -> PolarsResult { let decoded = general_purpose::STANDARD.decode(encoded_meta); match decoded { Ok(bytes) => { @@ -31,9 +31,9 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result { }, Err(err) => { // The C++ implementation returns an error if the schema can't be parsed. - Err(Error::InvalidArgumentError(format!( - "Unable to decode the encoded schema stored in {ARROW_SCHEMA_META_KEY}, {err:?}" - ))) + polars_bail!(InvalidOperation: + "unable to decode the encoded schema stored in {ARROW_SCHEMA_META_KEY}, {err:?}" + ) }, } } diff --git a/crates/nano-arrow/src/io/parquet/read/statistics/map.rs b/crates/nano-arrow/src/io/parquet/read/statistics/map.rs index d6b2a73388f5..6617af5f2526 100644 --- a/crates/nano-arrow/src/io/parquet/read/statistics/map.rs +++ b/crates/nano-arrow/src/io/parquet/read/statistics/map.rs @@ -1,7 +1,7 @@ +use polars_error::PolarsResult; use super::make_mutable; use crate::array::{Array, MapArray, MutableArray}; use crate::datatypes::DataType; -use crate::error::Error; #[derive(Debug)] pub struct DynMutableMapArray { @@ -10,7 +10,7 @@ pub struct DynMutableMapArray { } impl DynMutableMapArray { - pub fn try_with_capacity(data_type: DataType, capacity: usize) -> Result { + pub fn try_with_capacity(data_type: DataType, capacity: usize) -> PolarsResult { let inner = match data_type.to_logical_type() { DataType::Map(inner, _) => inner, _ => unreachable!(), diff --git a/crates/nano-arrow/src/io/parquet/read/statistics/mod.rs b/crates/nano-arrow/src/io/parquet/read/statistics/mod.rs index f63f090f93ee..a3e3093f0c01 100644 --- a/crates/nano-arrow/src/io/parquet/read/statistics/mod.rs +++ b/crates/nano-arrow/src/io/parquet/read/statistics/mod.rs @@ -12,10 +12,10 @@ use parquet2::statistics::{ Statistics as ParquetStatistics, }; use parquet2::types::int96_to_i64_ns; +use polars_error::{polars_bail, PolarsResult}; use crate::array::*; use crate::datatypes::{DataType, Field, IntervalUnit, PhysicalType}; -use crate::error::{Error, Result}; use crate::types::i256; use crate::with_match_primitive_type; @@ -154,7 +154,7 @@ impl From for Statistics { } } -fn make_mutable(data_type: &DataType, capacity: usize) -> Result> { +fn make_mutable(data_type: &DataType, capacity: usize) -> PolarsResult> { Ok(match data_type.to_physical_type() { PhysicalType::Boolean => { Box::new(MutableBooleanArray::with_capacity(capacity)) as Box @@ -197,9 +197,9 @@ fn make_mutable(data_type: &DataType, capacity: usize) -> Result }, other => { - return Err(Error::NotYetImplemented(format!( - "Deserializing parquet stats from {other:?} is still not implemented" - ))) + polars_bail!(nyi = + "deserializing parquet stats from {other:?} is still not implemented" + ) }, }) } @@ -235,7 +235,7 @@ fn create_dt(data_type: &DataType) -> DataType { } impl MutableStatistics { - fn try_new(field: &Field) -> Result { + fn try_new(field: &Field) -> PolarsResult { let min_value = make_mutable(&field.data_type, 0)?; let max_value = make_mutable(&field.data_type, 0)?; @@ -321,7 +321,7 @@ fn push( max: &mut dyn MutableArray, distinct_count: &mut dyn MutableArray, null_count: &mut dyn MutableArray, -) -> Result<()> { +) -> PolarsResult<()> { match min.data_type().to_logical_type() { List(_) | LargeList(_) => { let min = min @@ -453,9 +453,9 @@ fn push( // some implementations of parquet write arrow's u32 into i64. ParquetPhysicalType::Int64 => primitive::push(from, min, max, |x: i64| Ok(x as u32)), ParquetPhysicalType::Int32 => primitive::push(from, min, max, |x: i32| Ok(x as u32)), - other => Err(Error::NotYetImplemented(format!( + other => polars_bail!(nyi = "Can't decode UInt32 type from parquet type {other:?}" - ))), + ), }, Int32 => primitive::push::(from, min, max, Ok), Date64 => match physical_type { @@ -464,9 +464,9 @@ fn push( ParquetPhysicalType::Int32 => { primitive::push(from, min, max, |x: i32| Ok(x as i64 * 86400000)) }, - other => Err(Error::NotYetImplemented(format!( + other => polars_bail!(nyi = "Can't decode Date64 type from parquet type {other:?}" - ))), + ), }, Int64 | Time64(_) | Duration(_) => primitive::push::(from, min, max, Ok), UInt64 => primitive::push(from, min, max, |x: i64| Ok(x as u64)), @@ -513,9 +513,9 @@ fn push( Decimal(_, _) => match physical_type { ParquetPhysicalType::Int32 => primitive::push(from, min, max, |x: i32| Ok(x as i128)), ParquetPhysicalType::Int64 => primitive::push(from, min, max, |x: i64| Ok(x as i128)), - ParquetPhysicalType::FixedLenByteArray(n) if *n > 16 => Err(Error::NotYetImplemented( - format!("Can't decode Decimal128 type from Fixed Size Byte Array of len {n:?}"), - )), + ParquetPhysicalType::FixedLenByteArray(n) if *n > 16 => polars_bail!(nyi= + "Can't decode Decimal128 type from Fixed Size Byte Array of len {n:?}" + ), ParquetPhysicalType::FixedLenByteArray(n) => fixlen::push_i128(from, *n, min, max), _ => unreachable!(), }, @@ -529,9 +529,9 @@ fn push( ParquetPhysicalType::FixedLenByteArray(n) if *n <= 16 => { fixlen::push_i256_with_i128(from, *n, min, max) }, - ParquetPhysicalType::FixedLenByteArray(n) if *n > 32 => Err(Error::NotYetImplemented( - format!("Can't decode Decimal256 type from Fixed Size Byte Array of len {n:?}"), - )), + ParquetPhysicalType::FixedLenByteArray(n) if *n > 32 => polars_bail!( + nyi = "Can't decode Decimal256 type from Fixed Size Byte Array of len {n:?}" + ), ParquetPhysicalType::FixedLenByteArray(_) => fixlen::push_i256(from, min, max), _ => unreachable!(), }, @@ -550,7 +550,7 @@ fn push( /// /// # Errors /// This function errors if the deserialization of the statistics fails (e.g. invalid utf8) -pub fn deserialize(field: &Field, row_group: &RowGroupMetaData) -> Result { +pub fn deserialize(field: &Field, row_group: &RowGroupMetaData) -> PolarsResult { let mut statistics = MutableStatistics::try_new(field)?; let columns = get_field_columns(row_group.columns(), field.name.as_ref()); @@ -562,7 +562,7 @@ pub fn deserialize(field: &Field, row_group: &RowGroupMetaData) -> Result, ParquetPrimitiveType)>>>()?; + .collect::, ParquetPrimitiveType)>>>()?; push( &mut stats, statistics.min_value.as_mut(), diff --git a/crates/nano-arrow/src/io/parquet/read/statistics/utf8.rs b/crates/nano-arrow/src/io/parquet/read/statistics/utf8.rs index da9fcb6e1119..cf5f1dd15318 100644 --- a/crates/nano-arrow/src/io/parquet/read/statistics/utf8.rs +++ b/crates/nano-arrow/src/io/parquet/read/statistics/utf8.rs @@ -1,14 +1,14 @@ use parquet2::statistics::{BinaryStatistics, Statistics as ParquetStatistics}; +use polars_error::PolarsResult; use crate::array::{MutableArray, MutableUtf8Array}; -use crate::error::Result; use crate::offset::Offset; pub(super) fn push( from: Option<&dyn ParquetStatistics>, min: &mut dyn MutableArray, max: &mut dyn MutableArray, -) -> Result<()> { +) -> PolarsResult<()> { let min = min .as_mut_any() .downcast_mut::>() diff --git a/crates/nano-arrow/src/io/parquet/write/binary/basic.rs b/crates/nano-arrow/src/io/parquet/write/binary/basic.rs index de840e45fa5a..7da923802d92 100644 --- a/crates/nano-arrow/src/io/parquet/write/binary/basic.rs +++ b/crates/nano-arrow/src/io/parquet/write/binary/basic.rs @@ -2,11 +2,11 @@ use parquet2::encoding::{delta_bitpacked, Encoding}; use parquet2::page::DataPage; use parquet2::schema::types::PrimitiveType; use parquet2::statistics::{serialize_statistics, BinaryStatistics, ParquetStatistics, Statistics}; +use polars_error::{polars_bail, PolarsResult}; use super::super::{utils, WriteOptions}; use crate::array::{Array, BinaryArray}; use crate::bitmap::Bitmap; -use crate::error::{Error, Result}; use crate::io::parquet::read::schema::is_nullable; use crate::offset::Offset; @@ -40,7 +40,7 @@ pub fn array_to_page( options: WriteOptions, type_: PrimitiveType, encoding: Encoding, -) -> Result { +) -> PolarsResult { let validity = array.validity(); let is_optional = is_nullable(&type_.field_info); @@ -65,11 +65,11 @@ pub fn array_to_page( &mut buffer, ), _ => { - return Err(Error::InvalidArgumentError(format!( + polars_bail!(InvalidOperation: "Datatype {:?} cannot be encoded by {:?} encoding", array.data_type(), encoding - ))) + ) }, } diff --git a/crates/nano-arrow/src/io/parquet/write/dictionary.rs b/crates/nano-arrow/src/io/parquet/write/dictionary.rs index 4ee0a5c37eac..61f180e88b0a 100644 --- a/crates/nano-arrow/src/io/parquet/write/dictionary.rs +++ b/crates/nano-arrow/src/io/parquet/write/dictionary.rs @@ -4,6 +4,7 @@ use parquet2::page::{DictPage, Page}; use parquet2::schema::types::PrimitiveType; use parquet2::statistics::{serialize_statistics, ParquetStatistics}; use parquet2::write::DynIter; +use polars_error::{polars_bail, PolarsResult}; use super::binary::{ build_statistics as binary_build_statistics, encode_plain as binary_encode_plain, @@ -19,7 +20,6 @@ use super::{nested, Nested, WriteOptions}; use crate::array::{Array, DictionaryArray, DictionaryKey}; use crate::bitmap::{Bitmap, MutableBitmap}; use crate::datatypes::DataType; -use crate::error::{Error, Result}; use crate::io::parquet::read::schema::is_nullable; use crate::io::parquet::write::{slice_nested_leaf, utils}; @@ -29,7 +29,7 @@ fn serialize_def_levels_simple( is_optional: bool, options: WriteOptions, buffer: &mut Vec, -) -> Result<()> { +) -> PolarsResult<()> { utils::write_def_levels(buffer, is_optional, validity, length, options.version) } @@ -37,7 +37,7 @@ fn serialize_keys_values( array: &DictionaryArray, validity: Option<&Bitmap>, buffer: &mut Vec, -) -> Result<()> { +) -> PolarsResult<()> { let keys = array.keys_values_iter().map(|x| x as u32); if let Some(validity) = validity { // discard indices whose values are null. @@ -72,7 +72,7 @@ fn serialize_levels( nested: &[Nested], options: WriteOptions, buffer: &mut Vec, -) -> Result<(usize, usize)> { +) -> PolarsResult<(usize, usize)> { if nested.len() == 1 { let is_optional = is_nullable(&type_.field_info); serialize_def_levels_simple(validity, length, is_optional, options, buffer)?; @@ -103,7 +103,7 @@ fn serialize_keys( nested: &[Nested], statistics: Option, options: WriteOptions, -) -> Result { +) -> PolarsResult { let mut buffer = vec![]; // parquet only accepts a single validity - we "&" the validities into a single one @@ -175,7 +175,7 @@ pub fn array_to_pages( nested: &[Nested], options: WriteOptions, encoding: Encoding, -) -> Result>> { +) -> PolarsResult>> { match encoding { Encoding::PlainDictionary | Encoding::RleDictionary => { // write DictPage @@ -261,9 +261,9 @@ pub fn array_to_pages( (DictPage::new(buffer, array.len(), false), stats) }, other => { - return Err(Error::NotYetImplemented(format!( + polars_bail!(nyi = "Writing dictionary arrays to parquet only support data type {other:?}" - ))) + ) }, }; let dict_page = Page::Dict(dict_page); @@ -274,8 +274,8 @@ pub fn array_to_pages( let iter = std::iter::once(Ok(dict_page)).chain(std::iter::once(Ok(data_page))); Ok(DynIter::new(Box::new(iter))) }, - _ => Err(Error::NotYetImplemented( - "Dictionary arrays only support dictionary encoding".to_string(), - )), + _ => polars_bail!(nyi = + "Dictionary arrays only support dictionary encoding" + ) } } diff --git a/crates/nano-arrow/src/io/parquet/write/mod.rs b/crates/nano-arrow/src/io/parquet/write/mod.rs index 2807ca25f144..52814a2b1157 100644 --- a/crates/nano-arrow/src/io/parquet/write/mod.rs +++ b/crates/nano-arrow/src/io/parquet/write/mod.rs @@ -45,7 +45,6 @@ pub use utils::write_def_levels; use crate::array::*; use crate::datatypes::*; -use crate::error::{Error, Result}; use crate::types::{days_ms, i256, NativeType}; /// Currently supported options to write to parquet @@ -63,6 +62,7 @@ pub struct WriteOptions { pub use file::FileWriter; pub use pages::{array_to_columns, Nested}; +use polars_error::{polars_bail, PolarsResult}; pub use row_group::{row_group_iter, RowGroupIterator}; pub use schema::to_parquet_type; pub use sink::FileSink; @@ -106,12 +106,12 @@ fn decimal_length_from_precision(precision: usize) -> usize { } /// Creates a parquet [`SchemaDescriptor`] from a [`Schema`]. -pub fn to_parquet_schema(schema: &Schema) -> Result { +pub fn to_parquet_schema(schema: &Schema) -> PolarsResult { let parquet_types = schema .fields .iter() .map(to_parquet_type) - .collect::>>()?; + .collect::>>()?; Ok(SchemaDescriptor::new("root".to_string(), parquet_types)) } @@ -219,7 +219,7 @@ pub fn array_to_pages( nested: &[Nested], options: WriteOptions, encoding: Encoding, -) -> Result>> { +) -> PolarsResult>> { if let DataType::Dictionary(key_type, _, _) = primitive_array.data_type().to_logical_type() { return match_integer_type!(key_type, |$T| { dictionary::array_to_pages::<$T>( @@ -283,7 +283,7 @@ pub fn array_to_page( nested: &[Nested], options: WriteOptions, encoding: Encoding, -) -> Result { +) -> PolarsResult { if nested.len() == 1 { // special case where validity == def levels return array_to_page_simple(array, type_, options, encoding); @@ -297,12 +297,12 @@ pub fn array_to_page_simple( type_: ParquetPrimitiveType, options: WriteOptions, encoding: Encoding, -) -> Result { +) -> PolarsResult { let data_type = array.data_type(); if !can_encode(data_type, encoding) { - return Err(Error::InvalidArgumentError(format!( + polars_bail!(InvalidOperation: "The datatype {data_type:?} cannot be encoded by {encoding:?}" - ))); + ) } match data_type.to_logical_type() { @@ -589,9 +589,9 @@ pub fn array_to_page_simple( fixed_len_bytes::array_to_page(&array, options, type_, statistics) } }, - other => Err(Error::NotYetImplemented(format!( + other => polars_bail!(nyi = "Writing parquet pages for data type {other:?}" - ))), + ) } .map(Page::Data) } @@ -602,7 +602,7 @@ fn array_to_page_nested( nested: &[Nested], options: WriteOptions, _encoding: Encoding, -) -> Result { +) -> PolarsResult { use DataType::*; match array.data_type().to_logical_type() { Null => { @@ -800,9 +800,9 @@ fn array_to_page_nested( fixed_len_bytes::array_to_page(&array, options, type_, statistics) } }, - other => Err(Error::NotYetImplemented(format!( + other => polars_bail!(nyi = "Writing nested parquet pages for data type {other:?}" - ))), + ) } .map(Page::Data) } diff --git a/crates/nano-arrow/src/io/parquet/write/pages.rs b/crates/nano-arrow/src/io/parquet/write/pages.rs index ce51bcdcda89..417563bac57f 100644 --- a/crates/nano-arrow/src/io/parquet/write/pages.rs +++ b/crates/nano-arrow/src/io/parquet/write/pages.rs @@ -3,12 +3,12 @@ use std::fmt::Debug; use parquet2::page::Page; use parquet2::schema::types::{ParquetType, PrimitiveType as ParquetPrimitiveType}; use parquet2::write::DynIter; +use polars_error::{polars_bail, PolarsResult}; use super::{array_to_pages, Encoding, WriteOptions}; use crate::array::{Array, ListArray, MapArray, StructArray}; use crate::bitmap::Bitmap; use crate::datatypes::PhysicalType; -use crate::error::{Error, Result}; use crate::io::parquet::read::schema::is_nullable; use crate::offset::{Offset, OffsetsBuffer}; @@ -56,7 +56,7 @@ impl Nested { } /// Constructs the necessary `Vec>` to write the rep and def levels of `array` to parquet -pub fn to_nested(array: &dyn Array, type_: &ParquetType) -> Result>> { +pub fn to_nested(array: &dyn Array, type_: &ParquetType) -> PolarsResult>> { let mut nested = vec![]; to_nested_recursive(array, type_, &mut nested, vec![])?; @@ -68,7 +68,7 @@ fn to_nested_recursive( type_: &ParquetType, nested: &mut Vec>, mut parents: Vec, -) -> Result<()> { +) -> PolarsResult<()> { let is_optional = is_nullable(type_.get_field_info()); use PhysicalType::*; @@ -78,9 +78,9 @@ fn to_nested_recursive( let fields = if let ParquetType::GroupType { fields, .. } = type_ { fields } else { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "Parquet type must be a group for a struct array".to_string(), - )); + ) }; parents.push(Nested::Struct( @@ -99,14 +99,14 @@ fn to_nested_recursive( if let ParquetType::GroupType { fields, .. } = &fields[0] { &fields[0] } else { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "Parquet type must be a group for a list array".to_string(), - )); + ) } } else { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "Parquet type must be a group for a list array".to_string(), - )); + ) }; parents.push(Nested::List(ListNested::new( @@ -122,14 +122,14 @@ fn to_nested_recursive( if let ParquetType::GroupType { fields, .. } = &fields[0] { &fields[0] } else { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "Parquet type must be a group for a list array".to_string(), - )); + ) } } else { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "Parquet type must be a group for a list array".to_string(), - )); + ) }; parents.push(Nested::LargeList(ListNested::new( @@ -145,14 +145,14 @@ fn to_nested_recursive( if let ParquetType::GroupType { fields, .. } = &fields[0] { &fields[0] } else { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "Parquet type must be a group for a map array".to_string(), - )); + ) } } else { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "Parquet type must be a group for a map array".to_string(), - )); + ) }; parents.push(Nested::List(ListNested::new( @@ -233,7 +233,7 @@ pub fn array_to_columns + Send + Sync>( type_: ParquetType, options: WriteOptions, encoding: &[Encoding], -) -> Result>>> { +) -> PolarsResult>>> { let array = array.as_ref(); let nested = to_nested(array, &type_)?; diff --git a/crates/nano-arrow/src/io/parquet/write/primitive/basic.rs b/crates/nano-arrow/src/io/parquet/write/primitive/basic.rs index 14d5f9077b49..2982e4911dc3 100644 --- a/crates/nano-arrow/src/io/parquet/write/primitive/basic.rs +++ b/crates/nano-arrow/src/io/parquet/write/primitive/basic.rs @@ -4,10 +4,10 @@ use parquet2::page::DataPage; use parquet2::schema::types::PrimitiveType; use parquet2::statistics::{serialize_statistics, PrimitiveStatistics}; use parquet2::types::NativeType as ParquetNativeType; +use polars_error::{polars_bail, PolarsResult}; use super::super::{utils, WriteOptions}; use crate::array::{Array, PrimitiveArray}; -use crate::error::Error; use crate::io::parquet::read::schema::is_nullable; use crate::io::parquet::write::utils::ExactSizedIter; use crate::types::NativeType; @@ -78,7 +78,7 @@ pub fn array_to_page_plain( array: &PrimitiveArray, options: WriteOptions, type_: PrimitiveType, -) -> Result +) -> PolarsResult where T: NativeType, P: ParquetNativeType, @@ -92,7 +92,7 @@ pub fn array_to_page_integer( options: WriteOptions, type_: PrimitiveType, encoding: Encoding, -) -> Result +) -> PolarsResult where T: NativeType, P: ParquetNativeType, @@ -102,7 +102,7 @@ where match encoding { Encoding::DeltaBinaryPacked => array_to_page(array, options, type_, encoding, encode_delta), Encoding::Plain => array_to_page(array, options, type_, encoding, encode_plain), - other => Err(Error::nyi(format!("Encoding integer as {other:?}"))), + other => polars_bail!(nyi = "Encoding integer as {other:?}"), } } @@ -112,7 +112,7 @@ pub fn array_to_page, bool, Vec) -> Vec>( type_: PrimitiveType, encoding: Encoding, encode: F, -) -> Result +) -> PolarsResult where T: NativeType, P: ParquetNativeType, diff --git a/crates/nano-arrow/src/io/parquet/write/row_group.rs b/crates/nano-arrow/src/io/parquet/write/row_group.rs index d281b63cebda..4af95d42778f 100644 --- a/crates/nano-arrow/src/io/parquet/write/row_group.rs +++ b/crates/nano-arrow/src/io/parquet/write/row_group.rs @@ -2,6 +2,7 @@ use parquet2::error::Error as ParquetError; use parquet2::schema::types::ParquetType; use parquet2::write::Compressor; use parquet2::FallibleStreamingIterator; +use polars_error::{polars_bail, PolarsError, PolarsResult, to_compute_err}; use super::{ array_to_columns, to_parquet_schema, DynIter, DynStreamingIterator, Encoding, RowGroupIter, @@ -10,7 +11,6 @@ use super::{ use crate::array::Array; use crate::chunk::Chunk; use crate::datatypes::Schema; -use crate::error::{Error, Result}; /// Maps a [`Chunk`] and parquet-specific options to an [`RowGroupIter`] used to /// write to parquet @@ -23,7 +23,7 @@ pub fn row_group_iter + 'static + Send + Sync>( encodings: Vec>, fields: Vec, options: WriteOptions, -) -> RowGroupIter<'static, Error> { +) -> RowGroupIter<'static, PolarsError> { assert_eq!(encodings.len(), fields.len()); assert_eq!(encodings.len(), chunk.arrays().len()); DynIter::new( @@ -46,7 +46,7 @@ pub fn row_group_iter + 'static + Send + Sync>( ); let compressed_pages = Compressor::new(pages, options.compression, vec![]) - .map_err(Error::from); + .map_err(to_compute_err); Ok(DynStreamingIterator::new(compressed_pages)) }) .collect::>() @@ -57,14 +57,14 @@ pub fn row_group_iter + 'static + Send + Sync>( /// An iterator adapter that converts an iterator over [`Chunk`] into an iterator /// of row groups. /// Use it to create an iterator consumable by the parquet's API. -pub struct RowGroupIterator + 'static, I: Iterator>>> { +pub struct RowGroupIterator + 'static, I: Iterator>>> { iter: I, options: WriteOptions, parquet_schema: SchemaDescriptor, encodings: Vec>, } -impl + 'static, I: Iterator>>> RowGroupIterator { +impl + 'static, I: Iterator>>> RowGroupIterator { /// Creates a new [`RowGroupIterator`] from an iterator over [`Chunk`]. /// /// # Errors @@ -76,11 +76,11 @@ impl + 'static, I: Iterator>>> RowGro schema: &Schema, options: WriteOptions, encodings: Vec>, - ) -> Result { + ) -> PolarsResult { if encodings.len() != schema.fields.len() { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "The number of encodings must equal the number of fields".to_string(), - )); + ) } let parquet_schema = to_parquet_schema(schema)?; @@ -98,10 +98,10 @@ impl + 'static, I: Iterator>>> RowGro } } -impl + 'static + Send + Sync, I: Iterator>>> Iterator +impl + 'static + Send + Sync, I: Iterator>>> Iterator for RowGroupIterator { - type Item = Result>; + type Item = PolarsResult>; fn next(&mut self) -> Option { let options = self.options; @@ -109,10 +109,9 @@ impl + 'static + Send + Sync, I: Iterator KeyValue { } /// Creates a [`ParquetType`] from a [`Field`]. -pub fn to_parquet_type(field: &Field) -> Result { +pub fn to_parquet_type(field: &Field) -> PolarsResult { let name = field.name.clone(); let repetition = if field.is_nullable { Repetition::Optional @@ -241,15 +241,15 @@ pub fn to_parquet_type(field: &Field) -> Result { )?), DataType::Struct(fields) => { if fields.is_empty() { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "Parquet does not support writing empty structs".to_string(), - )); + ) } // recursively convert children to types/nodes let fields = fields .iter() .map(to_parquet_type) - .collect::>>()?; + .collect::>>()?; Ok(ParquetType::from_group( name, repetition, None, None, fields, None, )) @@ -372,8 +372,8 @@ pub fn to_parquet_type(field: &Field) -> Result { )], None, )), - other => Err(Error::NotYetImplemented(format!( + other => polars_bail!(nyi = "Writing the data type {other:?} is not yet implemented" - ))), + ) } } diff --git a/crates/nano-arrow/src/io/parquet/write/sink.rs b/crates/nano-arrow/src/io/parquet/write/sink.rs index d357d7b89c2d..e3674894f5da 100644 --- a/crates/nano-arrow/src/io/parquet/write/sink.rs +++ b/crates/nano-arrow/src/io/parquet/write/sink.rs @@ -6,13 +6,13 @@ use futures::future::BoxFuture; use futures::{AsyncWrite, AsyncWriteExt, FutureExt, Sink, TryFutureExt}; use parquet2::metadata::KeyValue; use parquet2::write::{FileStreamer, WriteOptions as ParquetWriteOptions}; +use polars_error::{polars_bail, PolarsError, PolarsResult, to_compute_err}; use super::file::add_arrow_schema; use super::{Encoding, SchemaDescriptor, WriteOptions}; use crate::array::Array; use crate::chunk::Chunk; use crate::datatypes::Schema; -use crate::error::Error; /// Sink that writes array [`chunks`](Chunk) as a Parquet file. /// @@ -61,7 +61,7 @@ use crate::error::Error; /// ``` pub struct FileSink<'a, W: AsyncWrite + Send + Unpin> { writer: Option>, - task: Option>, Error>>>, + task: Option>>>>, options: WriteOptions, encodings: Vec>, schema: Schema, @@ -85,11 +85,11 @@ where schema: Schema, encodings: Vec>, options: WriteOptions, - ) -> Result { + ) -> PolarsResult { if encodings.len() != schema.fields.len() { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "The number of encodings must equal the number of fields".to_string(), - )); + ) } let parquet_schema = crate::io::parquet::write::to_parquet_schema(&schema)?; @@ -132,7 +132,7 @@ where fn poll_complete( &mut self, cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { + ) -> std::task::Poll> { if let Some(task) = &mut self.task { match futures::ready!(task.poll_unpin(cx)) { Ok(writer) => { @@ -155,14 +155,13 @@ impl<'a, W> Sink>> for FileSink<'a, W> where W: AsyncWrite + Send + Unpin + 'a, { - type Error = Error; + type Error = PolarsError; fn start_send(self: Pin<&mut Self>, item: Chunk>) -> Result<(), Self::Error> { if self.schema.fields.len() != item.arrays().len() { - return Err(Error::InvalidArgumentError( + polars_bail!(InvalidOperation: "The number of arrays in the chunk must equal the number of fields in the schema" - .to_string(), - )); + ) } let this = self.get_mut(); if let Some(mut writer) = this.writer.take() { @@ -178,10 +177,11 @@ where })); Ok(()) } else { - Err(Error::Io(std::io::Error::new( + let io_err = std::io::Error::new( std::io::ErrorKind::UnexpectedEof, "writer closed".to_string(), - ))) + ); + Err(PolarsError::from(io_err)) } } @@ -221,8 +221,8 @@ where let kv_meta = add_arrow_schema(&this.schema, metadata); this.task = Some(Box::pin(async move { - writer.end(kv_meta).map_err(Error::from).await?; - writer.into_inner().close().map_err(Error::from).await?; + writer.end(kv_meta).map_err(to_compute_err).await?; + writer.into_inner().close().map_err(to_compute_err).await?; Ok(None) })); this.poll_complete(cx) diff --git a/crates/nano-arrow/src/io/parquet/write/utf8/basic.rs b/crates/nano-arrow/src/io/parquet/write/utf8/basic.rs index 39f9c157c988..76cf4cd73cd1 100644 --- a/crates/nano-arrow/src/io/parquet/write/utf8/basic.rs +++ b/crates/nano-arrow/src/io/parquet/write/utf8/basic.rs @@ -2,11 +2,11 @@ use parquet2::encoding::Encoding; use parquet2::page::DataPage; use parquet2::schema::types::PrimitiveType; use parquet2::statistics::{serialize_statistics, BinaryStatistics, ParquetStatistics, Statistics}; +use polars_error::{polars_bail, PolarsResult}; use super::super::binary::{encode_delta, ord_binary}; use super::super::{utils, WriteOptions}; use crate::array::{Array, Utf8Array}; -use crate::error::{Error, Result}; use crate::io::parquet::read::schema::is_nullable; use crate::offset::Offset; @@ -39,7 +39,7 @@ pub fn array_to_page( options: WriteOptions, type_: PrimitiveType, encoding: Encoding, -) -> Result { +) -> PolarsResult { let validity = array.validity(); let is_optional = is_nullable(&type_.field_info); @@ -64,11 +64,11 @@ pub fn array_to_page( &mut buffer, ), _ => { - return Err(Error::InvalidArgumentError(format!( + polars_bail!(InvalidOperation: "Datatype {:?} cannot be encoded by {:?} encoding", array.data_type(), encoding - ))) + ) }, } diff --git a/crates/nano-arrow/src/legacy/kernels/time.rs b/crates/nano-arrow/src/legacy/kernels/time.rs index 6db99ca288c6..349425c4ed6f 100644 --- a/crates/nano-arrow/src/legacy/kernels/time.rs +++ b/crates/nano-arrow/src/legacy/kernels/time.rs @@ -1,32 +1,27 @@ -use crate::error::{Error as ArrowError, Result}; use chrono::{LocalResult, NaiveDateTime, TimeZone}; use chrono_tz::Tz; +use polars_error::{polars_bail, PolarsResult}; pub fn convert_to_naive_local( from_tz: &Tz, to_tz: &Tz, ndt: NaiveDateTime, ambiguous: &str, -) -> Result { +) -> PolarsResult { let ndt = from_tz.from_utc_datetime(&ndt).naive_local(); match to_tz.from_local_datetime(&ndt) { LocalResult::Single(dt) => Ok(dt.naive_utc()), LocalResult::Ambiguous(dt_earliest, dt_latest) => match ambiguous { "earliest" => Ok(dt_earliest.naive_utc()), "latest" => Ok(dt_latest.naive_utc()), - "raise" => Err(ArrowError::InvalidArgumentError( - format!("datetime '{}' is ambiguous in time zone '{}'. Please use `ambiguous` to tell how it should be localized.", ndt, to_tz) - )), - ambiguous => Err(ArrowError::InvalidArgumentError( - format!("Invalid argument {}, expected one of: \"earliest\", \"latest\", \"raise\"", ambiguous) - )), + "raise" => polars_bail!(InvalidOperation: "datetime '{}' is ambiguous in time zone '{}'. Please use `ambiguous` to tell how it should be localized.", ndt, to_tz), + ambiguous => polars_bail!(InvalidOperation: + "Invalid argument {}, expected one of: \"earliest\", \"latest\", \"raise\"", ambiguous + ) }, - LocalResult::None => Err(ArrowError::InvalidArgumentError( - format!( + LocalResult::None => polars_bail!(InvalidOperation: "datetime '{}' is non-existent in time zone '{}'. Non-existent datetimes are not yet supported", ndt, to_tz - ) - , - )), + ), } } diff --git a/crates/polars-error/Cargo.toml b/crates/polars-error/Cargo.toml index 8f3ac8651c1e..6135fa69f61e 100644 --- a/crates/polars-error/Cargo.toml +++ b/crates/polars-error/Cargo.toml @@ -14,6 +14,8 @@ parquet2 = {workspace = true, optional = true, default-features = false} regex = { workspace = true, optional = true } thiserror = { workspace = true } arrow-format = "0.8.1" +simdutf8 = { workspace = true } +avro-schema = { workspace = true, optional = true } [features] python = [] diff --git a/crates/polars-error/src/lib.rs b/crates/polars-error/src/lib.rs index b33841dc0a4b..bb5d00abfa81 100644 --- a/crates/polars-error/src/lib.rs +++ b/crates/polars-error/src/lib.rs @@ -95,6 +95,26 @@ impl From for PolarsError { } } +#[cfg(feature = "avro-schema")] +impl From for PolarsError { + fn from(value: avro_schema::error::Error) -> Self { + polars_err!(ComputeError: "avro-error: {}", value) + } +} + +impl From for parquet2::error::Error { + fn from(value: PolarsError) -> Self { + // catch all needed :(. + parquet2::error::Error::OutOfSpec(format!("error: {value}")) + } +} + +impl From for PolarsError { + fn from(value: simdutf8::basic::Utf8Error) -> Self { + polars_err!(ComputeError: "invalid utf8: {}", value) + } +} + impl From for PolarsError { fn from(err: arrow_format::ipc::planus::Error) -> Self { PolarsError::Io(std::io::Error::new( @@ -184,8 +204,11 @@ macro_rules! polars_err { InvalidOperation: "{} operation not supported for dtypes `{}` and `{}`", $op, $lhs, $rhs ) }; - (oos = $arg:expr) => { - $crate::polars_err!(ComputeError: "out-of-spec: {}", $arg) + (oos = $($tt:tt)+) => { + $crate::polars_err!(ComputeError: "out-of-spec: {}", $($tt)+) + }; + (nyi = $($tt:tt)+) => { + $crate::polars_err!(ComputeError: "not yet implemented: {}", format!($($tt)+) ) }; (opq = $op:ident, $arg:expr) => { $crate::polars_err!(op = concat!("`", stringify!($op), "`"), $arg)