diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index ca0d2441ceae..da5c9c043a4f 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -130,6 +130,16 @@ jobs:
         run: |
           cargo update -p tokio --precise 1.29.1
           cargo update -p url --precise 2.5.0
+          cargo update -p once_cell --precise 1.20.3
+      - name: Downgrade arrow-pyarrow-integration-testing dependencies
+        working-directory: arrow-pyarrow-integration-testing
+        # Necessary because half 2.5 requires rust 1.81 or newer
+        run: |
+          cargo update -p half --precise 2.4.0
+      - name: Downgrade workspace dependencies
+        # Necessary because half 2.5 requires rust 1.81 or newer
+        run: |
+          cargo update -p half --precise 2.4.0
       - name: Check all packages
         run: |
           # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies
diff --git a/Cargo.toml b/Cargo.toml
index 38a48149bae2..88348803b4ae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -74,7 +74,7 @@ include = [
     "Cargo.toml",
 ]
 edition = "2021"
-rust-version = "1.70"
+rust-version = "1.81"
 
 [workspace.dependencies]
 arrow = { version = "54.2.1", path = "./arrow", default-features = false }
diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml
index 6eae8e24677d..ce66f8a9f96c 100644
--- a/arrow-array/Cargo.toml
+++ b/arrow-array/Cargo.toml
@@ -55,7 +55,7 @@ ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
 force_validate = []
 
 [dev-dependencies]
-rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
+rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
 criterion = { version = "0.5", default-features = false }
 
 [build-dependencies]
diff --git a/arrow-array/benches/fixed_size_list_array.rs b/arrow-array/benches/fixed_size_list_array.rs
index 5270a4a5def3..f9b862f88086 100644
--- a/arrow-array/benches/fixed_size_list_array.rs
+++ b/arrow-array/benches/fixed_size_list_array.rs
@@ -18,13 +18,13 @@
 use arrow_array::{Array, FixedSizeListArray, Int32Array};
 use arrow_schema::Field;
 use criterion::*;
-use rand::{thread_rng, Rng};
+use rand::{rng, Rng};
 use std::sync::Arc;
 
 fn gen_fsl(len: usize, value_len: usize) -> FixedSizeListArray {
-    let mut rng = thread_rng();
+    let mut rng = rng();
     let values = Arc::new(Int32Array::from(
-        (0..len).map(|_| rng.gen::<i32>()).collect::<Vec<_>>(),
+        (0..len).map(|_| rng.random::<i32>()).collect::<Vec<_>>(),
     ));
     let field = Arc::new(Field::new_list_field(values.data_type().clone(), true));
     FixedSizeListArray::new(field, value_len as i32, values, None)
diff --git a/arrow-array/benches/occupancy.rs b/arrow-array/benches/occupancy.rs
index ed4b94351c28..fd334b613257 100644
--- a/arrow-array/benches/occupancy.rs
+++ b/arrow-array/benches/occupancy.rs
@@ -19,7 +19,7 @@ use arrow_array::types::Int32Type;
 use arrow_array::{DictionaryArray, Int32Array};
 use arrow_buffer::NullBuffer;
 use criterion::*;
-use rand::{thread_rng, Rng};
+use rand::{rng, Rng};
 use std::sync::Arc;
 
 fn gen_dict(
@@ -28,11 +28,11 @@ fn gen_dict(
     occupancy: f64,
     null_percent: f64,
 ) -> DictionaryArray<Int32Type> {
-    let mut rng = thread_rng();
+    let mut rng = rng();
     let values = Int32Array::from(vec![0; values_len]);
     let max_key = (values_len as f64 * occupancy) as i32;
-    let keys = (0..len).map(|_| rng.gen_range(0..max_key)).collect();
-    let nulls = (0..len).map(|_| !rng.gen_bool(null_percent)).collect();
+    let keys = (0..len).map(|_| rng.random_range(0..max_key)).collect();
+    let nulls = (0..len).map(|_| !rng.random_bool(null_percent)).collect();
     let keys = Int32Array::new(keys, Some(NullBuffer::new(nulls)));
     DictionaryArray::new(keys, Arc::new(values))
diff --git a/arrow-array/benches/union_array.rs b/arrow-array/benches/union_array.rs
index c5b2ec0f7752..753cc8148eef 100644
--- a/arrow-array/benches/union_array.rs
+++ b/arrow-array/benches/union_array.rs
@@ -24,17 +24,17 @@
 use arrow_array::{Array, ArrayRef, Int32Array, UnionArray};
 use arrow_buffer::{NullBuffer, ScalarBuffer};
 use arrow_schema::{DataType, Field, UnionFields};
 use criterion::*;
-use rand::{thread_rng, Rng};
+use rand::{rng, Rng};
 
 fn array_with_nulls() -> ArrayRef {
-    let mut rng = thread_rng();
+    let mut rng = rng();
 
-    let values = ScalarBuffer::from_iter(repeat_with(|| rng.gen()).take(4096));
+    let values = ScalarBuffer::from_iter(repeat_with(|| rng.random()).take(4096));
 
     // nulls with at least one null and one valid
     let nulls: NullBuffer = [true, false]
         .into_iter()
-        .chain(repeat_with(|| rng.gen()))
+        .chain(repeat_with(|| rng.random()))
         .take(4096)
         .collect();
@@ -42,9 +42,9 @@ fn array_with_nulls() -> ArrayRef {
 }
 
 fn array_without_nulls() -> ArrayRef {
-    let mut rng = thread_rng();
+    let mut rng = rng();
 
-    let values = ScalarBuffer::from_iter(repeat_with(|| rng.gen()).take(4096));
+    let values = ScalarBuffer::from_iter(repeat_with(|| rng.random()).take(4096));
 
     Arc::new(Int32Array::new(values.clone(), None))
 }
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index 9c2d4af8c454..fcebf5a0f718 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -479,7 +479,7 @@ impl From for BooleanArray {
 mod tests {
     use super::*;
     use arrow_buffer::Buffer;
-    use rand::{thread_rng, Rng};
+    use rand::{rng, Rng};
 
     #[test]
     fn test_boolean_fmt_debug() {
@@ -667,11 +667,11 @@ mod tests {
     #[test]
     #[cfg_attr(miri, ignore)] // Takes too long
     fn test_true_false_count() {
-        let mut rng = thread_rng();
+        let mut rng = rng();
 
         for _ in 0..10 {
             // No nulls
-            let d: Vec<_> = (0..2000).map(|_| rng.gen_bool(0.5)).collect();
+            let d: Vec<_> = (0..2000).map(|_| rng.random_bool(0.5)).collect();
             let b = BooleanArray::from(d.clone());
 
             let expected_true = d.iter().filter(|x| **x).count();
@@ -680,7 +680,7 @@ mod tests {
 
             // With nulls
             let d: Vec<_> = (0..2000)
-                .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5)))
+                .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5)))
                 .collect();
             let b = BooleanArray::from(d.clone());
diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs
index b340bf9a9065..b305025706bc 100644
--- a/arrow-array/src/array/run_array.rs
+++ b/arrow-array/src/array/run_array.rs
@@ -662,8 +662,8 @@ where
 #[cfg(test)]
 mod tests {
+    use rand::rng;
     use rand::seq::SliceRandom;
-    use rand::thread_rng;
     use rand::Rng;
 
     use super::*;
@@ -691,7 +691,7 @@ mod tests {
         ];
         let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
         let mut ix = 0;
-        let mut rng = thread_rng();
+        let mut rng = rng();
         // run length can go up to 8. Cap the max run length for smaller arrays to size / 2.
         let max_run_length = 8_usize.min(1_usize.max(size / 2));
         while result.len() < size {
@@ -700,7 +700,7 @@ mod tests {
                 seed.shuffle(&mut rng);
             }
             // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
-            let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
+            let num = max_run_length.min(rng.random_range(1..=max_run_length));
             for _ in 0..num {
                 result.push(seed[ix]);
             }
@@ -1000,7 +1000,7 @@ mod tests {
         let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
         // add same indices once more
         logical_indices.append(&mut logical_indices.clone());
-        let mut rng = thread_rng();
+        let mut rng = rng();
         logical_indices.shuffle(&mut rng);
 
         let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
@@ -1036,7 +1036,7 @@ mod tests {
         let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
         // add same indices once more
         logical_indices.append(&mut logical_indices.clone());
-        let mut rng = thread_rng();
+        let mut rng = rng();
         logical_indices.shuffle(&mut rng);
 
         // test for offset = 0 and slice length = slice_len
diff --git a/arrow-array/src/run_iterator.rs b/arrow-array/src/run_iterator.rs
index 2922bf04dd2f..4fb0eef32eca 100644
--- a/arrow-array/src/run_iterator.rs
+++ b/arrow-array/src/run_iterator.rs
@@ -172,7 +172,7 @@ where
 #[cfg(test)]
 mod tests {
-    use rand::{seq::SliceRandom, thread_rng, Rng};
+    use rand::{rng, seq::SliceRandom, Rng};
 
     use crate::{
         array::{Int32Array, StringArray},
@@ -200,7 +200,7 @@ mod tests {
         ];
         let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
         let mut ix = 0;
-        let mut rng = thread_rng();
+        let mut rng = rng();
         // run length can go up to 8. Cap the max run length for smaller arrays to size / 2.
         let max_run_length = 8_usize.min(1_usize.max(size / 2));
         while result.len() < size {
@@ -209,7 +209,7 @@ mod tests {
                 seed.shuffle(&mut rng);
             }
             // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
-            let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
+            let num = max_run_length.min(rng.random_range(1..=max_run_length));
             for _ in 0..num {
                 result.push(seed[ix]);
             }
diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml
index c103c2ecc0f3..b1391559f292 100644
--- a/arrow-avro/Cargo.toml
+++ b/arrow-avro/Cargo.toml
@@ -51,5 +51,4 @@ crc = { version = "3.0", optional = true }
 
 [dev-dependencies]
-rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
-
+rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml
index 68bfe8ddf732..69029759dee1 100644
--- a/arrow-buffer/Cargo.toml
+++ b/arrow-buffer/Cargo.toml
@@ -40,7 +40,7 @@ half = { version = "2.1", default-features = false }
 
 [dev-dependencies]
 criterion = { version = "0.5", default-features = false }
-rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
+rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
 
 [build-dependencies]
diff --git a/arrow-buffer/benches/i256.rs b/arrow-buffer/benches/i256.rs
index ebb45e793bd0..f94ca6951ef9 100644
--- a/arrow-buffer/benches/i256.rs
+++ b/arrow-buffer/benches/i256.rs
@@ -47,8 +47,8 @@ fn criterion_benchmark(c: &mut Criterion) {
     let numerators: Vec<_> = (0..SIZE)
         .map(|_| {
-            let high = rng.gen_range(1000..i128::MAX);
-            let low = rng.gen();
+            let high = rng.random_range(1000..i128::MAX);
+            let low = rng.random();
             i256::from_parts(low, high)
         })
         .collect();
@@ -56,7 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     let divisors: Vec<_> = numerators
         .iter()
         .map(|n| {
-            let quotient = rng.gen_range(1..100_i32);
+            let quotient = rng.random_range(1..100_i32);
             n.wrapping_div(i256::from(quotient))
         })
         .collect();
@@ -70,7 +70,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     });
 
     let divisors: Vec<_> = (0..SIZE)
-        .map(|_| i256::from(rng.gen_range(1..100_i32)))
+        .map(|_| i256::from(rng.random_range(1..100_i32)))
         .collect();
 
     c.bench_function("i256_div_rem small divisor", |b| {
diff --git a/arrow-buffer/benches/offset.rs b/arrow-buffer/benches/offset.rs
index 1aea5024fbd1..12c00a60c53a 100644
--- a/arrow-buffer/benches/offset.rs
+++ b/arrow-buffer/benches/offset.rs
@@ -24,7 +24,7 @@ const SIZE: usize = 1024;
 fn criterion_benchmark(c: &mut Criterion) {
     let mut rng = StdRng::seed_from_u64(42);
-    let lengths: Vec<usize> = black_box((0..SIZE).map(|_| rng.gen_range(0..40)).collect());
+    let lengths: Vec<usize> = black_box((0..SIZE).map(|_| rng.random_range(0..40)).collect());
 
     c.bench_function("OffsetBuffer::from_lengths", |b| {
         b.iter(|| OffsetBuffer::<i32>::from_lengths(lengths.iter().copied()));
diff --git a/arrow-buffer/src/bigint/mod.rs b/arrow-buffer/src/bigint/mod.rs
index 41146f0ad11d..9868ab55cc11 100644
--- a/arrow-buffer/src/bigint/mod.rs
+++ b/arrow-buffer/src/bigint/mod.rs
@@ -840,7 +840,7 @@ impl ToPrimitive for i256 {
 mod tests {
     use super::*;
     use num::Signed;
-    use rand::{thread_rng, Rng};
+    use rand::{rng, Rng};
 
     #[test]
     fn test_signed_cmp() {
@@ -1091,16 +1091,16 @@ mod tests {
     #[test]
     #[cfg_attr(miri, ignore)]
     fn test_i256_fuzz() {
-        let mut rng = thread_rng();
+        let mut rng = rng();
 
         for _ in 0..1000 {
             let mut l = [0_u8; 32];
-            let len = rng.gen_range(0..32);
-            l.iter_mut().take(len).for_each(|x| *x = rng.gen());
+            let len = rng.random_range(0..32);
+            l.iter_mut().take(len).for_each(|x| *x = rng.random());
 
             let mut r = [0_u8; 32];
-            let len = rng.gen_range(0..32);
-            r.iter_mut().take(len).for_each(|x| *x = rng.gen());
+            let len = rng.random_range(0..32);
+            r.iter_mut().take(len).for_each(|x| *x = rng.random());
 
             test_ops(i256::from_le_bytes(l), i256::from_le_bytes(r))
         }
diff --git a/arrow-buffer/src/builder/boolean.rs b/arrow-buffer/src/builder/boolean.rs
index 83d64ab8d8b3..bdcc3a55dbf2 100644
--- a/arrow-buffer/src/builder/boolean.rs
+++ b/arrow-buffer/src/builder/boolean.rs
@@ -416,7 +416,7 @@ mod tests {
         let mut buffer = BooleanBufferBuilder::new(12);
 
         let mut all_bools = vec![];
-        let mut rng = rand::thread_rng();
+        let mut rng = rand::rng();
 
         let src_len = 32;
         let (src, compacted_src) = {
diff --git a/arrow-buffer/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs
index 54995314c49b..0562c7ad5998 100644
--- a/arrow-buffer/src/util/bit_chunk_iterator.rs
+++ b/arrow-buffer/src/util/bit_chunk_iterator.rs
@@ -371,7 +371,10 @@ impl ExactSizeIterator for BitChunkIterator<'_> {
 #[cfg(test)]
 mod tests {
+    use rand::distr::uniform::UniformSampler;
+    use rand::distr::uniform::UniformUsize;
     use rand::prelude::*;
+    use rand::rng;
 
     use crate::buffer::Buffer;
     use crate::util::bit_chunk_iterator::UnalignedBitChunk;
@@ -624,21 +627,25 @@ mod tests {
     #[test]
     #[cfg_attr(miri, ignore)]
     fn fuzz_unaligned_bit_chunk_iterator() {
-        let mut rng = thread_rng();
+        let mut rng = rng();
+        let uusize = UniformUsize::new(usize::MIN, usize::MAX).unwrap();
 
         for _ in 0..100 {
-            let mask_len = rng.gen_range(0..1024);
-            let bools: Vec<_> = std::iter::from_fn(|| Some(rng.gen()))
+            let mask_len = rng.random_range(0..1024);
+            let bools: Vec<_> = std::iter::from_fn(|| Some(rng.random()))
                 .take(mask_len)
                 .collect();
 
             let buffer = Buffer::from_iter(bools.iter().cloned());
 
             let max_offset = 64.min(mask_len);
-            let offset = rng.gen::<usize>().checked_rem(max_offset).unwrap_or(0);
+            let offset = uusize.sample(&mut rng).checked_rem(max_offset).unwrap_or(0);
 
             let max_truncate = 128.min(mask_len - offset);
-            let truncate = rng.gen::<usize>().checked_rem(max_truncate).unwrap_or(0);
+            let truncate = uusize
+                .sample(&mut rng)
+                .checked_rem(max_truncate)
+                .unwrap_or(0);
 
             let unaligned =
                 UnalignedBitChunk::new(buffer.as_slice(), offset, mask_len - offset - truncate);
diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs
index 97be7e006dec..0d694d13ec75 100644
--- a/arrow-buffer/src/util/bit_mask.rs
+++ b/arrow-buffer/src/util/bit_mask.rs
@@ -164,7 +164,7 @@ mod tests {
     use super::*;
     use crate::bit_util::{get_bit, set_bit, unset_bit};
     use rand::prelude::StdRng;
-    use rand::{Fill, Rng, SeedableRng};
+    use rand::{Rng, SeedableRng, TryRngCore};
     use std::fmt::Display;
 
     #[test]
@@ -322,20 +322,20 @@ mod tests {
         // -------------------+-----------------+-------
 
         // length of data to copy
-        let len = rng.gen_range(0..=200);
+        let len = rng.random_range(0..=200);
 
         // randomly pick where we will write to
-        let offset_write_bits = rng.gen_range(0..=200);
+        let offset_write_bits = rng.random_range(0..=200);
         let offset_write_bytes = if offset_write_bits % 8 == 0 {
             offset_write_bits / 8
         } else {
             (offset_write_bits / 8) + 1
         };
-        let extra_write_data_bytes = rng.gen_range(0..=5); // ensure 0 shows up often
+        let extra_write_data_bytes = rng.random_range(0..=5); // ensure 0 shows up often
 
         // randomly decide where we will read from
-        let extra_read_data_bytes = rng.gen_range(0..=5); // make sure 0 shows up often
-        let offset_read_bits = rng.gen_range(0..=200);
+        let extra_read_data_bytes = rng.random_range(0..=5); // make sure 0 shows up often
+        let offset_read_bits = rng.random_range(0..=200);
         let offset_read_bytes = if offset_read_bits % 8 != 0 {
             (offset_read_bits / 8) + 1
         } else {
@@ -356,7 +356,7 @@ mod tests {
         self.data
             .resize(offset_read_bytes + len + extra_read_data_bytes, 0);
         // fill source data with random bytes
-        self.data.try_fill(rng).unwrap();
+        rng.try_fill_bytes(self.data.as_mut_slice()).unwrap();
 
         self.offset_read = offset_read_bits;
         self.len = len;
diff --git a/arrow-buffer/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs
index ed5d363d607f..f39cb69c314d 100644
--- a/arrow-buffer/src/util/bit_util.rs
+++ b/arrow-buffer/src/util/bit_util.rs
@@ -153,7 +153,7 @@ mod tests {
         let mut expected = vec![];
         let mut rng = seedable_rng();
         for i in 0..8 * NUM_BYTE {
-            let b = rng.gen_bool(0.5);
+            let b = rng.random_bool(0.5);
             expected.push(b);
             if b {
                 set_bit(&mut buf[..], i)
@@ -197,7 +197,7 @@ mod tests {
         let mut expected = vec![];
         let mut rng = seedable_rng();
         for i in 0..8 * NUM_BYTE {
-            let b = rng.gen_bool(0.5);
+            let b = rng.random_bool(0.5);
             expected.push(b);
             if b {
                 unsafe {
@@ -221,7 +221,7 @@ mod tests {
         let mut expected = vec![];
         let mut rng = seedable_rng();
         for i in 0..8 * NUM_BYTE {
-            let b = rng.gen_bool(0.5);
+            let b = rng.random_bool(0.5);
             expected.push(b);
             if !b {
                 unsafe {
@@ -247,7 +247,7 @@ mod tests {
         let mut v = HashSet::new();
         let mut rng = seedable_rng();
         for _ in 0..NUM_SETS {
-            let offset = rng.gen_range(0..8 * NUM_BYTES);
+            let offset = rng.random_range(0..8 * NUM_BYTES);
             v.insert(offset);
             set_bit(&mut buffer[..], offset);
         }
diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index 4046f5226094..22e446ce313b 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -58,7 +58,7 @@ ryu = "1.0.16"
 
 [dev-dependencies]
 criterion = { version = "0.5", default-features = false }
 half = { version = "2.1", default-features = false }
-rand = "0.8"
+rand = "0.9"
 
 [build-dependencies]
diff --git a/arrow-cast/src/base64.rs b/arrow-cast/src/base64.rs
index 534b21878c56..e7bb84ebe24c 100644
--- a/arrow-cast/src/base64.rs
+++ b/arrow-cast/src/base64.rs
@@ -90,7 +90,7 @@ pub fn b64_decode(
 mod tests {
     use super::*;
     use arrow_array::BinaryArray;
-    use rand::{thread_rng, Rng};
+    use rand::{rng, Rng};
 
     fn test_engine<E: Engine>(e: &E, a: &BinaryArray) {
         let encoded = b64_encode(e, a);
@@ -105,12 +105,12 @@ mod tests {
     #[test]
     fn test_b64() {
-        let mut rng = thread_rng();
-        let len = rng.gen_range(1024..1050);
+        let mut rng = rng();
+        let len = rng.random_range(1024..1050);
         let data: BinaryArray = (0..len)
             .map(|_| {
-                let len = rng.gen_range(0..16);
-                Some((0..len).map(|_| rng.gen()).collect::<Vec<_>>())
+                let len = rng.random_range(0..16);
+                Some((0..len).map(|_| rng.random()).collect::<Vec<_>>())
             })
             .collect();
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index 55834ad92a01..28d36db89af0 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -463,20 +463,11 @@ impl Parser for Float64Type {
     }
 }
 
-/// This API is only stable since 1.70 so can't use it when current MSRV is lower
-#[inline(always)]
-fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
-    match opt {
-        None => false,
-        Some(x) => f(x),
-    }
-}
-
 macro_rules! parser_primitive {
     ($t:ty) => {
         impl Parser for $t {
             fn parse(string: &str) -> Option<Self::Native> {
-                if !is_some_and(string.as_bytes().last(), |x| x.is_ascii_digit()) {
+                if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
                     return None;
                 }
                 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml
index fbb295036a9b..78245b9040e1 100644
--- a/arrow-flight/Cargo.toml
+++ b/arrow-flight/Cargo.toml
@@ -20,7 +20,7 @@ name = "arrow-flight"
 description = "Apache Arrow Flight"
 version = { workspace = true }
 edition = { workspace = true }
-rust-version = "1.71.1"
+rust-version = { workspace = true }
 authors = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
@@ -43,12 +43,12 @@ base64 = { version = "0.22", default-features = false, features = ["std"] }
 bytes = { version = "1", default-features = false }
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
 once_cell = { version = "1", optional = true }
-paste = { version = "1.0" , optional = true }
-prost = { version = "0.13.1", default-features = false, features = ["prost-derive"] }
+paste = { version = "1.0" }
+prost = { version = "0.12.3", default-features = false, features = ["prost-derive"] }
 # For Timestamp type
-prost-types = { version = "0.13.1", default-features = false }
-tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"], optional = true }
-tonic = { version = "0.12.3", default-features = false, features = ["transport", "codegen", "prost"] }
+prost-types = { version = "0.12.3", default-features = false }
+tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"] }
+tonic = { version = "0.11.0", default-features = false, features = ["transport", "codegen", "prost"] }
 
 # CLI-related dependencies
 anyhow = { version = "1.0", optional = true }
@@ -61,7 +61,7 @@ all-features = true
 
 [features]
 default = []
-flight-sql-experimental = ["dep:arrow-arith", "dep:arrow-data", "dep:arrow-ord", "dep:arrow-row", "dep:arrow-select", "dep:arrow-string", "dep:once_cell", "dep:paste"]
+flight-sql-experimental = ["dep:arrow-arith", "dep:arrow-data", "dep:arrow-ord", "dep:arrow-row", "dep:arrow-select", "dep:arrow-string", "dep:once_cell"]
 tls = ["tonic/tls"]
 # Enable CLI tools
 cli = ["arrow-array/chrono-tz", "arrow-cast/prettyprint", "tonic/tls-webpki-roots", "dep:anyhow", "dep:clap", "dep:tracing-log", "dep:tracing-subscriber"]
@@ -69,9 +69,8 @@ cli = ["arrow-array/chrono-tz", "arrow-cast/prettyprint", "tonic/tls-webpki-root
 
 [dev-dependencies]
 arrow-cast = { workspace = true, features = ["prettyprint"] }
 assert_cmd = "2.0.8"
-http = "1.1.0"
-http-body = "1.0.0"
-hyper-util = "0.1"
+http = "0.2.9"
+http-body = "0.4.5"
 pin-project-lite = "0.2"
 tempfile = "3.3"
 tracing-log = { version = "0.2" }
diff --git a/arrow-flight/examples/flight_sql_server.rs b/arrow-flight/examples/flight_sql_server.rs
index 657298b4a8b3..7367886f61bc 100644
--- a/arrow-flight/examples/flight_sql_server.rs
+++ b/arrow-flight/examples/flight_sql_server.rs
@@ -792,8 +792,7 @@ impl ProstMessageExt for FetchResults {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use futures::{TryFutureExt, TryStreamExt};
-    use hyper_util::rt::TokioIo;
+    use futures::TryStreamExt;
     use std::fs;
     use std::future::Future;
     use std::net::SocketAddr;
@@ -854,8 +853,7 @@ mod tests {
             .serve_with_incoming(stream);
 
         let request_future = async {
-            let connector =
-                service_fn(move |_| UnixStream::connect(path.clone()).map_ok(TokioIo::new));
+            let connector = service_fn(move |_| UnixStream::connect(path.clone()));
             let channel = Endpoint::try_from("http://example.com")
                 .unwrap()
                 .connect_with_connector(connector)
diff --git a/arrow-flight/gen/Cargo.toml b/arrow-flight/gen/Cargo.toml
index 77b0a190b6d3..7264a527ca8d 100644
--- a/arrow-flight/gen/Cargo.toml
+++ b/arrow-flight/gen/Cargo.toml
@@ -20,7 +20,7 @@ name = "gen"
 description = "Code generation for arrow-flight"
 version = "0.1.0"
 edition = { workspace = true }
-rust-version = "1.71.1"
+rust-version = { workspace = true }
 authors = { workspace = true }
 homepage = { workspace = true }
 repository = { workspace = true }
@@ -32,5 +32,6 @@ publish = false
 [dependencies]
 # Pin specific version of the tonic-build dependencies to avoid auto-generated
 # (and checked in) arrow.flight.protocol.rs from changing
-prost-build = { version = "=0.13.5", default-features = false }
-tonic-build = { version = "=0.12.3", default-features = false, features = ["transport", "prost"] }
+proc-macro2 = { version = "=1.0.86", default-features = false }
+prost-build = { version = "=0.12.6", default-features = false }
+tonic-build = { version = "=0.11.0", default-features = false, features = ["transport", "prost"] }
diff --git a/arrow-flight/gen/src/main.rs b/arrow-flight/gen/src/main.rs
index a69134e7acbe..e8019b36607c 100644
--- a/arrow-flight/gen/src/main.rs
+++ b/arrow-flight/gen/src/main.rs
@@ -29,7 +29,7 @@ fn main() -> Result<(), Box> {
         // protoc in Ubuntu builder needs this option
         .protoc_arg("--experimental_allow_proto3_optional")
         .out_dir("src")
-        .compile_protos_with_config(prost_config(), &[proto_path], &[proto_dir])?;
+        .compile_with_config(prost_config(), &[proto_path], &[proto_dir])?;
 
     // read file contents to string
     let mut file = OpenOptions::new()
@@ -52,7 +52,7 @@ fn main() -> Result<(), Box> {
         // protoc in Ubuntu builder needs this option
         .protoc_arg("--experimental_allow_proto3_optional")
        .out_dir("src/sql")
-        .compile_protos_with_config(prost_config(), &[proto_path], &[proto_dir])?;
+        .compile_with_config(prost_config(), &[proto_path], &[proto_dir])?;
 
     // read file contents to string
     let mut file = OpenOptions::new()
diff --git a/arrow-flight/src/arrow.flight.protocol.rs b/arrow-flight/src/arrow.flight.protocol.rs
index 0cd4f6948b77..bc314de9d19f 100644
--- a/arrow-flight/src/arrow.flight.protocol.rs
+++ b/arrow-flight/src/arrow.flight.protocol.rs
@@ -3,6 +3,7 @@
 // This file is @generated by prost-build.
 ///
 /// The request that a client provides to a server on handshake.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct HandshakeRequest {
     ///
@@ -14,6 +15,7 @@ pub struct HandshakeRequest {
     #[prost(bytes = "bytes", tag = "2")]
     pub payload: ::prost::bytes::Bytes,
 }
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct HandshakeResponse {
     ///
@@ -27,6 +29,7 @@ pub struct HandshakeResponse {
 }
 ///
 /// A message for doing simple auth.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct BasicAuth {
     #[prost(string, tag = "2")]
@@ -34,11 +37,13 @@ pub struct BasicAuth {
     #[prost(string, tag = "3")]
     pub password: ::prost::alloc::string::String,
 }
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct Empty {}
 ///
 /// Describes an available action, including both the name used for execution
 /// along with a short description of the purpose of the action.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionType {
     #[prost(string, tag = "1")]
@@ -49,6 +54,7 @@ pub struct ActionType {
 ///
 /// A service specific expression that can be used to return a limited set
 /// of available Arrow Flight streams.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct Criteria {
     #[prost(bytes = "bytes", tag = "1")]
@@ -56,6 +62,7 @@ pub struct Criteria {
 }
 ///
 /// An opaque action specific for the service.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct Action {
     #[prost(string, tag = "1")]
@@ -67,6 +74,7 @@ pub struct Action {
 /// The request of the CancelFlightInfo action.
 ///
 /// The request should be stored in Action.body.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CancelFlightInfoRequest {
     #[prost(message, optional, tag = "1")]
@@ -76,6 +84,7 @@ pub struct CancelFlightInfoRequest {
 /// The request of the RenewFlightEndpoint action.
 ///
 /// The request should be stored in Action.body.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct RenewFlightEndpointRequest {
     #[prost(message, optional, tag = "1")]
@@ -83,6 +92,7 @@ pub struct RenewFlightEndpointRequest {
 }
 ///
 /// An opaque result returned after executing an action.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct Result {
     #[prost(bytes = "bytes", tag = "1")]
@@ -92,13 +102,15 @@ pub struct Result {
 /// The result of the CancelFlightInfo action.
 ///
 /// The result should be stored in Result.body.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CancelFlightInfoResult {
     #[prost(enumeration = "CancelStatus", tag = "1")]
     pub status: i32,
 }
 ///
 /// Wrap the result of a getSchema call
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct SchemaResult {
     /// The schema of the dataset in its IPC form:
@@ -111,6 +123,7 @@ pub struct SchemaResult {
 ///
 /// The name or tag for a Flight. May be used as a way to retrieve or generate
 /// a flight or be used to expose a set of previously defined flights.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct FlightDescriptor {
     #[prost(enumeration = "flight_descriptor::DescriptorType", tag = "1")]
@@ -161,9 +174,9 @@ pub mod flight_descriptor {
         /// (if the ProtoBuf definition does not change) and safe for programmatic use.
         pub fn as_str_name(&self) -> &'static str {
             match self {
-                Self::Unknown => "UNKNOWN",
-                Self::Path => "PATH",
-                Self::Cmd => "CMD",
+                DescriptorType::Unknown => "UNKNOWN",
+                DescriptorType::Path => "PATH",
+                DescriptorType::Cmd => "CMD",
             }
         }
         /// Creates an enum from field names used in the ProtoBuf definition.
@@ -180,6 +193,7 @@ pub mod flight_descriptor {
 ///
 /// The access coordinates for retrieval of a dataset. With a FlightInfo, a
 /// consumer is able to determine how to retrieve a dataset.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct FlightInfo {
     /// The schema of the dataset in its IPC form:
@@ -239,6 +253,7 @@ pub struct FlightInfo {
 }
 ///
 /// The information to process a long-running query.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct PollInfo {
     ///
@@ -279,6 +294,7 @@ pub struct PollInfo {
 }
 ///
 /// A particular stream or split associated with a flight.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct FlightEndpoint {
     ///
@@ -322,6 +338,7 @@ pub struct FlightEndpoint {
 ///
 /// A location where a Flight service will accept retrieval of a particular
 /// stream given a ticket.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct Location {
     #[prost(string, tag = "1")]
@@ -333,6 +350,7 @@ pub struct Location {
 ///
 /// Tickets are meant to be single use. It is an error/application-defined
 /// behavior to reuse a ticket.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct Ticket {
     #[prost(bytes = "bytes", tag = "1")]
@@ -340,6 +358,7 @@ pub struct Ticket {
 }
 ///
 /// A batch of Arrow data as part of a stream of batches.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct FlightData {
     ///
@@ -365,6 +384,7 @@ pub struct FlightData {
 }
 /// *
 /// The response message associated with the submission of a DoPut.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct PutResult {
     #[prost(bytes = "bytes", tag = "1")]
@@ -398,10 +418,10 @@ impl CancelStatus {
     /// (if the ProtoBuf definition does not change) and safe for programmatic use.
     pub fn as_str_name(&self) -> &'static str {
         match self {
-            Self::Unspecified => "CANCEL_STATUS_UNSPECIFIED",
-            Self::Cancelled => "CANCEL_STATUS_CANCELLED",
-            Self::Cancelling => "CANCEL_STATUS_CANCELLING",
-            Self::NotCancellable => "CANCEL_STATUS_NOT_CANCELLABLE",
+            CancelStatus::Unspecified => "CANCEL_STATUS_UNSPECIFIED",
+            CancelStatus::Cancelled => "CANCEL_STATUS_CANCELLED",
+            CancelStatus::Cancelling => "CANCEL_STATUS_CANCELLING",
+            CancelStatus::NotCancellable => "CANCEL_STATUS_NOT_CANCELLABLE",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -417,13 +437,7 @@ impl CancelStatus {
 }
 /// Generated client implementations.
 pub mod flight_service_client {
-    #![allow(
-        unused_variables,
-        dead_code,
-        missing_docs,
-        clippy::wildcard_imports,
-        clippy::let_unit_value,
-    )]
+    #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)]
     use tonic::codegen::*;
     use tonic::codegen::http::Uri;
     ///
@@ -450,8 +464,8 @@ pub mod flight_service_client {
     where
         T: tonic::client::GrpcService,
         T::Error: Into,
-        T::ResponseBody: Body + std::marker::Send + 'static,
-        ::Error: Into + std::marker::Send,
+        T::ResponseBody: Body + Send + 'static,
+        ::Error: Into + Send,
     {
         pub fn new(inner: T) -> Self {
             let inner = tonic::client::Grpc::new(inner);
@@ -476,7 +490,7 @@ pub mod flight_service_client {
             >,
             ,
-            >>::Error: Into + std::marker::Send + std::marker::Sync,
+            >>::Error: Into + Send + Sync,
         {
            FlightServiceClient::new(InterceptedService::new(inner, interceptor))
         }
@@ -527,7 +541,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -560,7 +575,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -594,7 +610,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -643,7 +660,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -674,7 +692,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -705,7 +724,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -736,7 +756,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -766,7 +787,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -799,7 +821,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -829,7 +852,8 @@ pub mod flight_service_client {
                 .ready()
                 .await
                 .map_err(|e| {
-                    tonic::Status::unknown(
+                    tonic::Status::new(
+                        tonic::Code::Unknown,
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
@@ -848,22 +872,16 @@ pub mod flight_service_client {
 }
 /// Generated server implementations.
 pub mod flight_service_server {
-    #![allow(
-        unused_variables,
-        dead_code,
-        missing_docs,
-        clippy::wildcard_imports,
-        clippy::let_unit_value,
-    )]
+    #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)]
    use tonic::codegen::*;
     /// Generated trait containing gRPC methods that should be implemented for use with FlightServiceServer.
     #[async_trait]
-    pub trait FlightService: std::marker::Send + std::marker::Sync + 'static {
+    pub trait FlightService: Send + Sync + 'static {
         /// Server streaming response type for the Handshake method.
         type HandshakeStream: tonic::codegen::tokio_stream::Stream<
                 Item = std::result::Result,
             >
-            + std::marker::Send
+            + Send
             + 'static;
         ///
         /// Handshake between client and server. Depending on the server, the
@@ -878,7 +896,7 @@ pub mod flight_service_server {
         type ListFlightsStream: tonic::codegen::tokio_stream::Stream<
                 Item = std::result::Result,
             >
-            + std::marker::Send
+            + Send
             + 'static;
         ///
         /// Get a list of available streams given a particular criteria. Most flight
@@ -949,7 +967,7 @@ pub mod flight_service_server {
         type DoGetStream: tonic::codegen::tokio_stream::Stream<
                 Item = std::result::Result,
             >
-            + std::marker::Send
+            + Send
             + 'static;
         ///
         /// Retrieve a single stream associated with a particular descriptor
@@ -964,7 +982,7 @@ pub mod flight_service_server {
         type DoPutStream: tonic::codegen::tokio_stream::Stream<
                 Item = std::result::Result,
             >
-            + std::marker::Send
+            + Send
             + 'static;
         ///
         /// Push a stream to the flight service associated with a particular
@@ -981,7 +999,7 @@ pub mod flight_service_server {
         type DoExchangeStream: tonic::codegen::tokio_stream::Stream<
                 Item = std::result::Result,
             >
-            + std::marker::Send
+            + Send
             + 'static;
         ///
         /// Open a bidirectional data channel for a given descriptor. This
@@ -997,7 +1015,7 @@ pub mod flight_service_server {
         type DoActionStream: tonic::codegen::tokio_stream::Stream<
                 Item = std::result::Result,
             >
-            + std::marker::Send
+            + Send
             + 'static;
         ///
         /// Flight services can support an arbitrary number of simple actions in
@@ -1014,7 +1032,7 @@ pub mod flight_service_server {
         type ListActionsStream: tonic::codegen::tokio_stream::Stream<
                 Item = std::result::Result,
             >
-            + std::marker::Send
+            + Send
             + 'static;
         ///
         /// A flight service exposes all of the available action types that it has
@@ -1034,18 +1052,20 @@ pub mod flight_service_server {
     /// accessed using the Arrow Flight Protocol. Additionally, a flight service
     /// can expose a set of actions that are available.
     #[derive(Debug)]
-    pub struct FlightServiceServer {
-        inner: Arc,
+    pub struct FlightServiceServer {
+        inner: _Inner,
         accept_compression_encodings: EnabledCompressionEncodings,
         send_compression_encodings: EnabledCompressionEncodings,
         max_decoding_message_size: Option,
         max_encoding_message_size: Option,
     }
-    impl FlightServiceServer {
+    struct _Inner(Arc);
+    impl FlightServiceServer {
         pub fn new(inner: T) -> Self {
             Self::from_arc(Arc::new(inner))
         }
         pub fn from_arc(inner: Arc) -> Self {
+            let inner = _Inner(inner);
             Self {
                 inner,
                 accept_compression_encodings: Default::default(),
@@ -1095,8 +1115,8 @@ pub mod flight_service_server {
     impl tonic::codegen::Service> for FlightServiceServer
     where
         T: FlightService,
-        B: Body + std::marker::Send + 'static,
-        B::Error: Into + std::marker::Send + 'static,
+        B: Body + Send + 'static,
+        B::Error: Into + Send + 'static,
     {
         type Response = http::Response;
         type Error = std::convert::Infallible;
@@ -1108,6 +1128,7 @@ pub mod flight_service_server {
             Poll::Ready(Ok(()))
         }
         fn call(&mut self, req: http::Request) -> Self::Future {
+            let inner = self.inner.clone();
             match req.uri().path() {
                 "/arrow.flight.protocol.FlightService/Handshake" => {
                     #[allow(non_camel_case_types)]
@@ -1141,6 +1162,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = HandshakeSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1187,6 +1209,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = ListFlightsSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1232,6 +1255,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = GetFlightInfoSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1278,6 +1302,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = PollFlightInfoSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1323,6 +1348,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = GetSchemaSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1369,6 +1395,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = DoGetSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1415,6 +1442,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = DoPutSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1461,6 +1489,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = DoExchangeSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1507,6 +1536,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = DoActionSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1553,6 +1583,7 @@ pub mod flight_service_server {
                     let max_encoding_message_size = self.max_encoding_message_size;
                     let inner = self.inner.clone();
                     let fut = async move {
+                        let inner = inner.0;
                         let method = ListActionsSvc(inner);
                         let codec = tonic::codec::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
@@ -1571,25 +1602,20 @@ pub mod flight_service_server {
                 }
                 _ => {
                     Box::pin(async move {
-                        let mut response = http::Response::new(empty_body());
-                        let headers = response.headers_mut();
-                        headers
-                            .insert(
-                                tonic::Status::GRPC_STATUS,
-                                (tonic::Code::Unimplemented as i32).into(),
-                            );
-                        headers
-                            .insert(
-                                http::header::CONTENT_TYPE,
-                                tonic::metadata::GRPC_CONTENT_TYPE,
-                            );
-                        Ok(response)
+                        Ok(
+                            http::Response::builder()
+                                .status(200)
+                                .header("grpc-status", "12")
+                                .header("content-type", "application/grpc")
+                                .body(empty_body())
+                                .unwrap(),
+                        )
                     })
                 }
             }
         }
     }
-    impl Clone for FlightServiceServer {
+    impl Clone for FlightServiceServer {
         fn clone(&self) -> Self {
             let inner = self.inner.clone();
             Self {
@@ -1601,9 +1627,17 @@ pub mod flight_service_server {
             }
         }
     }
-    /// Generated gRPC service name
-    pub const SERVICE_NAME: &str = "arrow.flight.protocol.FlightService";
-    impl tonic::server::NamedService for FlightServiceServer {
-        const NAME: &'static str = SERVICE_NAME;
+    impl Clone for _Inner {
+        fn clone(&self) -> Self {
+            Self(Arc::clone(&self.0))
+        }
+    }
+    impl std::fmt::Debug for _Inner {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            write!(f, "{:?}", self.0)
+        }
+    }
+    impl tonic::server::NamedService for FlightServiceServer {
+        const NAME: &'static str = "arrow.flight.protocol.FlightService";
     }
 }
diff --git a/arrow-flight/src/bin/flight_sql_client.rs b/arrow-flight/src/bin/flight_sql_client.rs
index 7b9e34898ac8..8f0618f495bc 100644
--- a/arrow-flight/src/bin/flight_sql_client.rs
+++ b/arrow-flight/src/bin/flight_sql_client.rs
@@ -357,7 +357,7 @@ async fn setup_client(args: ClientArgs) -> Result),
     /// Some unexpected message was received
     ProtocolError(String),
     /// An error occurred during decoding
@@ -74,7 +74,7 @@ impl Error for FlightError {
 
 impl From<tonic::Status> for FlightError {
     fn from(status: tonic::Status) -> Self {
-        Self::Tonic(status)
+        Self::Tonic(Box::new(status))
     }
 }
@@ -91,7 +91,7 @@ impl From<FlightError> for tonic::Status {
     match value {
         FlightError::Arrow(e) => tonic::Status::internal(e.to_string()),
         FlightError::NotYetImplemented(e) => tonic::Status::internal(e),
-        FlightError::Tonic(status) => status,
+        FlightError::Tonic(status) => *status,
         FlightError::ProtocolError(e) => tonic::Status::internal(e),
         FlightError::DecodeError(e) => tonic::Status::internal(e),
         FlightError::ExternalError(e) => tonic::Status::internal(e.to_string()),
@@ -147,4 +147,10 @@ mod test {
         let source = root_error.downcast_ref::<FlightError>().unwrap();
         assert!(matches!(source, FlightError::DecodeError(_)));
     }
+
+    #[test]
+    fn test_error_size() {
+        // use Box in variants to keep this size down
+        assert_eq!(std::mem::size_of::<FlightError>(), 32);
+    }
 }
diff --git a/arrow-flight/src/sql/arrow.flight.protocol.sql.rs b/arrow-flight/src/sql/arrow.flight.protocol.sql.rs
index 7a37a0b28856..822f095ed088 100644
--- a/arrow-flight/src/sql/arrow.flight.protocol.sql.rs
+++ b/arrow-flight/src/sql/arrow.flight.protocol.sql.rs
@@ -19,6 +19,7 @@
 /// int32_to_int32_list_map: map>
 /// >
 /// where there is one row per requested piece of metadata information.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetSqlInfo {
     ///
@@ -99,7 +100,8 @@ pub struct CommandGetSqlInfo {
 /// is only relevant to be used by ODBC).
 /// >
 /// The returned data should be ordered by data_type and then by type_name.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetXdbcTypeInfo {
     ///
     /// Specifies the data type to search for the info.
@@ -118,7 +120,8 @@ pub struct CommandGetXdbcTypeInfo {
 /// catalog_name: utf8 not null
 /// >
 /// The returned data should be ordered by catalog_name.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetCatalogs {}
 ///
 /// Represents a request to retrieve the list of database schemas on a Flight SQL enabled backend.
@@ -133,6 +136,7 @@ pub struct CommandGetCatalogs {}
 /// db_schema_name: utf8 not null
 /// >
 /// The returned data should be ordered by catalog_name, then db_schema_name.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetDbSchemas {
     ///
@@ -177,6 +181,7 @@ pub struct CommandGetDbSchemas {
 /// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise.
 /// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise.
 /// The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetTables {
     ///
@@ -226,7 +231,8 @@ pub struct CommandGetTables {
 /// table_type: utf8 not null
 /// >
 /// The returned data should be ordered by table_type.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetTableTypes {}
 ///
 /// Represents a request to retrieve the primary keys of a table on a Flight SQL enabled backend.
@@ -244,6 +250,7 @@ pub struct CommandGetTableTypes {}
 /// key_sequence: int32 not null
 /// >
 /// The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetPrimaryKeys {
     ///
@@ -287,6 +294,7 @@ pub struct CommandGetPrimaryKeys {
 /// >
 /// The returned data should be ordered by fk_catalog_name, fk_db_schema_name, fk_table_name, fk_key_name, then key_sequence.
 /// update_rule and delete_rule returns a byte that is equivalent to actions declared on UpdateDeleteRules enum.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetExportedKeys {
     ///
@@ -334,6 +342,7 @@ pub struct CommandGetExportedKeys {
 /// - 2 = SET NULL
 /// - 3 = NO ACTION
 /// - 4 = SET DEFAULT
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetImportedKeys {
     ///
@@ -383,6 +392,7 @@ pub struct CommandGetImportedKeys {
 /// - 2 = SET NULL
 /// - 3 = NO ACTION
 /// - 4 = SET DEFAULT
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandGetCrossReference {
     /// *
@@ -420,6 +430,7 @@ pub struct CommandGetCrossReference {
 }
 ///
 /// Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionCreatePreparedStatementRequest {
     /// The valid SQL string to create a prepared statement for.
@@ -432,6 +443,7 @@ pub struct ActionCreatePreparedStatementRequest {
 }
 ///
 /// An embedded message describing a Substrait plan to execute.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct SubstraitPlan {
     /// The serialized substrait.Plan to create a prepared statement for.
@@ -448,6 +460,7 @@ pub struct SubstraitPlan {
 }
 ///
 /// Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionCreatePreparedSubstraitPlanRequest {
     /// The serialized substrait.Plan to create a prepared statement for.
@@ -466,6 +479,7 @@ pub struct ActionCreatePreparedSubstraitPlanRequest {
 /// - Automatically, by a server timeout.
 ///
 /// The result should be wrapped in a google.protobuf.Any message.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionCreatePreparedStatementResult {
     /// Opaque handle for the prepared statement on the server.
@@ -486,6 +500,7 @@ pub struct ActionCreatePreparedStatementResult {
 ///
 /// Request message for the "ClosePreparedStatement" action on a Flight SQL enabled backend.
 /// Closes server resources associated with the prepared statement handle.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionClosePreparedStatementRequest {
     /// Opaque handle for the prepared statement on the server.
@@ -495,7 +510,8 @@ pub struct ActionClosePreparedStatementRequest {
 ///
 /// Request message for the "BeginTransaction" action.
 /// Begins a transaction.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[allow(clippy::derive_partial_eq_without_eq)]
+#[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionBeginTransactionRequest {}
 ///
 /// Request message for the "BeginSavepoint" action.
@@ -503,6 +519,7 @@ pub struct ActionBeginTransactionRequest {}
 ///
 /// Only supported if FLIGHT_SQL_TRANSACTION is
 /// FLIGHT_SQL_TRANSACTION_SUPPORT_SAVEPOINT.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionBeginSavepointRequest {
     /// The transaction to which a savepoint belongs.
@@ -520,6 +537,7 @@ pub struct ActionBeginSavepointRequest {
 /// automatically rolled back.
 ///
 /// The result should be wrapped in a google.protobuf.Any message.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionBeginTransactionResult {
     /// Opaque handle for the transaction on the server.
@@ -534,6 +552,7 @@ pub struct ActionBeginTransactionResult {
 /// out, then the savepoint is also invalidated.
 ///
 /// The result should be wrapped in a google.protobuf.Any message.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionBeginSavepointResult {
     /// Opaque handle for the savepoint on the server.
@@ -547,6 +566,7 @@ pub struct ActionBeginSavepointResult {
 ///
 /// If the action completes successfully, the transaction handle is
 /// invalidated, as are all associated savepoints.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionEndTransactionRequest {
     /// Opaque handle for the transaction on the server.
@@ -584,9 +604,9 @@ pub mod action_end_transaction_request {
         /// (if the ProtoBuf definition does not change) and safe for programmatic use.
         pub fn as_str_name(&self) -> &'static str {
             match self {
-                Self::Unspecified => "END_TRANSACTION_UNSPECIFIED",
-                Self::Commit => "END_TRANSACTION_COMMIT",
-                Self::Rollback => "END_TRANSACTION_ROLLBACK",
+                EndTransaction::Unspecified => "END_TRANSACTION_UNSPECIFIED",
+                EndTransaction::Commit => "END_TRANSACTION_COMMIT",
+                EndTransaction::Rollback => "END_TRANSACTION_ROLLBACK",
             }
         }
         /// Creates an enum from field names used in the ProtoBuf definition.
@@ -609,6 +629,7 @@ pub mod action_end_transaction_request {
 /// Releasing a savepoint invalidates that savepoint. Rolling back to
 /// a savepoint does not invalidate the savepoint, but invalidates all
 /// savepoints created after the current savepoint.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct ActionEndSavepointRequest {
     /// Opaque handle for the savepoint on the server.
@@ -646,9 +667,9 @@ pub mod action_end_savepoint_request {
         /// (if the ProtoBuf definition does not change) and safe for programmatic use.
         pub fn as_str_name(&self) -> &'static str {
             match self {
-                Self::Unspecified => "END_SAVEPOINT_UNSPECIFIED",
-                Self::Release => "END_SAVEPOINT_RELEASE",
-                Self::Rollback => "END_SAVEPOINT_ROLLBACK",
+                EndSavepoint::Unspecified => "END_SAVEPOINT_UNSPECIFIED",
+                EndSavepoint::Release => "END_SAVEPOINT_RELEASE",
+                EndSavepoint::Rollback => "END_SAVEPOINT_ROLLBACK",
             }
         }
         /// Creates an enum from field names used in the ProtoBuf definition.
@@ -678,6 +699,7 @@ pub mod action_end_savepoint_request {
 /// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise.
 /// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise.
 /// - GetFlightInfo: execute the query.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandStatementQuery {
     /// The SQL syntax.
@@ -704,6 +726,7 @@ pub struct CommandStatementQuery {
 /// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise.
 /// - GetFlightInfo: execute the query.
 /// - DoPut: execute the query.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandStatementSubstraitPlan {
     /// A serialized substrait.Plan
@@ -716,6 +739,7 @@ pub struct CommandStatementSubstraitPlan {
 /// *
 /// Represents a ticket resulting from GetFlightInfo with a CommandStatementQuery.
 /// This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct TicketStatementQuery {
     /// Unique identifier for the instance of the statement to execute.
@@ -742,6 +766,7 @@ pub struct TicketStatementQuery {
 /// for the parameters when determining the schema.
 /// - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution.
 /// - GetFlightInfo: execute the prepared statement instance.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandPreparedStatementQuery {
     /// Opaque handle for the prepared statement on the server.
@@ -751,6 +776,7 @@ pub struct CommandPreparedStatementQuery {
 ///
 /// Represents a SQL update query. Used in the command member of FlightDescriptor
 /// for the RPC call DoPut to cause the server to execute the included SQL update.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandStatementUpdate {
     /// The SQL syntax.
@@ -764,6 +790,7 @@ pub struct CommandStatementUpdate {
 /// Represents a SQL update query. Used in the command member of FlightDescriptor
 /// for the RPC call DoPut to cause the server to execute the included
 /// prepared statement handle as an update.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandPreparedStatementUpdate {
     /// Opaque handle for the prepared statement on the server.
@@ -774,6 +801,7 @@ pub struct CommandPreparedStatementUpdate {
 /// Represents a bulk ingestion request. Used in the command member of FlightDescriptor
 /// for the the RPC call DoPut to cause the server load the contents of the stream's
 /// FlightData into the target destination.
+#[allow(clippy::derive_partial_eq_without_eq)]
 #[derive(Clone, PartialEq, ::prost::Message)]
 pub struct CommandStatementIngest {
     /// The behavior for handling the table definition.
@@ -810,7 +838,8 @@ pub struct CommandStatementIngest {
 /// Nested message and enum types in `CommandStatementIngest`.
 pub mod command_statement_ingest {
     /// Options for table definition behavior
-    #[derive(Clone, Copy, PartialEq, ::prost::Message)]
+    #[allow(clippy::derive_partial_eq_without_eq)]
+    #[derive(Clone, PartialEq, ::prost::Message)]
     pub struct TableDefinitionOptions {
         #[prost(
             enumeration = "table_definition_options::TableNotExistOption",
@@ -850,9 +879,11 @@ pub mod command_statement_ingest {
         /// (if the ProtoBuf definition does not change) and safe for programmatic use.
         pub fn as_str_name(&self) -> &'static str {
             match self {
-                Self::Unspecified => "TABLE_NOT_EXIST_OPTION_UNSPECIFIED",
-                Self::Create => "TABLE_NOT_EXIST_OPTION_CREATE",
-                Self::Fail => "TABLE_NOT_EXIST_OPTION_FAIL",
+                TableNotExistOption::Unspecified => {
+                    "TABLE_NOT_EXIST_OPTION_UNSPECIFIED"
+                }
+                TableNotExistOption::Create => "TABLE_NOT_EXIST_OPTION_CREATE",
+                TableNotExistOption::Fail => "TABLE_NOT_EXIST_OPTION_FAIL",
             }
         }
         /// Creates an enum from field names used in the ProtoBuf definition.
@@ -895,10 +926,10 @@ pub mod command_statement_ingest { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::Unspecified => "TABLE_EXISTS_OPTION_UNSPECIFIED", - Self::Fail => "TABLE_EXISTS_OPTION_FAIL", - Self::Append => "TABLE_EXISTS_OPTION_APPEND", - Self::Replace => "TABLE_EXISTS_OPTION_REPLACE", + TableExistsOption::Unspecified => "TABLE_EXISTS_OPTION_UNSPECIFIED", + TableExistsOption::Fail => "TABLE_EXISTS_OPTION_FAIL", + TableExistsOption::Append => "TABLE_EXISTS_OPTION_APPEND", + TableExistsOption::Replace => "TABLE_EXISTS_OPTION_REPLACE", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -918,7 +949,8 @@ pub mod command_statement_ingest { /// Returned from the RPC call DoPut when a CommandStatementUpdate, /// CommandPreparedStatementUpdate, or CommandStatementIngest was /// in the request, containing results from the update. -#[derive(Clone, Copy, PartialEq, ::prost::Message)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct DoPutUpdateResult { /// The number of records updated. A return value of -1 represents /// an unknown updated record count. @@ -930,6 +962,7 @@ pub struct DoPutUpdateResult { /// *Note on legacy behavior*: previous versions of the protocol did not return any result for /// this command, and that behavior should still be supported by clients. In that case, the client /// can continue as though the fields in this message were not provided or set to sensible default values. +#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct DoPutPreparedStatementResult { /// Represents a (potentially updated) opaque handle for the prepared statement on the server. @@ -959,6 +992,7 @@ pub struct DoPutPreparedStatementResult { /// /// This command is deprecated since 13.0.0. Use the "CancelFlightInfo" /// action with DoAction instead. +#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ActionCancelQueryRequest { /// The result of the GetFlightInfo RPC that initiated the query. @@ -975,7 +1009,8 @@ pub struct ActionCancelQueryRequest { /// /// This command is deprecated since 13.0.0. Use the "CancelFlightInfo" /// action with DoAction instead. -#[derive(Clone, Copy, PartialEq, ::prost::Message)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct ActionCancelQueryResult { #[prost(enumeration = "action_cancel_query_result::CancelResult", tag = "1")] pub result: i32, @@ -1016,10 +1051,10 @@ pub mod action_cancel_query_result { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::Unspecified => "CANCEL_RESULT_UNSPECIFIED", - Self::Cancelled => "CANCEL_RESULT_CANCELLED", - Self::Cancelling => "CANCEL_RESULT_CANCELLING", - Self::NotCancellable => "CANCEL_RESULT_NOT_CANCELLABLE", + CancelResult::Unspecified => "CANCEL_RESULT_UNSPECIFIED", + CancelResult::Cancelled => "CANCEL_RESULT_CANCELLED", + CancelResult::Cancelling => "CANCEL_RESULT_CANCELLING", + CancelResult::NotCancellable => "CANCEL_RESULT_NOT_CANCELLABLE", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -1654,131 +1689,135 @@ impl SqlInfo { /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
pub fn as_str_name(&self) -> &'static str { match self { - Self::FlightSqlServerName => "FLIGHT_SQL_SERVER_NAME", - Self::FlightSqlServerVersion => "FLIGHT_SQL_SERVER_VERSION", - Self::FlightSqlServerArrowVersion => "FLIGHT_SQL_SERVER_ARROW_VERSION", - Self::FlightSqlServerReadOnly => "FLIGHT_SQL_SERVER_READ_ONLY", - Self::FlightSqlServerSql => "FLIGHT_SQL_SERVER_SQL", - Self::FlightSqlServerSubstrait => "FLIGHT_SQL_SERVER_SUBSTRAIT", - Self::FlightSqlServerSubstraitMinVersion => { + SqlInfo::FlightSqlServerName => "FLIGHT_SQL_SERVER_NAME", + SqlInfo::FlightSqlServerVersion => "FLIGHT_SQL_SERVER_VERSION", + SqlInfo::FlightSqlServerArrowVersion => "FLIGHT_SQL_SERVER_ARROW_VERSION", + SqlInfo::FlightSqlServerReadOnly => "FLIGHT_SQL_SERVER_READ_ONLY", + SqlInfo::FlightSqlServerSql => "FLIGHT_SQL_SERVER_SQL", + SqlInfo::FlightSqlServerSubstrait => "FLIGHT_SQL_SERVER_SUBSTRAIT", + SqlInfo::FlightSqlServerSubstraitMinVersion => { "FLIGHT_SQL_SERVER_SUBSTRAIT_MIN_VERSION" } - Self::FlightSqlServerSubstraitMaxVersion => { + SqlInfo::FlightSqlServerSubstraitMaxVersion => { "FLIGHT_SQL_SERVER_SUBSTRAIT_MAX_VERSION" } - Self::FlightSqlServerTransaction => "FLIGHT_SQL_SERVER_TRANSACTION", - Self::FlightSqlServerCancel => "FLIGHT_SQL_SERVER_CANCEL", - Self::FlightSqlServerBulkIngestion => "FLIGHT_SQL_SERVER_BULK_INGESTION", - Self::FlightSqlServerIngestTransactionsSupported => { + SqlInfo::FlightSqlServerTransaction => "FLIGHT_SQL_SERVER_TRANSACTION", + SqlInfo::FlightSqlServerCancel => "FLIGHT_SQL_SERVER_CANCEL", + SqlInfo::FlightSqlServerBulkIngestion => "FLIGHT_SQL_SERVER_BULK_INGESTION", + SqlInfo::FlightSqlServerIngestTransactionsSupported => { "FLIGHT_SQL_SERVER_INGEST_TRANSACTIONS_SUPPORTED" } - Self::FlightSqlServerStatementTimeout => { + SqlInfo::FlightSqlServerStatementTimeout => { "FLIGHT_SQL_SERVER_STATEMENT_TIMEOUT" } - Self::FlightSqlServerTransactionTimeout => { + SqlInfo::FlightSqlServerTransactionTimeout => { "FLIGHT_SQL_SERVER_TRANSACTION_TIMEOUT" } - Self::SqlDdlCatalog => "SQL_DDL_CATALOG", - Self::SqlDdlSchema => "SQL_DDL_SCHEMA", - Self::SqlDdlTable => "SQL_DDL_TABLE", - Self::SqlIdentifierCase => "SQL_IDENTIFIER_CASE", - Self::SqlIdentifierQuoteChar => "SQL_IDENTIFIER_QUOTE_CHAR", - Self::SqlQuotedIdentifierCase => "SQL_QUOTED_IDENTIFIER_CASE", - Self::SqlAllTablesAreSelectable => "SQL_ALL_TABLES_ARE_SELECTABLE", - Self::SqlNullOrdering => "SQL_NULL_ORDERING", - Self::SqlKeywords => "SQL_KEYWORDS", - Self::SqlNumericFunctions => "SQL_NUMERIC_FUNCTIONS", - Self::SqlStringFunctions => "SQL_STRING_FUNCTIONS", - Self::SqlSystemFunctions => "SQL_SYSTEM_FUNCTIONS", - Self::SqlDatetimeFunctions => "SQL_DATETIME_FUNCTIONS", - Self::SqlSearchStringEscape => "SQL_SEARCH_STRING_ESCAPE", - Self::SqlExtraNameCharacters => "SQL_EXTRA_NAME_CHARACTERS", - Self::SqlSupportsColumnAliasing => "SQL_SUPPORTS_COLUMN_ALIASING", - Self::SqlNullPlusNullIsNull => "SQL_NULL_PLUS_NULL_IS_NULL", - Self::SqlSupportsConvert => "SQL_SUPPORTS_CONVERT", - Self::SqlSupportsTableCorrelationNames => { + SqlInfo::SqlDdlCatalog => "SQL_DDL_CATALOG", + SqlInfo::SqlDdlSchema => "SQL_DDL_SCHEMA", + SqlInfo::SqlDdlTable => "SQL_DDL_TABLE", + SqlInfo::SqlIdentifierCase => "SQL_IDENTIFIER_CASE", + SqlInfo::SqlIdentifierQuoteChar => "SQL_IDENTIFIER_QUOTE_CHAR", + SqlInfo::SqlQuotedIdentifierCase => "SQL_QUOTED_IDENTIFIER_CASE", + SqlInfo::SqlAllTablesAreSelectable => "SQL_ALL_TABLES_ARE_SELECTABLE", + SqlInfo::SqlNullOrdering => "SQL_NULL_ORDERING", + SqlInfo::SqlKeywords => "SQL_KEYWORDS", + SqlInfo::SqlNumericFunctions => 
"SQL_NUMERIC_FUNCTIONS", + SqlInfo::SqlStringFunctions => "SQL_STRING_FUNCTIONS", + SqlInfo::SqlSystemFunctions => "SQL_SYSTEM_FUNCTIONS", + SqlInfo::SqlDatetimeFunctions => "SQL_DATETIME_FUNCTIONS", + SqlInfo::SqlSearchStringEscape => "SQL_SEARCH_STRING_ESCAPE", + SqlInfo::SqlExtraNameCharacters => "SQL_EXTRA_NAME_CHARACTERS", + SqlInfo::SqlSupportsColumnAliasing => "SQL_SUPPORTS_COLUMN_ALIASING", + SqlInfo::SqlNullPlusNullIsNull => "SQL_NULL_PLUS_NULL_IS_NULL", + SqlInfo::SqlSupportsConvert => "SQL_SUPPORTS_CONVERT", + SqlInfo::SqlSupportsTableCorrelationNames => { "SQL_SUPPORTS_TABLE_CORRELATION_NAMES" } - Self::SqlSupportsDifferentTableCorrelationNames => { + SqlInfo::SqlSupportsDifferentTableCorrelationNames => { "SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES" } - Self::SqlSupportsExpressionsInOrderBy => { + SqlInfo::SqlSupportsExpressionsInOrderBy => { "SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY" } - Self::SqlSupportsOrderByUnrelated => "SQL_SUPPORTS_ORDER_BY_UNRELATED", - Self::SqlSupportedGroupBy => "SQL_SUPPORTED_GROUP_BY", - Self::SqlSupportsLikeEscapeClause => "SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE", - Self::SqlSupportsNonNullableColumns => "SQL_SUPPORTS_NON_NULLABLE_COLUMNS", - Self::SqlSupportedGrammar => "SQL_SUPPORTED_GRAMMAR", - Self::SqlAnsi92SupportedLevel => "SQL_ANSI92_SUPPORTED_LEVEL", - Self::SqlSupportsIntegrityEnhancementFacility => { + SqlInfo::SqlSupportsOrderByUnrelated => "SQL_SUPPORTS_ORDER_BY_UNRELATED", + SqlInfo::SqlSupportedGroupBy => "SQL_SUPPORTED_GROUP_BY", + SqlInfo::SqlSupportsLikeEscapeClause => "SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE", + SqlInfo::SqlSupportsNonNullableColumns => "SQL_SUPPORTS_NON_NULLABLE_COLUMNS", + SqlInfo::SqlSupportedGrammar => "SQL_SUPPORTED_GRAMMAR", + SqlInfo::SqlAnsi92SupportedLevel => "SQL_ANSI92_SUPPORTED_LEVEL", + SqlInfo::SqlSupportsIntegrityEnhancementFacility => { "SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY" } - Self::SqlOuterJoinsSupportLevel => "SQL_OUTER_JOINS_SUPPORT_LEVEL", - Self::SqlSchemaTerm => "SQL_SCHEMA_TERM", - Self::SqlProcedureTerm => "SQL_PROCEDURE_TERM", - Self::SqlCatalogTerm => "SQL_CATALOG_TERM", - Self::SqlCatalogAtStart => "SQL_CATALOG_AT_START", - Self::SqlSchemasSupportedActions => "SQL_SCHEMAS_SUPPORTED_ACTIONS", - Self::SqlCatalogsSupportedActions => "SQL_CATALOGS_SUPPORTED_ACTIONS", - Self::SqlSupportedPositionedCommands => "SQL_SUPPORTED_POSITIONED_COMMANDS", - Self::SqlSelectForUpdateSupported => "SQL_SELECT_FOR_UPDATE_SUPPORTED", - Self::SqlStoredProceduresSupported => "SQL_STORED_PROCEDURES_SUPPORTED", - Self::SqlSupportedSubqueries => "SQL_SUPPORTED_SUBQUERIES", - Self::SqlCorrelatedSubqueriesSupported => { + SqlInfo::SqlOuterJoinsSupportLevel => "SQL_OUTER_JOINS_SUPPORT_LEVEL", + SqlInfo::SqlSchemaTerm => "SQL_SCHEMA_TERM", + SqlInfo::SqlProcedureTerm => "SQL_PROCEDURE_TERM", + SqlInfo::SqlCatalogTerm => "SQL_CATALOG_TERM", + SqlInfo::SqlCatalogAtStart => "SQL_CATALOG_AT_START", + SqlInfo::SqlSchemasSupportedActions => "SQL_SCHEMAS_SUPPORTED_ACTIONS", + SqlInfo::SqlCatalogsSupportedActions => "SQL_CATALOGS_SUPPORTED_ACTIONS", + SqlInfo::SqlSupportedPositionedCommands => { + "SQL_SUPPORTED_POSITIONED_COMMANDS" + } + SqlInfo::SqlSelectForUpdateSupported => "SQL_SELECT_FOR_UPDATE_SUPPORTED", + SqlInfo::SqlStoredProceduresSupported => "SQL_STORED_PROCEDURES_SUPPORTED", + SqlInfo::SqlSupportedSubqueries => "SQL_SUPPORTED_SUBQUERIES", + SqlInfo::SqlCorrelatedSubqueriesSupported => { "SQL_CORRELATED_SUBQUERIES_SUPPORTED" } - Self::SqlSupportedUnions => "SQL_SUPPORTED_UNIONS", - Self::SqlMaxBinaryLiteralLength 
=> "SQL_MAX_BINARY_LITERAL_LENGTH", - Self::SqlMaxCharLiteralLength => "SQL_MAX_CHAR_LITERAL_LENGTH", - Self::SqlMaxColumnNameLength => "SQL_MAX_COLUMN_NAME_LENGTH", - Self::SqlMaxColumnsInGroupBy => "SQL_MAX_COLUMNS_IN_GROUP_BY", - Self::SqlMaxColumnsInIndex => "SQL_MAX_COLUMNS_IN_INDEX", - Self::SqlMaxColumnsInOrderBy => "SQL_MAX_COLUMNS_IN_ORDER_BY", - Self::SqlMaxColumnsInSelect => "SQL_MAX_COLUMNS_IN_SELECT", - Self::SqlMaxColumnsInTable => "SQL_MAX_COLUMNS_IN_TABLE", - Self::SqlMaxConnections => "SQL_MAX_CONNECTIONS", - Self::SqlMaxCursorNameLength => "SQL_MAX_CURSOR_NAME_LENGTH", - Self::SqlMaxIndexLength => "SQL_MAX_INDEX_LENGTH", - Self::SqlDbSchemaNameLength => "SQL_DB_SCHEMA_NAME_LENGTH", - Self::SqlMaxProcedureNameLength => "SQL_MAX_PROCEDURE_NAME_LENGTH", - Self::SqlMaxCatalogNameLength => "SQL_MAX_CATALOG_NAME_LENGTH", - Self::SqlMaxRowSize => "SQL_MAX_ROW_SIZE", - Self::SqlMaxRowSizeIncludesBlobs => "SQL_MAX_ROW_SIZE_INCLUDES_BLOBS", - Self::SqlMaxStatementLength => "SQL_MAX_STATEMENT_LENGTH", - Self::SqlMaxStatements => "SQL_MAX_STATEMENTS", - Self::SqlMaxTableNameLength => "SQL_MAX_TABLE_NAME_LENGTH", - Self::SqlMaxTablesInSelect => "SQL_MAX_TABLES_IN_SELECT", - Self::SqlMaxUsernameLength => "SQL_MAX_USERNAME_LENGTH", - Self::SqlDefaultTransactionIsolation => "SQL_DEFAULT_TRANSACTION_ISOLATION", - Self::SqlTransactionsSupported => "SQL_TRANSACTIONS_SUPPORTED", - Self::SqlSupportedTransactionsIsolationLevels => { + SqlInfo::SqlSupportedUnions => "SQL_SUPPORTED_UNIONS", + SqlInfo::SqlMaxBinaryLiteralLength => "SQL_MAX_BINARY_LITERAL_LENGTH", + SqlInfo::SqlMaxCharLiteralLength => "SQL_MAX_CHAR_LITERAL_LENGTH", + SqlInfo::SqlMaxColumnNameLength => "SQL_MAX_COLUMN_NAME_LENGTH", + SqlInfo::SqlMaxColumnsInGroupBy => "SQL_MAX_COLUMNS_IN_GROUP_BY", + SqlInfo::SqlMaxColumnsInIndex => "SQL_MAX_COLUMNS_IN_INDEX", + SqlInfo::SqlMaxColumnsInOrderBy => "SQL_MAX_COLUMNS_IN_ORDER_BY", + SqlInfo::SqlMaxColumnsInSelect => "SQL_MAX_COLUMNS_IN_SELECT", + SqlInfo::SqlMaxColumnsInTable => "SQL_MAX_COLUMNS_IN_TABLE", + SqlInfo::SqlMaxConnections => "SQL_MAX_CONNECTIONS", + SqlInfo::SqlMaxCursorNameLength => "SQL_MAX_CURSOR_NAME_LENGTH", + SqlInfo::SqlMaxIndexLength => "SQL_MAX_INDEX_LENGTH", + SqlInfo::SqlDbSchemaNameLength => "SQL_DB_SCHEMA_NAME_LENGTH", + SqlInfo::SqlMaxProcedureNameLength => "SQL_MAX_PROCEDURE_NAME_LENGTH", + SqlInfo::SqlMaxCatalogNameLength => "SQL_MAX_CATALOG_NAME_LENGTH", + SqlInfo::SqlMaxRowSize => "SQL_MAX_ROW_SIZE", + SqlInfo::SqlMaxRowSizeIncludesBlobs => "SQL_MAX_ROW_SIZE_INCLUDES_BLOBS", + SqlInfo::SqlMaxStatementLength => "SQL_MAX_STATEMENT_LENGTH", + SqlInfo::SqlMaxStatements => "SQL_MAX_STATEMENTS", + SqlInfo::SqlMaxTableNameLength => "SQL_MAX_TABLE_NAME_LENGTH", + SqlInfo::SqlMaxTablesInSelect => "SQL_MAX_TABLES_IN_SELECT", + SqlInfo::SqlMaxUsernameLength => "SQL_MAX_USERNAME_LENGTH", + SqlInfo::SqlDefaultTransactionIsolation => { + "SQL_DEFAULT_TRANSACTION_ISOLATION" + } + SqlInfo::SqlTransactionsSupported => "SQL_TRANSACTIONS_SUPPORTED", + SqlInfo::SqlSupportedTransactionsIsolationLevels => { "SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS" } - Self::SqlDataDefinitionCausesTransactionCommit => { + SqlInfo::SqlDataDefinitionCausesTransactionCommit => { "SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT" } - Self::SqlDataDefinitionsInTransactionsIgnored => { + SqlInfo::SqlDataDefinitionsInTransactionsIgnored => { "SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED" } - Self::SqlSupportedResultSetTypes => "SQL_SUPPORTED_RESULT_SET_TYPES", - 
Self::SqlSupportedConcurrenciesForResultSetUnspecified => { + SqlInfo::SqlSupportedResultSetTypes => "SQL_SUPPORTED_RESULT_SET_TYPES", + SqlInfo::SqlSupportedConcurrenciesForResultSetUnspecified => { "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_UNSPECIFIED" } - Self::SqlSupportedConcurrenciesForResultSetForwardOnly => { + SqlInfo::SqlSupportedConcurrenciesForResultSetForwardOnly => { "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_FORWARD_ONLY" } - Self::SqlSupportedConcurrenciesForResultSetScrollSensitive => { + SqlInfo::SqlSupportedConcurrenciesForResultSetScrollSensitive => { "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_SENSITIVE" } - Self::SqlSupportedConcurrenciesForResultSetScrollInsensitive => { + SqlInfo::SqlSupportedConcurrenciesForResultSetScrollInsensitive => { "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_INSENSITIVE" } - Self::SqlBatchUpdatesSupported => "SQL_BATCH_UPDATES_SUPPORTED", - Self::SqlSavepointsSupported => "SQL_SAVEPOINTS_SUPPORTED", - Self::SqlNamedParametersSupported => "SQL_NAMED_PARAMETERS_SUPPORTED", - Self::SqlLocatorsUpdateCopy => "SQL_LOCATORS_UPDATE_COPY", - Self::SqlStoredFunctionsUsingCallSyntaxSupported => { + SqlInfo::SqlBatchUpdatesSupported => "SQL_BATCH_UPDATES_SUPPORTED", + SqlInfo::SqlSavepointsSupported => "SQL_SAVEPOINTS_SUPPORTED", + SqlInfo::SqlNamedParametersSupported => "SQL_NAMED_PARAMETERS_SUPPORTED", + SqlInfo::SqlLocatorsUpdateCopy => "SQL_LOCATORS_UPDATE_COPY", + SqlInfo::SqlStoredFunctionsUsingCallSyntaxSupported => { "SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED" } } @@ -1945,9 +1984,11 @@ impl SqlSupportedTransaction { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::None => "SQL_SUPPORTED_TRANSACTION_NONE", - Self::Transaction => "SQL_SUPPORTED_TRANSACTION_TRANSACTION", - Self::Savepoint => "SQL_SUPPORTED_TRANSACTION_SAVEPOINT", + SqlSupportedTransaction::None => "SQL_SUPPORTED_TRANSACTION_NONE", + SqlSupportedTransaction::Transaction => { + "SQL_SUPPORTED_TRANSACTION_TRANSACTION" + } + SqlSupportedTransaction::Savepoint => "SQL_SUPPORTED_TRANSACTION_SAVEPOINT", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -1975,12 +2016,18 @@ impl SqlSupportedCaseSensitivity { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlCaseSensitivityUnknown => "SQL_CASE_SENSITIVITY_UNKNOWN", - Self::SqlCaseSensitivityCaseInsensitive => { + SqlSupportedCaseSensitivity::SqlCaseSensitivityUnknown => { + "SQL_CASE_SENSITIVITY_UNKNOWN" + } + SqlSupportedCaseSensitivity::SqlCaseSensitivityCaseInsensitive => { "SQL_CASE_SENSITIVITY_CASE_INSENSITIVE" } - Self::SqlCaseSensitivityUppercase => "SQL_CASE_SENSITIVITY_UPPERCASE", - Self::SqlCaseSensitivityLowercase => "SQL_CASE_SENSITIVITY_LOWERCASE", + SqlSupportedCaseSensitivity::SqlCaseSensitivityUppercase => { + "SQL_CASE_SENSITIVITY_UPPERCASE" + } + SqlSupportedCaseSensitivity::SqlCaseSensitivityLowercase => { + "SQL_CASE_SENSITIVITY_LOWERCASE" + } } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2011,10 +2058,10 @@ impl SqlNullOrdering { /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlNullsSortedHigh => "SQL_NULLS_SORTED_HIGH", - Self::SqlNullsSortedLow => "SQL_NULLS_SORTED_LOW", - Self::SqlNullsSortedAtStart => "SQL_NULLS_SORTED_AT_START", - Self::SqlNullsSortedAtEnd => "SQL_NULLS_SORTED_AT_END", + SqlNullOrdering::SqlNullsSortedHigh => "SQL_NULLS_SORTED_HIGH", + SqlNullOrdering::SqlNullsSortedLow => "SQL_NULLS_SORTED_LOW", + SqlNullOrdering::SqlNullsSortedAtStart => "SQL_NULLS_SORTED_AT_START", + SqlNullOrdering::SqlNullsSortedAtEnd => "SQL_NULLS_SORTED_AT_END", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2042,9 +2089,9 @@ impl SupportedSqlGrammar { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlMinimumGrammar => "SQL_MINIMUM_GRAMMAR", - Self::SqlCoreGrammar => "SQL_CORE_GRAMMAR", - Self::SqlExtendedGrammar => "SQL_EXTENDED_GRAMMAR", + SupportedSqlGrammar::SqlMinimumGrammar => "SQL_MINIMUM_GRAMMAR", + SupportedSqlGrammar::SqlCoreGrammar => "SQL_CORE_GRAMMAR", + SupportedSqlGrammar::SqlExtendedGrammar => "SQL_EXTENDED_GRAMMAR", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2071,9 +2118,11 @@ impl SupportedAnsi92SqlGrammarLevel { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::Ansi92EntrySql => "ANSI92_ENTRY_SQL", - Self::Ansi92IntermediateSql => "ANSI92_INTERMEDIATE_SQL", - Self::Ansi92FullSql => "ANSI92_FULL_SQL", + SupportedAnsi92SqlGrammarLevel::Ansi92EntrySql => "ANSI92_ENTRY_SQL", + SupportedAnsi92SqlGrammarLevel::Ansi92IntermediateSql => { + "ANSI92_INTERMEDIATE_SQL" + } + SupportedAnsi92SqlGrammarLevel::Ansi92FullSql => "ANSI92_FULL_SQL", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2100,9 +2149,9 @@ impl SqlOuterJoinsSupportLevel { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlJoinsUnsupported => "SQL_JOINS_UNSUPPORTED", - Self::SqlLimitedOuterJoins => "SQL_LIMITED_OUTER_JOINS", - Self::SqlFullOuterJoins => "SQL_FULL_OUTER_JOINS", + SqlOuterJoinsSupportLevel::SqlJoinsUnsupported => "SQL_JOINS_UNSUPPORTED", + SqlOuterJoinsSupportLevel::SqlLimitedOuterJoins => "SQL_LIMITED_OUTER_JOINS", + SqlOuterJoinsSupportLevel::SqlFullOuterJoins => "SQL_FULL_OUTER_JOINS", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2128,8 +2177,8 @@ impl SqlSupportedGroupBy { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlGroupByUnrelated => "SQL_GROUP_BY_UNRELATED", - Self::SqlGroupByBeyondSelect => "SQL_GROUP_BY_BEYOND_SELECT", + SqlSupportedGroupBy::SqlGroupByUnrelated => "SQL_GROUP_BY_UNRELATED", + SqlSupportedGroupBy::SqlGroupByBeyondSelect => "SQL_GROUP_BY_BEYOND_SELECT", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2155,9 +2204,13 @@ impl SqlSupportedElementActions { /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlElementInProcedureCalls => "SQL_ELEMENT_IN_PROCEDURE_CALLS", - Self::SqlElementInIndexDefinitions => "SQL_ELEMENT_IN_INDEX_DEFINITIONS", - Self::SqlElementInPrivilegeDefinitions => { + SqlSupportedElementActions::SqlElementInProcedureCalls => { + "SQL_ELEMENT_IN_PROCEDURE_CALLS" + } + SqlSupportedElementActions::SqlElementInIndexDefinitions => { + "SQL_ELEMENT_IN_INDEX_DEFINITIONS" + } + SqlSupportedElementActions::SqlElementInPrivilegeDefinitions => { "SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS" } } @@ -2189,8 +2242,12 @@ impl SqlSupportedPositionedCommands { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlPositionedDelete => "SQL_POSITIONED_DELETE", - Self::SqlPositionedUpdate => "SQL_POSITIONED_UPDATE", + SqlSupportedPositionedCommands::SqlPositionedDelete => { + "SQL_POSITIONED_DELETE" + } + SqlSupportedPositionedCommands::SqlPositionedUpdate => { + "SQL_POSITIONED_UPDATE" + } } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2217,10 +2274,14 @@ impl SqlSupportedSubqueries { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlSubqueriesInComparisons => "SQL_SUBQUERIES_IN_COMPARISONS", - Self::SqlSubqueriesInExists => "SQL_SUBQUERIES_IN_EXISTS", - Self::SqlSubqueriesInIns => "SQL_SUBQUERIES_IN_INS", - Self::SqlSubqueriesInQuantifieds => "SQL_SUBQUERIES_IN_QUANTIFIEDS", + SqlSupportedSubqueries::SqlSubqueriesInComparisons => { + "SQL_SUBQUERIES_IN_COMPARISONS" + } + SqlSupportedSubqueries::SqlSubqueriesInExists => "SQL_SUBQUERIES_IN_EXISTS", + SqlSupportedSubqueries::SqlSubqueriesInIns => "SQL_SUBQUERIES_IN_INS", + SqlSupportedSubqueries::SqlSubqueriesInQuantifieds => { + "SQL_SUBQUERIES_IN_QUANTIFIEDS" + } } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2247,8 +2308,8 @@ impl SqlSupportedUnions { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlUnion => "SQL_UNION", - Self::SqlUnionAll => "SQL_UNION_ALL", + SqlSupportedUnions::SqlUnion => "SQL_UNION", + SqlSupportedUnions::SqlUnionAll => "SQL_UNION_ALL", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2276,11 +2337,19 @@ impl SqlTransactionIsolationLevel { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlTransactionNone => "SQL_TRANSACTION_NONE", - Self::SqlTransactionReadUncommitted => "SQL_TRANSACTION_READ_UNCOMMITTED", - Self::SqlTransactionReadCommitted => "SQL_TRANSACTION_READ_COMMITTED", - Self::SqlTransactionRepeatableRead => "SQL_TRANSACTION_REPEATABLE_READ", - Self::SqlTransactionSerializable => "SQL_TRANSACTION_SERIALIZABLE", + SqlTransactionIsolationLevel::SqlTransactionNone => "SQL_TRANSACTION_NONE", + SqlTransactionIsolationLevel::SqlTransactionReadUncommitted => { + "SQL_TRANSACTION_READ_UNCOMMITTED" + } + SqlTransactionIsolationLevel::SqlTransactionReadCommitted => { + "SQL_TRANSACTION_READ_COMMITTED" + } + SqlTransactionIsolationLevel::SqlTransactionRepeatableRead => { + "SQL_TRANSACTION_REPEATABLE_READ" + } + SqlTransactionIsolationLevel::SqlTransactionSerializable => { + "SQL_TRANSACTION_SERIALIZABLE" + } } } /// Creates an enum from field names used in the ProtoBuf definition. 
@@ -2311,9 +2380,15 @@ impl SqlSupportedTransactions { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlTransactionUnspecified => "SQL_TRANSACTION_UNSPECIFIED", - Self::SqlDataDefinitionTransactions => "SQL_DATA_DEFINITION_TRANSACTIONS", - Self::SqlDataManipulationTransactions => "SQL_DATA_MANIPULATION_TRANSACTIONS", + SqlSupportedTransactions::SqlTransactionUnspecified => { + "SQL_TRANSACTION_UNSPECIFIED" + } + SqlSupportedTransactions::SqlDataDefinitionTransactions => { + "SQL_DATA_DEFINITION_TRANSACTIONS" + } + SqlSupportedTransactions::SqlDataManipulationTransactions => { + "SQL_DATA_MANIPULATION_TRANSACTIONS" + } } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2345,12 +2420,16 @@ impl SqlSupportedResultSetType { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlResultSetTypeUnspecified => "SQL_RESULT_SET_TYPE_UNSPECIFIED", - Self::SqlResultSetTypeForwardOnly => "SQL_RESULT_SET_TYPE_FORWARD_ONLY", - Self::SqlResultSetTypeScrollInsensitive => { + SqlSupportedResultSetType::SqlResultSetTypeUnspecified => { + "SQL_RESULT_SET_TYPE_UNSPECIFIED" + } + SqlSupportedResultSetType::SqlResultSetTypeForwardOnly => { + "SQL_RESULT_SET_TYPE_FORWARD_ONLY" + } + SqlSupportedResultSetType::SqlResultSetTypeScrollInsensitive => { "SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE" } - Self::SqlResultSetTypeScrollSensitive => { + SqlSupportedResultSetType::SqlResultSetTypeScrollSensitive => { "SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE" } } @@ -2384,13 +2463,13 @@ impl SqlSupportedResultSetConcurrency { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlResultSetConcurrencyUnspecified => { + SqlSupportedResultSetConcurrency::SqlResultSetConcurrencyUnspecified => { "SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED" } - Self::SqlResultSetConcurrencyReadOnly => { + SqlSupportedResultSetConcurrency::SqlResultSetConcurrencyReadOnly => { "SQL_RESULT_SET_CONCURRENCY_READ_ONLY" } - Self::SqlResultSetConcurrencyUpdatable => { + SqlSupportedResultSetConcurrency::SqlResultSetConcurrencyUpdatable => { "SQL_RESULT_SET_CONCURRENCY_UPDATABLE" } } @@ -2442,26 +2521,30 @@ impl SqlSupportsConvert { /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
pub fn as_str_name(&self) -> &'static str { match self { - Self::SqlConvertBigint => "SQL_CONVERT_BIGINT", - Self::SqlConvertBinary => "SQL_CONVERT_BINARY", - Self::SqlConvertBit => "SQL_CONVERT_BIT", - Self::SqlConvertChar => "SQL_CONVERT_CHAR", - Self::SqlConvertDate => "SQL_CONVERT_DATE", - Self::SqlConvertDecimal => "SQL_CONVERT_DECIMAL", - Self::SqlConvertFloat => "SQL_CONVERT_FLOAT", - Self::SqlConvertInteger => "SQL_CONVERT_INTEGER", - Self::SqlConvertIntervalDayTime => "SQL_CONVERT_INTERVAL_DAY_TIME", - Self::SqlConvertIntervalYearMonth => "SQL_CONVERT_INTERVAL_YEAR_MONTH", - Self::SqlConvertLongvarbinary => "SQL_CONVERT_LONGVARBINARY", - Self::SqlConvertLongvarchar => "SQL_CONVERT_LONGVARCHAR", - Self::SqlConvertNumeric => "SQL_CONVERT_NUMERIC", - Self::SqlConvertReal => "SQL_CONVERT_REAL", - Self::SqlConvertSmallint => "SQL_CONVERT_SMALLINT", - Self::SqlConvertTime => "SQL_CONVERT_TIME", - Self::SqlConvertTimestamp => "SQL_CONVERT_TIMESTAMP", - Self::SqlConvertTinyint => "SQL_CONVERT_TINYINT", - Self::SqlConvertVarbinary => "SQL_CONVERT_VARBINARY", - Self::SqlConvertVarchar => "SQL_CONVERT_VARCHAR", + SqlSupportsConvert::SqlConvertBigint => "SQL_CONVERT_BIGINT", + SqlSupportsConvert::SqlConvertBinary => "SQL_CONVERT_BINARY", + SqlSupportsConvert::SqlConvertBit => "SQL_CONVERT_BIT", + SqlSupportsConvert::SqlConvertChar => "SQL_CONVERT_CHAR", + SqlSupportsConvert::SqlConvertDate => "SQL_CONVERT_DATE", + SqlSupportsConvert::SqlConvertDecimal => "SQL_CONVERT_DECIMAL", + SqlSupportsConvert::SqlConvertFloat => "SQL_CONVERT_FLOAT", + SqlSupportsConvert::SqlConvertInteger => "SQL_CONVERT_INTEGER", + SqlSupportsConvert::SqlConvertIntervalDayTime => { + "SQL_CONVERT_INTERVAL_DAY_TIME" + } + SqlSupportsConvert::SqlConvertIntervalYearMonth => { + "SQL_CONVERT_INTERVAL_YEAR_MONTH" + } + SqlSupportsConvert::SqlConvertLongvarbinary => "SQL_CONVERT_LONGVARBINARY", + SqlSupportsConvert::SqlConvertLongvarchar => "SQL_CONVERT_LONGVARCHAR", + SqlSupportsConvert::SqlConvertNumeric => "SQL_CONVERT_NUMERIC", + SqlSupportsConvert::SqlConvertReal => "SQL_CONVERT_REAL", + SqlSupportsConvert::SqlConvertSmallint => "SQL_CONVERT_SMALLINT", + SqlSupportsConvert::SqlConvertTime => "SQL_CONVERT_TIME", + SqlSupportsConvert::SqlConvertTimestamp => "SQL_CONVERT_TIMESTAMP", + SqlSupportsConvert::SqlConvertTinyint => "SQL_CONVERT_TINYINT", + SqlSupportsConvert::SqlConvertVarbinary => "SQL_CONVERT_VARBINARY", + SqlSupportsConvert::SqlConvertVarchar => "SQL_CONVERT_VARCHAR", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2529,30 +2612,30 @@ impl XdbcDataType { /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
pub fn as_str_name(&self) -> &'static str { match self { - Self::XdbcUnknownType => "XDBC_UNKNOWN_TYPE", - Self::XdbcChar => "XDBC_CHAR", - Self::XdbcNumeric => "XDBC_NUMERIC", - Self::XdbcDecimal => "XDBC_DECIMAL", - Self::XdbcInteger => "XDBC_INTEGER", - Self::XdbcSmallint => "XDBC_SMALLINT", - Self::XdbcFloat => "XDBC_FLOAT", - Self::XdbcReal => "XDBC_REAL", - Self::XdbcDouble => "XDBC_DOUBLE", - Self::XdbcDatetime => "XDBC_DATETIME", - Self::XdbcInterval => "XDBC_INTERVAL", - Self::XdbcVarchar => "XDBC_VARCHAR", - Self::XdbcDate => "XDBC_DATE", - Self::XdbcTime => "XDBC_TIME", - Self::XdbcTimestamp => "XDBC_TIMESTAMP", - Self::XdbcLongvarchar => "XDBC_LONGVARCHAR", - Self::XdbcBinary => "XDBC_BINARY", - Self::XdbcVarbinary => "XDBC_VARBINARY", - Self::XdbcLongvarbinary => "XDBC_LONGVARBINARY", - Self::XdbcBigint => "XDBC_BIGINT", - Self::XdbcTinyint => "XDBC_TINYINT", - Self::XdbcBit => "XDBC_BIT", - Self::XdbcWchar => "XDBC_WCHAR", - Self::XdbcWvarchar => "XDBC_WVARCHAR", + XdbcDataType::XdbcUnknownType => "XDBC_UNKNOWN_TYPE", + XdbcDataType::XdbcChar => "XDBC_CHAR", + XdbcDataType::XdbcNumeric => "XDBC_NUMERIC", + XdbcDataType::XdbcDecimal => "XDBC_DECIMAL", + XdbcDataType::XdbcInteger => "XDBC_INTEGER", + XdbcDataType::XdbcSmallint => "XDBC_SMALLINT", + XdbcDataType::XdbcFloat => "XDBC_FLOAT", + XdbcDataType::XdbcReal => "XDBC_REAL", + XdbcDataType::XdbcDouble => "XDBC_DOUBLE", + XdbcDataType::XdbcDatetime => "XDBC_DATETIME", + XdbcDataType::XdbcInterval => "XDBC_INTERVAL", + XdbcDataType::XdbcVarchar => "XDBC_VARCHAR", + XdbcDataType::XdbcDate => "XDBC_DATE", + XdbcDataType::XdbcTime => "XDBC_TIME", + XdbcDataType::XdbcTimestamp => "XDBC_TIMESTAMP", + XdbcDataType::XdbcLongvarchar => "XDBC_LONGVARCHAR", + XdbcDataType::XdbcBinary => "XDBC_BINARY", + XdbcDataType::XdbcVarbinary => "XDBC_VARBINARY", + XdbcDataType::XdbcLongvarbinary => "XDBC_LONGVARBINARY", + XdbcDataType::XdbcBigint => "XDBC_BIGINT", + XdbcDataType::XdbcTinyint => "XDBC_TINYINT", + XdbcDataType::XdbcBit => "XDBC_BIT", + XdbcDataType::XdbcWchar => "XDBC_WCHAR", + XdbcDataType::XdbcWvarchar => "XDBC_WVARCHAR", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2626,39 +2709,57 @@ impl XdbcDatetimeSubcode { /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
pub fn as_str_name(&self) -> &'static str { match self { - Self::XdbcSubcodeUnknown => "XDBC_SUBCODE_UNKNOWN", - Self::XdbcSubcodeYear => "XDBC_SUBCODE_YEAR", - Self::XdbcSubcodeTime => "XDBC_SUBCODE_TIME", - Self::XdbcSubcodeTimestamp => "XDBC_SUBCODE_TIMESTAMP", - Self::XdbcSubcodeTimeWithTimezone => "XDBC_SUBCODE_TIME_WITH_TIMEZONE", - Self::XdbcSubcodeTimestampWithTimezone => { + XdbcDatetimeSubcode::XdbcSubcodeUnknown => "XDBC_SUBCODE_UNKNOWN", + XdbcDatetimeSubcode::XdbcSubcodeYear => "XDBC_SUBCODE_YEAR", + XdbcDatetimeSubcode::XdbcSubcodeTime => "XDBC_SUBCODE_TIME", + XdbcDatetimeSubcode::XdbcSubcodeTimestamp => "XDBC_SUBCODE_TIMESTAMP", + XdbcDatetimeSubcode::XdbcSubcodeTimeWithTimezone => { + "XDBC_SUBCODE_TIME_WITH_TIMEZONE" + } + XdbcDatetimeSubcode::XdbcSubcodeTimestampWithTimezone => { "XDBC_SUBCODE_TIMESTAMP_WITH_TIMEZONE" } - Self::XdbcSubcodeSecond => "XDBC_SUBCODE_SECOND", - Self::XdbcSubcodeYearToMonth => "XDBC_SUBCODE_YEAR_TO_MONTH", - Self::XdbcSubcodeDayToHour => "XDBC_SUBCODE_DAY_TO_HOUR", - Self::XdbcSubcodeDayToMinute => "XDBC_SUBCODE_DAY_TO_MINUTE", - Self::XdbcSubcodeDayToSecond => "XDBC_SUBCODE_DAY_TO_SECOND", - Self::XdbcSubcodeHourToMinute => "XDBC_SUBCODE_HOUR_TO_MINUTE", - Self::XdbcSubcodeHourToSecond => "XDBC_SUBCODE_HOUR_TO_SECOND", - Self::XdbcSubcodeMinuteToSecond => "XDBC_SUBCODE_MINUTE_TO_SECOND", - Self::XdbcSubcodeIntervalYear => "XDBC_SUBCODE_INTERVAL_YEAR", - Self::XdbcSubcodeIntervalMonth => "XDBC_SUBCODE_INTERVAL_MONTH", - Self::XdbcSubcodeIntervalDay => "XDBC_SUBCODE_INTERVAL_DAY", - Self::XdbcSubcodeIntervalHour => "XDBC_SUBCODE_INTERVAL_HOUR", - Self::XdbcSubcodeIntervalMinute => "XDBC_SUBCODE_INTERVAL_MINUTE", - Self::XdbcSubcodeIntervalSecond => "XDBC_SUBCODE_INTERVAL_SECOND", - Self::XdbcSubcodeIntervalYearToMonth => "XDBC_SUBCODE_INTERVAL_YEAR_TO_MONTH", - Self::XdbcSubcodeIntervalDayToHour => "XDBC_SUBCODE_INTERVAL_DAY_TO_HOUR", - Self::XdbcSubcodeIntervalDayToMinute => "XDBC_SUBCODE_INTERVAL_DAY_TO_MINUTE", - Self::XdbcSubcodeIntervalDayToSecond => "XDBC_SUBCODE_INTERVAL_DAY_TO_SECOND", - Self::XdbcSubcodeIntervalHourToMinute => { + XdbcDatetimeSubcode::XdbcSubcodeSecond => "XDBC_SUBCODE_SECOND", + XdbcDatetimeSubcode::XdbcSubcodeYearToMonth => "XDBC_SUBCODE_YEAR_TO_MONTH", + XdbcDatetimeSubcode::XdbcSubcodeDayToHour => "XDBC_SUBCODE_DAY_TO_HOUR", + XdbcDatetimeSubcode::XdbcSubcodeDayToMinute => "XDBC_SUBCODE_DAY_TO_MINUTE", + XdbcDatetimeSubcode::XdbcSubcodeDayToSecond => "XDBC_SUBCODE_DAY_TO_SECOND", + XdbcDatetimeSubcode::XdbcSubcodeHourToMinute => "XDBC_SUBCODE_HOUR_TO_MINUTE", + XdbcDatetimeSubcode::XdbcSubcodeHourToSecond => "XDBC_SUBCODE_HOUR_TO_SECOND", + XdbcDatetimeSubcode::XdbcSubcodeMinuteToSecond => { + "XDBC_SUBCODE_MINUTE_TO_SECOND" + } + XdbcDatetimeSubcode::XdbcSubcodeIntervalYear => "XDBC_SUBCODE_INTERVAL_YEAR", + XdbcDatetimeSubcode::XdbcSubcodeIntervalMonth => { + "XDBC_SUBCODE_INTERVAL_MONTH" + } + XdbcDatetimeSubcode::XdbcSubcodeIntervalDay => "XDBC_SUBCODE_INTERVAL_DAY", + XdbcDatetimeSubcode::XdbcSubcodeIntervalHour => "XDBC_SUBCODE_INTERVAL_HOUR", + XdbcDatetimeSubcode::XdbcSubcodeIntervalMinute => { + "XDBC_SUBCODE_INTERVAL_MINUTE" + } + XdbcDatetimeSubcode::XdbcSubcodeIntervalSecond => { + "XDBC_SUBCODE_INTERVAL_SECOND" + } + XdbcDatetimeSubcode::XdbcSubcodeIntervalYearToMonth => { + "XDBC_SUBCODE_INTERVAL_YEAR_TO_MONTH" + } + XdbcDatetimeSubcode::XdbcSubcodeIntervalDayToHour => { + "XDBC_SUBCODE_INTERVAL_DAY_TO_HOUR" + } + XdbcDatetimeSubcode::XdbcSubcodeIntervalDayToMinute => { + 
"XDBC_SUBCODE_INTERVAL_DAY_TO_MINUTE" + } + XdbcDatetimeSubcode::XdbcSubcodeIntervalDayToSecond => { + "XDBC_SUBCODE_INTERVAL_DAY_TO_SECOND" + } + XdbcDatetimeSubcode::XdbcSubcodeIntervalHourToMinute => { "XDBC_SUBCODE_INTERVAL_HOUR_TO_MINUTE" } - Self::XdbcSubcodeIntervalHourToSecond => { + XdbcDatetimeSubcode::XdbcSubcodeIntervalHourToSecond => { "XDBC_SUBCODE_INTERVAL_HOUR_TO_SECOND" } - Self::XdbcSubcodeIntervalMinuteToSecond => { + XdbcDatetimeSubcode::XdbcSubcodeIntervalMinuteToSecond => { "XDBC_SUBCODE_INTERVAL_MINUTE_TO_SECOND" } } @@ -2733,9 +2834,9 @@ impl Nullable { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::NullabilityNoNulls => "NULLABILITY_NO_NULLS", - Self::NullabilityNullable => "NULLABILITY_NULLABLE", - Self::NullabilityUnknown => "NULLABILITY_UNKNOWN", + Nullable::NullabilityNoNulls => "NULLABILITY_NO_NULLS", + Nullable::NullabilityNullable => "NULLABILITY_NULLABLE", + Nullable::NullabilityUnknown => "NULLABILITY_UNKNOWN", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2776,10 +2877,10 @@ impl Searchable { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::None => "SEARCHABLE_NONE", - Self::Char => "SEARCHABLE_CHAR", - Self::Basic => "SEARCHABLE_BASIC", - Self::Full => "SEARCHABLE_FULL", + Searchable::None => "SEARCHABLE_NONE", + Searchable::Char => "SEARCHABLE_CHAR", + Searchable::Basic => "SEARCHABLE_BASIC", + Searchable::Full => "SEARCHABLE_FULL", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2809,11 +2910,11 @@ impl UpdateDeleteRules { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - Self::Cascade => "CASCADE", - Self::Restrict => "RESTRICT", - Self::SetNull => "SET_NULL", - Self::NoAction => "NO_ACTION", - Self::SetDefault => "SET_DEFAULT", + UpdateDeleteRules::Cascade => "CASCADE", + UpdateDeleteRules::Restrict => "RESTRICT", + UpdateDeleteRules::SetNull => "SET_NULL", + UpdateDeleteRules::NoAction => "NO_ACTION", + UpdateDeleteRules::SetDefault => "SET_DEFAULT", } } /// Creates an enum from field names used in the ProtoBuf definition. 
diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs index 6d3ac3dbe610..6791b68b757d 100644 --- a/arrow-flight/src/sql/client.rs +++ b/arrow-flight/src/sql/client.rs @@ -309,7 +309,7 @@ impl FlightSqlServiceClient<Channel> { let (response_stream, trailers) = extract_lazy_trailers(response_stream); Ok(FlightRecordBatchStream::new_from_flight_data( - response_stream.map_err(FlightError::Tonic), + response_stream.map_err(|status| status.into()), ) .with_headers(md) .with_trailers(trailers)) diff --git a/arrow-flight/src/streams.rs b/arrow-flight/src/streams.rs index e532a80e1ebb..ab496122013d 100644 --- a/arrow-flight/src/streams.rs +++ b/arrow-flight/src/streams.rs @@ -127,7 +127,7 @@ impl<T> Stream for FallibleTonicResponseStream<T> { match ready!(pinned.response_stream.poll_next_unpin(cx)) { Some(Ok(res)) => Poll::Ready(Some(Ok(res))), - Some(Err(status)) => Poll::Ready(Some(Err(FlightError::Tonic(status)))), + Some(Err(status)) => Poll::Ready(Some(Err(status.into()))), None => Poll::Ready(None), } } diff --git a/arrow-flight/tests/common/trailers_layer.rs b/arrow-flight/tests/common/trailers_layer.rs index 0ccb7df86c74..b2ab74f7d925 100644 --- a/arrow-flight/tests/common/trailers_layer.rs +++ b/arrow-flight/tests/common/trailers_layer.rs @@ -21,7 +21,7 @@ use std::task::{Context, Poll}; use futures::ready; use http::{HeaderValue, Request, Response}; -use http_body::{Frame, SizeHint}; +use http_body::SizeHint; use pin_project_lite::pin_project; use tower::{Layer, Service}; @@ -99,19 +99,31 @@ impl<B: http_body::Body> http_body::Body for WrappedBody<B> { type Data = B::Data; type Error = B::Error; - fn poll_frame( - self: Pin<&mut Self>, + fn poll_data( + mut self: Pin<&mut Self>, cx: &mut Context<'_>, - ) -> Poll<Option<Result<Frame<Self::Data>, Self::Error>>> { - let mut result = ready!(self.project().inner.poll_frame(cx)); + ) -> Poll<Option<Result<Self::Data, Self::Error>>> { + self.as_mut().project().inner.poll_data(cx) + } + + fn poll_trailers( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll<Result<Option<http::header::HeaderMap>, Self::Error>> { + let result: Result<Option<http::header::HeaderMap>, Self::Error> = + ready!(self.as_mut().project().inner.poll_trailers(cx)); + + let mut trailers = http::header::HeaderMap::new(); + trailers.insert("test-trailer", HeaderValue::from_static("trailer_val")); - if let Some(Ok(frame)) = &mut result { - if let Some(trailers) = frame.trailers_mut() { - trailers.insert("test-trailer", HeaderValue::from_static("trailer_val")); + match result { + Ok(Some(mut existing)) => { + existing.extend(trailers.iter().map(|(k, v)| (k.clone(), v.clone()))); + Poll::Ready(Ok(Some(existing))) } + Ok(None) => Poll::Ready(Ok(Some(trailers))), + Err(e) => Poll::Ready(Err(e)), } - - Poll::Ready(result) } fn is_end_stream(&self) -> bool { diff --git a/arrow-integration-testing/Cargo.toml b/arrow-integration-testing/Cargo.toml index 26cb05fae1c2..502c0886860a 100644 --- a/arrow-integration-testing/Cargo.toml +++ b/arrow-integration-testing/Cargo.toml @@ -25,7 +25,7 @@ authors = { workspace = true } license = { workspace = true } edition = { workspace = true } publish = false -rust-version = "1.75.0" +rust-version = { workspace = true } [lib] crate-type = ["lib", "cdylib"] @@ -39,11 +39,12 @@ arrow-flight = { path = "../arrow-flight", default-features = false } arrow-integration-test = { path = "../arrow-integration-test", default-features = false } clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] } futures = { version = "0.3", default-features = false } -prost = { version = "0.13", default-features = false } +hex = { version = "0.4",
default-features = false, features = ["std"] } +prost = { version = "0.12", default-features = false } serde = { version = "1.0", default-features = false, features = ["rc", "derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } -tokio = { version = "1.0", default-features = false, features = [ "rt-multi-thread"] } -tonic = { version = "0.12", default-features = false } +tokio = { version = "1.0", default-features = false } +tonic = { version = "0.11", default-features = false } tracing-subscriber = { version = "0.3.1", default-features = false, features = ["fmt"], optional = true } flate2 = { version = "1", default-features = false, features = ["rust_backend"] } diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml index 892238fe6828..cd99cb6ec429 100644 --- a/arrow-json/Cargo.toml +++ b/arrow-json/Cargo.toml @@ -56,7 +56,7 @@ futures = "0.3" tokio = { version = "1.27", default-features = false, features = ["io-util"] } bytes = "1.4" criterion = { version = "0.5", default-features = false } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } [[bench]] name = "serde" diff --git a/arrow-json/benches/serde.rs b/arrow-json/benches/serde.rs index 7636b9c9dff9..7baaac458f86 100644 --- a/arrow-json/benches/serde.rs +++ b/arrow-json/benches/serde.rs @@ -18,7 +18,7 @@ use arrow_json::ReaderBuilder; use arrow_schema::{DataType, Field, Schema}; use criterion::*; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use serde::Serialize; use std::sync::Arc; @@ -35,26 +35,28 @@ fn do_bench<R: Serialize>(c: &mut Criterion, name: &str, rows: &[R], schema: &Sc } fn criterion_benchmark(c: &mut Criterion) { - let mut rng = thread_rng(); + let mut rng = rng(); let schema = Schema::new(vec![Field::new("i32", DataType::Int32, false)]); - let v: Vec<i32> = (0..2048).map(|_| rng.gen_range(0..10000)).collect(); + let v: Vec<i32> = (0..2048).map(|_| rng.random_range(0..10000)).collect(); do_bench(c, "small_i32", &v, &schema); - let v: Vec<i32> = (0..2048).map(|_| rng.gen()).collect(); + let v: Vec<i32> = (0..2048).map(|_| rng.random()).collect(); do_bench(c, "large_i32", &v, &schema); let schema = Schema::new(vec![Field::new("i64", DataType::Int64, false)]); - let v: Vec<i64> = (0..2048).map(|_| rng.gen_range(0..10000)).collect(); + let v: Vec<i64> = (0..2048).map(|_| rng.random_range(0..10000)).collect(); do_bench(c, "small_i64", &v, &schema); - let v: Vec<i64> = (0..2048).map(|_| rng.gen_range(0..i32::MAX as _)).collect(); + let v: Vec<i64> = (0..2048) + .map(|_| rng.random_range(0..i32::MAX as _)) + .collect(); do_bench(c, "medium_i64", &v, &schema); - let v: Vec<i64> = (0..2048).map(|_| rng.gen()).collect(); + let v: Vec<i64> = (0..2048).map(|_| rng.random()).collect(); do_bench(c, "large_i64", &v, &schema); let schema = Schema::new(vec![Field::new("f32", DataType::Float32, false)]); - let v: Vec<f32> = (0..2048).map(|_| rng.gen_range(0.0..10000.)).collect(); + let v: Vec<f32> = (0..2048).map(|_| rng.random_range(0.0..10000.)).collect(); do_bench(c, "small_f32", &v, &schema); - let v: Vec<f32> = (0..2048).map(|_| rng.gen_range(0.0..f32::MAX)).collect(); + let v: Vec<f32> = (0..2048).map(|_| rng.random_range(0.0..f32::MAX)).collect(); do_bench(c, "large_f32", &v, &schema); } diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs index bb02d0a3176c..d178a4bac574 100644 --- a/arrow-json/src/lib.rs +++ b/arrow-json/src/lib.rs @@ -70,7 +70,10 @@ pub mod reader; pub mod writer; pub use self::reader::{Reader, ReaderBuilder};
-pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer, WriterBuilder}; +pub use self::writer::{ + ArrayWriter, Encoder, EncoderFactory, EncoderOptions, LineDelimitedWriter, Writer, + WriterBuilder, +}; use half::f16; use serde_json::{Number, Value}; diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs index 0b3c788d5519..d9481cc484b9 100644 --- a/arrow-json/src/writer/encoder.rs +++ b/arrow-json/src/writer/encoder.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +use std::io::Write; +use std::sync::Arc; use crate::StructMode; use arrow_array::cast::AsArray; @@ -25,126 +27,322 @@ use arrow_schema::{ArrowError, DataType, FieldRef}; use half::f16; use lexical_core::FormattedSize; use serde::Serializer; -use std::io::Write; +/// Configuration options for the JSON encoder. #[derive(Debug, Clone, Default)] pub struct EncoderOptions { - pub explicit_nulls: bool, - pub struct_mode: StructMode, + /// Whether to include nulls in the output or elide them. + explicit_nulls: bool, + /// Whether to encode structs as JSON objects or JSON arrays of their values. + struct_mode: StructMode, + /// An optional hook for customizing encoding behavior. + encoder_factory: Option<Arc<dyn EncoderFactory>>, +} + +impl EncoderOptions { + /// Set whether to include nulls in the output or elide them. + pub fn with_explicit_nulls(mut self, explicit_nulls: bool) -> Self { + self.explicit_nulls = explicit_nulls; + self + } + + /// Set whether to encode structs as JSON objects or JSON arrays of their values. + pub fn with_struct_mode(mut self, struct_mode: StructMode) -> Self { + self.struct_mode = struct_mode; + self + } + + /// Set an optional hook for customizing encoding behavior. + pub fn with_encoder_factory(mut self, encoder_factory: Arc<dyn EncoderFactory>) -> Self { + self.encoder_factory = Some(encoder_factory); + self + } + + /// Get whether to include nulls in the output or elide them. + pub fn explicit_nulls(&self) -> bool { + self.explicit_nulls + } + + /// Get whether to encode structs as JSON objects or JSON arrays of their values. + pub fn struct_mode(&self) -> StructMode { + self.struct_mode + } + + /// Get the optional hook for customizing encoding behavior. + pub fn encoder_factory(&self) -> Option<&Arc<dyn EncoderFactory>> { + self.encoder_factory.as_ref() + } +} + +/// A trait to create custom encoders for specific data types. +/// +/// This allows overriding the default encoders for specific data types, +/// or adding new encoders for custom data types.
+/// +/// # Examples +/// +/// ``` +/// use std::io::Write; +/// use arrow_array::{ArrayAccessor, Array, BinaryArray, Float64Array, RecordBatch}; +/// use arrow_array::cast::AsArray; +/// use arrow_schema::{DataType, Field, Schema, FieldRef}; +/// use arrow_json::{writer::{WriterBuilder, JsonArray, NullableEncoder}, StructMode}; +/// use arrow_json::{Encoder, EncoderFactory, EncoderOptions}; +/// use arrow_schema::ArrowError; +/// use std::sync::Arc; +/// use serde_json::json; +/// use serde_json::Value; +/// +/// struct IntArrayBinaryEncoder<B> { +/// array: B, +/// } +/// +/// impl<'a, B> Encoder for IntArrayBinaryEncoder<B> +/// where +/// B: ArrayAccessor<Item = &'a [u8]>, +/// { +/// fn encode(&mut self, idx: usize, out: &mut Vec<u8>) { +/// out.push(b'['); +/// let child = self.array.value(idx); +/// for (idx, byte) in child.iter().enumerate() { +/// write!(out, "{byte}").unwrap(); +/// if idx < child.len() - 1 { +/// out.push(b','); +/// } +/// } +/// out.push(b']'); +/// } +/// } +/// +/// #[derive(Debug)] +/// struct IntArrayBinaryEncoderFactory; +/// +/// impl EncoderFactory for IntArrayBinaryEncoderFactory { +/// fn make_default_encoder<'a>( +/// &self, +/// _field: &'a FieldRef, +/// array: &'a dyn Array, +/// _options: &'a EncoderOptions, +/// ) -> Result<Option<NullableEncoder<'a>>, ArrowError> { +/// match array.data_type() { +/// DataType::Binary => { +/// let array = array.as_binary::<i32>(); +/// let encoder = IntArrayBinaryEncoder { array }; +/// let array_encoder = Box::new(encoder) as Box<dyn Encoder + 'a>; +/// let nulls = array.nulls().cloned(); +/// Ok(Some(NullableEncoder::new(array_encoder, nulls))) +/// } +/// _ => Ok(None), +/// } +/// } +/// } +/// +/// let binary_array = BinaryArray::from_iter([Some(b"a".as_slice()), None, Some(b"b".as_slice())]); +/// let float_array = Float64Array::from(vec![Some(1.0), Some(2.3), None]); +/// let fields = vec![ +/// Field::new("bytes", DataType::Binary, true), +/// Field::new("float", DataType::Float64, true), +/// ]; +/// let batch = RecordBatch::try_new( +/// Arc::new(Schema::new(fields)), +/// vec![ +/// Arc::new(binary_array) as Arc<dyn Array>, +/// Arc::new(float_array) as Arc<dyn Array>, +/// ], +/// ) +/// .unwrap(); +/// +/// let json_value: Value = { +/// let mut buf = Vec::new(); +/// let mut writer = WriterBuilder::new() +/// .with_encoder_factory(Arc::new(IntArrayBinaryEncoderFactory)) +/// .build::<_, JsonArray>(&mut buf); +/// writer.write_batches(&[&batch]).unwrap(); +/// writer.finish().unwrap(); +/// serde_json::from_slice(&buf).unwrap() +/// }; +/// +/// let expected = json!([ +/// {"bytes": [97], "float": 1.0}, +/// {"float": 2.3}, +/// {"bytes": [98]}, +/// ]); +/// +/// assert_eq!(json_value, expected); +/// ``` +pub trait EncoderFactory: std::fmt::Debug + Send + Sync { + /// Make an encoder that overrides the default encoder for a specific field and array or provides an encoder for a custom data type. + /// This can be used to override how, e.g., binary data is encoded, producing an encoded string or an array of integers instead. + /// + /// Note that the type of the field may not match the type of the array: for dictionary arrays, unless the top-level dictionary is handled, this + /// will be called again for the keys and values of the dictionary, at which point the field type will still be the outer dictionary type but the + /// array will have a different type. + /// For example, `field` might have the type `Dictionary(i32, Utf8)` but `array` will be `Utf8`.
+ fn make_default_encoder<'a>( + &self, + _field: &'a FieldRef, + _array: &'a dyn Array, + _options: &'a EncoderOptions, + ) -> Result<Option<NullableEncoder<'a>>, ArrowError> { + Ok(None) + } +} + +/// An encoder + a null buffer. +/// This is packaged together into a wrapper struct to minimize dynamic dispatch for null checks. +pub struct NullableEncoder<'a> { + encoder: Box<dyn Encoder + 'a>, + nulls: Option<NullBuffer>, +} + +impl<'a> NullableEncoder<'a> { + /// Create a new encoder with a null buffer. + pub fn new(encoder: Box<dyn Encoder + 'a>, nulls: Option<NullBuffer>) -> Self { + Self { encoder, nulls } + } + + /// Encode the value at index `idx` to `out`. + pub fn encode(&mut self, idx: usize, out: &mut Vec<u8>) { + self.encoder.encode(idx, out) + } + + /// Returns whether the value at index `idx` is null. + pub fn is_null(&self, idx: usize) -> bool { + self.nulls.as_ref().is_some_and(|nulls| nulls.is_null(idx)) + } + + /// Returns whether the encoder has any nulls. + pub fn has_nulls(&self) -> bool { + match self.nulls { + Some(ref nulls) => nulls.null_count() > 0, + None => false, + } + } +} + +impl Encoder for NullableEncoder<'_> { + fn encode(&mut self, idx: usize, out: &mut Vec<u8>) { + self.encoder.encode(idx, out) + } } /// A trait to format array values as JSON values /// /// Nullability is handled by the caller to allow encoding nulls implicitly, i.e. `{}` instead of `{"a": null}` pub trait Encoder { - /// Encode the non-null value at index `idx` to `out` + /// Encode the non-null value at index `idx` to `out`. /// - /// The behaviour is unspecified if `idx` corresponds to a null index + /// The behaviour is unspecified if `idx` corresponds to a null index. fn encode(&mut self, idx: usize, out: &mut Vec<u8>); } +/// Creates an encoder for the given array and field. +/// +/// This first calls the EncoderFactory if one is provided, and then falls back to the default encoders. pub fn make_encoder<'a>( + field: &'a FieldRef, array: &'a dyn Array, - options: &EncoderOptions, -) -> Result<Box<dyn Encoder + 'a>, ArrowError> { - let (encoder, nulls) = make_encoder_impl(array, options)?; - assert!(nulls.is_none(), "root cannot be nullable"); - Ok(encoder) -} - -fn make_encoder_impl<'a>( - array: &'a dyn Array, - options: &EncoderOptions, -) -> Result<(Box<dyn Encoder + 'a>, Option<NullBuffer>), ArrowError> { + options: &'a EncoderOptions, +) -> Result<NullableEncoder<'a>, ArrowError> { macro_rules! primitive_helper { ($t:ty) => {{ let array = array.as_primitive::<$t>(); let nulls = array.nulls().cloned(); - (Box::new(PrimitiveEncoder::new(array)) as _, nulls) + NullableEncoder::new(Box::new(PrimitiveEncoder::new(array)), nulls) }}; } - Ok(downcast_integer! { + if let Some(factory) = options.encoder_factory() { + if let Some(encoder) = factory.make_default_encoder(field, array, options)? { + return Ok(encoder); + } + } + + let nulls = array.nulls().cloned(); + let encoder = downcast_integer!
{ array.data_type() => (primitive_helper), DataType::Float16 => primitive_helper!(Float16Type), DataType::Float32 => primitive_helper!(Float32Type), DataType::Float64 => primitive_helper!(Float64Type), DataType::Boolean => { let array = array.as_boolean(); - (Box::new(BooleanEncoder(array)), array.nulls().cloned()) + NullableEncoder::new(Box::new(BooleanEncoder(array)), array.nulls().cloned()) } - DataType::Null => (Box::new(NullEncoder), array.logical_nulls()), + DataType::Null => NullableEncoder::new(Box::new(NullEncoder), array.logical_nulls()), DataType::Utf8 => { let array = array.as_string::<i32>(); - (Box::new(StringEncoder(array)) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(StringEncoder(array)), array.nulls().cloned()) } DataType::LargeUtf8 => { let array = array.as_string::<i64>(); - (Box::new(StringEncoder(array)) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(StringEncoder(array)), array.nulls().cloned()) } DataType::Utf8View => { let array = array.as_string_view(); - (Box::new(StringViewEncoder(array)) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(StringViewEncoder(array)), array.nulls().cloned()) } DataType::List(_) => { let array = array.as_list::<i32>(); - (Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned()) } DataType::LargeList(_) => { let array = array.as_list::<i64>(); - (Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned()) } DataType::FixedSizeList(_, _) => { let array = array.as_fixed_size_list(); - (Box::new(FixedSizeListEncoder::try_new(array, options)?) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(FixedSizeListEncoder::try_new(field, array, options)?), array.nulls().cloned()) } DataType::Dictionary(_, _) => downcast_dictionary_array! { - array => (Box::new(DictionaryEncoder::try_new(array, options)?) as _, array.logical_nulls()), + array => { + NullableEncoder::new(Box::new(DictionaryEncoder::try_new(field, array, options)?), array.nulls().cloned()) + }, _ => unreachable!() } DataType::Map(_, _) => { let array = array.as_map(); - (Box::new(MapEncoder::try_new(array, options)?)
as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(MapEncoder::try_new(field, array, options)?), array.nulls().cloned()) } DataType::FixedSizeBinary(_) => { let array = array.as_fixed_size_binary(); - (Box::new(BinaryEncoder::new(array)) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(BinaryEncoder::new(array)) as _, array.nulls().cloned()) } DataType::Binary => { let array: &BinaryArray = array.as_binary(); - (Box::new(BinaryEncoder::new(array)) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(BinaryEncoder::new(array)), array.nulls().cloned()) } DataType::LargeBinary => { let array: &LargeBinaryArray = array.as_binary(); - (Box::new(BinaryEncoder::new(array)) as _, array.nulls().cloned()) + NullableEncoder::new(Box::new(BinaryEncoder::new(array)), array.nulls().cloned()) } DataType::Struct(fields) => { let array = array.as_struct(); let encoders = fields.iter().zip(array.columns()).map(|(field, array)| { - let (encoder, nulls) = make_encoder_impl(array, options)?; + let encoder = make_encoder(field, array, options)?; Ok(FieldEncoder{ field: field.clone(), - encoder, nulls + encoder, }) }).collect::<Result<Vec<_>, ArrowError>>()?; let encoder = StructArrayEncoder{ encoders, - explicit_nulls: options.explicit_nulls, - struct_mode: options.struct_mode, + explicit_nulls: options.explicit_nulls(), + struct_mode: options.struct_mode(), }; - (Box::new(encoder) as _, array.nulls().cloned()) + let nulls = array.nulls().cloned(); + NullableEncoder::new(Box::new(encoder) as Box<dyn Encoder + 'a>, nulls) } DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { let options = FormatOptions::new().with_display_error(true); - let formatter = ArrayFormatter::try_new(array, &options)?; - (Box::new(RawArrayFormatter(formatter)) as _, array.nulls().cloned()) + let formatter = JsonArrayFormatter::new(ArrayFormatter::try_new(array, &options)?); + NullableEncoder::new(Box::new(RawArrayFormatter(formatter)) as Box<dyn Encoder + 'a>, nulls) } d => match d.is_temporal() { true => { @@ -154,11 +352,17 @@ fn make_encoder_impl<'a>( // may need to be revisited let options = FormatOptions::new().with_display_error(true); let formatter = ArrayFormatter::try_new(array, &options)?; - (Box::new(formatter) as _, array.nulls().cloned()) + let formatter = JsonArrayFormatter::new(formatter); + NullableEncoder::new(Box::new(formatter) as Box<dyn Encoder + 'a>, nulls) } - false => return Err(ArrowError::InvalidArgumentError(format!("JSON Writer does not support data type: {d}"))), + false => return Err(ArrowError::JsonError(format!( + "Unsupported data type for JSON encoding: {:?}", + d + ))) } - }) + }; + + Ok(encoder) } fn encode_string(s: &str, out: &mut Vec<u8>) { @@ -168,8 +372,13 @@ fn encode_string(s: &str, out: &mut Vec<u8>) { struct FieldEncoder<'a> { field: FieldRef, - encoder: Box<dyn Encoder + 'a>, - nulls: Option<NullBuffer>, + encoder: NullableEncoder<'a>, +} + +impl FieldEncoder<'_> { + fn is_null(&self, idx: usize) -> bool { + self.encoder.is_null(idx) + } } struct StructArrayEncoder<'a> { @@ -178,15 +387,6 @@ struct StructArrayEncoder<'a> { struct_mode: StructMode, } -/// This API is only stable since 1.70 so can't use it when current MSRV is lower -#[inline(always)] -fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool { - match opt { - None => false, - Some(x) => f(x), - } -} - impl Encoder for StructArrayEncoder<'_> { fn encode(&mut self, idx: usize, out: &mut Vec<u8>) { match self.struct_mode { @@ -196,9 +396,10 @@ impl Encoder for StructArrayEncoder<'_> { let mut is_first = true; // Nulls can only be dropped in explicit mode let drop_nulls = (self.struct_mode
== StructMode::ObjectOnly) && !self.explicit_nulls; - for field_encoder in &mut self.encoders { - let is_null = is_some_and(field_encoder.nulls.as_ref(), |n| n.is_null(idx)); - if drop_nulls && is_null { + + for field_encoder in self.encoders.iter_mut() { + let is_null = field_encoder.is_null(idx); + if is_null && drop_nulls { continue; } @@ -212,9 +413,10 @@ impl Encoder for StructArrayEncoder<'_> { out.push(b':'); } - match is_null { - true => out.extend_from_slice(b"null"), - false => field_encoder.encoder.encode(idx, out), + if is_null { + out.extend_from_slice(b"null"); + } else { + field_encoder.encoder.encode(idx, out); } } match self.struct_mode { @@ -339,20 +541,19 @@ impl Encoder for StringViewEncoder<'_> { struct ListEncoder<'a, O: OffsetSizeTrait> { offsets: OffsetBuffer, - nulls: Option, - encoder: Box, + encoder: NullableEncoder<'a>, } impl<'a, O: OffsetSizeTrait> ListEncoder<'a, O> { fn try_new( + field: &'a FieldRef, array: &'a GenericListArray, - options: &EncoderOptions, + options: &'a EncoderOptions, ) -> Result { - let (encoder, nulls) = make_encoder_impl(array.values().as_ref(), options)?; + let encoder = make_encoder(field, array.values().as_ref(), options)?; Ok(Self { offsets: array.offsets().clone(), encoder, - nulls, }) } } @@ -362,22 +563,25 @@ impl Encoder for ListEncoder<'_, O> { let end = self.offsets[idx + 1].as_usize(); let start = self.offsets[idx].as_usize(); out.push(b'['); - match self.nulls.as_ref() { - Some(n) => (start..end).for_each(|idx| { + + if self.encoder.has_nulls() { + for idx in start..end { if idx != start { out.push(b',') } - match n.is_null(idx) { - true => out.extend_from_slice(b"null"), - false => self.encoder.encode(idx, out), + if self.encoder.is_null(idx) { + out.extend_from_slice(b"null"); + } else { + self.encoder.encode(idx, out); } - }), - None => (start..end).for_each(|idx| { + } + } else { + for idx in start..end { if idx != start { out.push(b',') } self.encoder.encode(idx, out); - }), + } } out.push(b']'); } @@ -385,19 +589,18 @@ impl Encoder for ListEncoder<'_, O> { struct FixedSizeListEncoder<'a> { value_length: usize, - nulls: Option, - encoder: Box, + encoder: NullableEncoder<'a>, } impl<'a> FixedSizeListEncoder<'a> { fn try_new( + field: &'a FieldRef, array: &'a FixedSizeListArray, - options: &EncoderOptions, + options: &'a EncoderOptions, ) -> Result { - let (encoder, nulls) = make_encoder_impl(array.values().as_ref(), options)?; + let encoder = make_encoder(field, array.values().as_ref(), options)?; Ok(Self { encoder, - nulls, value_length: array.value_length().as_usize(), }) } @@ -408,23 +611,24 @@ impl Encoder for FixedSizeListEncoder<'_> { let start = idx * self.value_length; let end = start + self.value_length; out.push(b'['); - match self.nulls.as_ref() { - Some(n) => (start..end).for_each(|idx| { + if self.encoder.has_nulls() { + for idx in start..end { if idx != start { - out.push(b','); + out.push(b',') } - if n.is_null(idx) { + if self.encoder.is_null(idx) { out.extend_from_slice(b"null"); } else { self.encoder.encode(idx, out); } - }), - None => (start..end).for_each(|idx| { + } + } else { + for idx in start..end { if idx != start { - out.push(b','); + out.push(b',') } self.encoder.encode(idx, out); - }), + } } out.push(b']'); } @@ -432,15 +636,16 @@ impl Encoder for FixedSizeListEncoder<'_> { struct DictionaryEncoder<'a, K: ArrowDictionaryKeyType> { keys: ScalarBuffer, - encoder: Box, + encoder: NullableEncoder<'a>, } impl<'a, K: ArrowDictionaryKeyType> DictionaryEncoder<'a, K> { fn try_new( + field: &'a 
FieldRef, array: &'a DictionaryArray, - options: &EncoderOptions, + options: &'a EncoderOptions, ) -> Result { - let (encoder, _) = make_encoder_impl(array.values().as_ref(), options)?; + let encoder = make_encoder(field, array.values().as_ref(), options)?; Ok(Self { keys: array.keys().values().clone(), @@ -455,22 +660,33 @@ impl Encoder for DictionaryEncoder<'_, K> { } } -impl Encoder for ArrayFormatter<'_> { +/// A newtype wrapper around [`ArrayFormatter`] to keep our usage of it private and not implement `Encoder` for the public type +struct JsonArrayFormatter<'a> { + formatter: ArrayFormatter<'a>, +} + +impl<'a> JsonArrayFormatter<'a> { + fn new(formatter: ArrayFormatter<'a>) -> Self { + Self { formatter } + } +} + +impl Encoder for JsonArrayFormatter<'_> { fn encode(&mut self, idx: usize, out: &mut Vec) { out.push(b'"'); // Should be infallible // Note: We are making an assumption that the formatter does not produce characters that require escaping - let _ = write!(out, "{}", self.value(idx)); + let _ = write!(out, "{}", self.formatter.value(idx)); out.push(b'"') } } -/// A newtype wrapper around [`ArrayFormatter`] that skips surrounding the value with `"` -struct RawArrayFormatter<'a>(ArrayFormatter<'a>); +/// A newtype wrapper around [`JsonArrayFormatter`] that skips surrounding the value with `"` +struct RawArrayFormatter<'a>(JsonArrayFormatter<'a>); impl Encoder for RawArrayFormatter<'_> { fn encode(&mut self, idx: usize, out: &mut Vec) { - let _ = write!(out, "{}", self.0.value(idx)); + let _ = write!(out, "{}", self.0.formatter.value(idx)); } } @@ -484,14 +700,17 @@ impl Encoder for NullEncoder { struct MapEncoder<'a> { offsets: OffsetBuffer, - keys: Box, - values: Box, - value_nulls: Option, + keys: NullableEncoder<'a>, + values: NullableEncoder<'a>, explicit_nulls: bool, } impl<'a> MapEncoder<'a> { - fn try_new(array: &'a MapArray, options: &EncoderOptions) -> Result { + fn try_new( + field: &'a FieldRef, + array: &'a MapArray, + options: &'a EncoderOptions, + ) -> Result { let values = array.values(); let keys = array.keys(); @@ -502,17 +721,17 @@ impl<'a> MapEncoder<'a> { ))); } - let (keys, key_nulls) = make_encoder_impl(keys, options)?; - let (values, value_nulls) = make_encoder_impl(values, options)?; + let keys = make_encoder(field, keys, options)?; + let values = make_encoder(field, values, options)?; // We sanity check nulls as these are currently not enforced by MapArray (#1697) - if is_some_and(key_nulls, |x| x.null_count() != 0) { + if keys.has_nulls() { return Err(ArrowError::InvalidArgumentError( "Encountered nulls in MapArray keys".to_string(), )); } - if is_some_and(array.entries().nulls(), |x| x.null_count() != 0) { + if array.entries().nulls().is_some_and(|x| x.null_count() != 0) { return Err(ArrowError::InvalidArgumentError( "Encountered nulls in MapArray entries".to_string(), )); @@ -522,8 +741,7 @@ impl<'a> MapEncoder<'a> { offsets: array.offsets().clone(), keys, values, - value_nulls, - explicit_nulls: options.explicit_nulls, + explicit_nulls: options.explicit_nulls(), }) } } @@ -536,8 +754,9 @@ impl Encoder for MapEncoder<'_> { let mut is_first = true; out.push(b'{'); + for idx in start..end { - let is_null = is_some_and(self.value_nulls.as_ref(), |n| n.is_null(idx)); + let is_null = self.values.is_null(idx); if is_null && !self.explicit_nulls { continue; } @@ -550,9 +769,10 @@ impl Encoder for MapEncoder<'_> { self.keys.encode(idx, out); out.push(b':'); - match is_null { - true => out.extend_from_slice(b"null"), - false => self.values.encode(idx, out), + 
if is_null { + out.extend_from_slice(b"null"); + } else { + self.values.encode(idx, out); } } out.push(b'}'); diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs index 5d3e558480ca..ee1b5fabe538 100644 --- a/arrow-json/src/writer/mod.rs +++ b/arrow-json/src/writer/mod.rs @@ -106,13 +106,13 @@ //! ``` mod encoder; -use std::{fmt::Debug, io::Write}; +use std::{fmt::Debug, io::Write, sync::Arc}; use crate::StructMode; use arrow_array::*; use arrow_schema::*; -use encoder::{make_encoder, EncoderOptions}; +pub use encoder::{make_encoder, Encoder, EncoderFactory, EncoderOptions, NullableEncoder}; /// This trait defines how to format a sequence of JSON objects to a /// byte stream. @@ -225,7 +225,7 @@ impl WriterBuilder { /// Returns `true` if this writer is configured to keep keys with null values. pub fn explicit_nulls(&self) -> bool { - self.0.explicit_nulls + self.0.explicit_nulls() } /// Set whether to keep keys with null values, or to omit writing them. @@ -251,13 +251,13 @@ impl WriterBuilder { /// Default is to skip nulls (set to `false`). If `struct_mode == ListOnly`, /// nulls will be written explicitly regardless of this setting. pub fn with_explicit_nulls(mut self, explicit_nulls: bool) -> Self { - self.0.explicit_nulls = explicit_nulls; + self.0 = self.0.with_explicit_nulls(explicit_nulls); self } /// Returns if this writer is configured to write structs as JSON Objects or Arrays. pub fn struct_mode(&self) -> StructMode { - self.0.struct_mode + self.0.struct_mode() } /// Set the [`StructMode`] for the writer, which determines whether structs @@ -266,7 +266,16 @@ impl WriterBuilder { /// `ListOnly`, nulls will be written explicitly regardless of the /// `explicit_nulls` setting. pub fn with_struct_mode(mut self, struct_mode: StructMode) -> Self { - self.0.struct_mode = struct_mode; + self.0 = self.0.with_struct_mode(struct_mode); + self + } + + /// Set an encoder factory to use when creating encoders for writing JSON. + /// + /// This can be used to override how some types are encoded or to provide + /// a fallback for types that are not supported by the default encoder. 
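+    ///
+    /// If the factory returns `Ok(None)` for an array, the writer falls back
+    /// to its built-in encoding for that type, as the tests below demonstrate.
+    ///
+    /// A minimal sketch of a factory that always defers to the defaults
+    /// (`NoopFactory` is an illustrative name, not part of this crate):
+    ///
+    /// ```ignore
+    /// #[derive(Debug)]
+    /// struct NoopFactory;
+    ///
+    /// impl EncoderFactory for NoopFactory {
+    ///     fn make_default_encoder<'a>(
+    ///         &self,
+    ///         _field: &'a FieldRef,
+    ///         _array: &'a dyn Array,
+    ///         _options: &'a EncoderOptions,
+    ///     ) -> Result<Option<NullableEncoder<'a>>, ArrowError> {
+    ///         // Returning `None` asks the writer to use its default encoder.
+    ///         Ok(None)
+    ///     }
+    /// }
+    /// ```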
+    pub fn with_encoder_factory(mut self, factory: Arc<dyn EncoderFactory>) -> Self {
+        self.0 = self.0.with_encoder_factory(factory);
         self
     }
@@ -351,8 +360,16 @@ where
         }

         let array = StructArray::from(batch.clone());
-        let mut encoder = make_encoder(&array, &self.options)?;
+        let field = Arc::new(Field::new_struct(
+            "",
+            batch.schema().fields().clone(),
+            false,
+        ));
+
+        let mut encoder = make_encoder(&field, &array, &self.options)?;
+        // Validate that the root is not nullable
+        assert!(!encoder.has_nulls(), "root cannot be nullable");

         for idx in 0..batch.num_rows() {
             self.format.start_row(&mut buffer, is_first_row)?;
             is_first_row = false;
@@ -419,15 +436,19 @@ where
 #[cfg(test)]
 mod tests {
     use core::str;
+    use std::collections::HashMap;
     use std::fs::{read_to_string, File};
     use std::io::{BufReader, Seek};
     use std::sync::Arc;

+    use arrow_array::cast::AsArray;
     use serde_json::{json, Value};

+    use super::LineDelimited;
+    use super::{Encoder, WriterBuilder};
     use arrow_array::builder::*;
     use arrow_array::types::*;
-    use arrow_buffer::{i256, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
+    use arrow_buffer::{i256, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer, ToByteSlice};
     use arrow_data::ArrayData;
     use crate::reader::*;
@@ -446,7 +467,7 @@ mod tests {
             .map(|s| (!s.is_empty()).then(|| serde_json::from_slice(s).unwrap()))
             .collect();

-        assert_eq!(expected, actual);
+        assert_eq!(actual, expected);
     }

     #[test]
@@ -1891,7 +1912,7 @@ mod tests {
         let json_str = str::from_utf8(&json).unwrap();
         assert_eq!(
             json_str,
-            r#"[{"my_dict":"a"},{"my_dict":null},{"my_dict":null}]"#
+            r#"[{"my_dict":"a"},{"my_dict":null},{"my_dict":""}]"#
         )
     }
@@ -2036,4 +2057,414 @@ mod tests {
         }
         assert_json_eq(&buf, expected);
     }
+
+    fn make_fallback_encoder_test_data() -> (RecordBatch, Arc<dyn EncoderFactory>) {
+        // Note: this is not intended to be an efficient implementation.
+        // Just a simple example to demonstrate how to implement a custom encoder.
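+        // The factory below accepts only sparse unions whose children are
+        // Null/Int32/Utf8: it eagerly materializes every slot into a plain
+        // Rust enum, and returns Ok(None) for any other type so that the
+        // remaining columns (e.g. the Float64 column) keep the default encoding.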
+        #[derive(Debug)]
+        enum UnionValue {
+            Int32(i32),
+            String(String),
+        }
+
+        #[derive(Debug)]
+        struct UnionEncoder {
+            array: Vec<Option<UnionValue>>,
+        }
+
+        impl Encoder for UnionEncoder {
+            fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+                match &self.array[idx] {
+                    None => out.extend_from_slice(b"null"),
+                    Some(UnionValue::Int32(v)) => out.extend_from_slice(v.to_string().as_bytes()),
+                    Some(UnionValue::String(v)) => {
+                        out.extend_from_slice(format!("\"{}\"", v).as_bytes())
+                    }
+                }
+            }
+        }
+
+        #[derive(Debug)]
+        struct UnionEncoderFactory;
+
+        impl EncoderFactory for UnionEncoderFactory {
+            fn make_default_encoder<'a>(
+                &self,
+                _field: &'a FieldRef,
+                array: &'a dyn Array,
+                _options: &'a EncoderOptions,
+            ) -> Result<Option<NullableEncoder<'a>>, ArrowError> {
+                let data_type = array.data_type();
+                let fields = match data_type {
+                    DataType::Union(fields, UnionMode::Sparse) => fields,
+                    _ => return Ok(None),
+                };
+                // check that the fields are supported
+                let fields = fields.iter().map(|(_, f)| f).collect::<Vec<_>>();
+                for f in fields.iter() {
+                    match f.data_type() {
+                        DataType::Null => {}
+                        DataType::Int32 => {}
+                        DataType::Utf8 => {}
+                        _ => return Ok(None),
+                    }
+                }
+                let (_, type_ids, _, buffers) = array.as_union().clone().into_parts();
+                let mut values = Vec::with_capacity(type_ids.len());
+                for idx in 0..type_ids.len() {
+                    let type_id = type_ids[idx];
+                    let field = &fields[type_id as usize];
+                    let value = match field.data_type() {
+                        DataType::Null => None,
+                        DataType::Int32 => Some(UnionValue::Int32(
+                            buffers[type_id as usize]
+                                .as_primitive::<Int32Type>()
+                                .value(idx),
+                        )),
+                        DataType::Utf8 => Some(UnionValue::String(
+                            buffers[type_id as usize]
+                                .as_string::<i32>()
+                                .value(idx)
+                                .to_string(),
+                        )),
+                        _ => unreachable!(),
+                    };
+                    values.push(value);
+                }
+                let array_encoder =
+                    Box::new(UnionEncoder { array: values }) as Box<dyn Encoder + 'a>;
+                let nulls = array.nulls().cloned();
+                Ok(Some(NullableEncoder::new(array_encoder, nulls)))
+            }
+        }
+
+        let int_array = Int32Array::from(vec![Some(1), None, None]);
+        let string_array = StringArray::from(vec![None, Some("a"), None]);
+        let null_array = NullArray::new(3);
+        let type_ids = [0_i8, 1, 2].into_iter().collect::<ScalarBuffer<i8>>();
+
+        let union_fields = [
+            (0, Arc::new(Field::new("A", DataType::Int32, false))),
+            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
+            (2, Arc::new(Field::new("C", DataType::Null, false))),
+        ]
+        .into_iter()
+        .collect::<UnionFields>();
+
+        let children = vec![
+            Arc::new(int_array) as Arc<dyn Array>,
+            Arc::new(string_array),
+            Arc::new(null_array),
+        ];
+
+        let array = UnionArray::try_new(union_fields.clone(), type_ids, None, children).unwrap();
+
+        let float_array = Float64Array::from(vec![Some(1.0), None, Some(3.4)]);
+
+        let fields = vec![
+            Field::new(
+                "union",
+                DataType::Union(union_fields, UnionMode::Sparse),
+                true,
+            ),
+            Field::new("float", DataType::Float64, true),
+        ];
+
+        let batch = RecordBatch::try_new(
+            Arc::new(Schema::new(fields)),
+            vec![
+                Arc::new(array) as Arc<dyn Array>,
+                Arc::new(float_array) as Arc<dyn Array>,
+            ],
+        )
+        .unwrap();
+
+        (batch, Arc::new(UnionEncoderFactory))
+    }
+
+    #[test]
+    fn test_fallback_encoder_factory_line_delimited_implicit_nulls() {
+        let (batch, encoder_factory) = make_fallback_encoder_test_data();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_encoder_factory(encoder_factory)
+                .with_explicit_nulls(false)
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+            writer.finish().unwrap();
+        }
+
+        println!("{}", str::from_utf8(&buf).unwrap());
+
+        assert_json_eq(
+            &buf,
+            r#"{"union":1,"float":1.0}
+{"union":"a"}
+{"union":null,"float":3.4}
+"#,
+        );
+    }
+
+    #[test]
+    fn test_fallback_encoder_factory_line_delimited_explicit_nulls() {
+        let (batch, encoder_factory) = make_fallback_encoder_test_data();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_encoder_factory(encoder_factory)
+                .with_explicit_nulls(true)
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+            writer.finish().unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"union":1,"float":1.0}
+{"union":"a","float":null}
+{"union":null,"float":3.4}
+"#,
+        );
+    }
+
+    #[test]
+    fn test_fallback_encoder_factory_array_implicit_nulls() {
+        let (batch, encoder_factory) = make_fallback_encoder_test_data();
+
+        let json_value: Value = {
+            let mut buf = Vec::new();
+            let mut writer = WriterBuilder::new()
+                .with_encoder_factory(encoder_factory)
+                .build::<_, JsonArray>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+            writer.finish().unwrap();
+            serde_json::from_slice(&buf).unwrap()
+        };
+
+        let expected = json!([
+            {"union":1,"float":1.0},
+            {"union":"a"},
+            {"float":3.4,"union":null},
+        ]);
+
+        assert_eq!(json_value, expected);
+    }
+
+    #[test]
+    fn test_fallback_encoder_factory_array_explicit_nulls() {
+        let (batch, encoder_factory) = make_fallback_encoder_test_data();
+
+        let json_value: Value = {
+            let mut buf = Vec::new();
+            let mut writer = WriterBuilder::new()
+                .with_encoder_factory(encoder_factory)
+                .with_explicit_nulls(true)
+                .build::<_, JsonArray>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+            writer.finish().unwrap();
+            serde_json::from_slice(&buf).unwrap()
+        };
+
+        let expected = json!([
+            {"union":1,"float":1.0},
+            {"union":"a", "float": null},
+            {"union":null,"float":3.4},
+        ]);
+
+        assert_eq!(json_value, expected);
+    }
+
+    #[test]
+    fn test_default_encoder_byte_array() {
+        struct IntArrayBinaryEncoder<B> {
+            array: B,
+        }
+
+        impl<'a, B> Encoder for IntArrayBinaryEncoder<B>
+        where
+            B: ArrayAccessor<Item = &'a [u8]>,
+        {
+            fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+                out.push(b'[');
+                let child = self.array.value(idx);
+                for (idx, byte) in child.iter().enumerate() {
+                    write!(out, "{byte}").unwrap();
+                    if idx < child.len() - 1 {
+                        out.push(b',');
+                    }
+                }
+                out.push(b']');
+            }
+        }
+
+        #[derive(Debug)]
+        struct IntArrayBinaryEncoderFactory;
+
+        impl EncoderFactory for IntArrayBinaryEncoderFactory {
+            fn make_default_encoder<'a>(
+                &self,
+                _field: &'a FieldRef,
+                array: &'a dyn Array,
+                _options: &'a EncoderOptions,
+            ) -> Result<Option<NullableEncoder<'a>>, ArrowError> {
+                match array.data_type() {
+                    DataType::Binary => {
+                        let array = array.as_binary::<i32>();
+                        let encoder = IntArrayBinaryEncoder { array };
+                        let array_encoder = Box::new(encoder) as Box<dyn Encoder + 'a>;
+                        let nulls = array.nulls().cloned();
+                        Ok(Some(NullableEncoder::new(array_encoder, nulls)))
+                    }
+                    _ => Ok(None),
+                }
+            }
+        }
+
+        let binary_array = BinaryArray::from_opt_vec(vec![Some(b"a"), None, Some(b"b")]);
+        let float_array = Float64Array::from(vec![Some(1.0), Some(2.3), None]);
+        let fields = vec![
+            Field::new("bytes", DataType::Binary, true),
+            Field::new("float", DataType::Float64, true),
+        ];
+        let batch = RecordBatch::try_new(
+            Arc::new(Schema::new(fields)),
+            vec![
+                Arc::new(binary_array) as Arc<dyn Array>,
+                Arc::new(float_array) as Arc<dyn Array>,
+            ],
+        )
+        .unwrap();
+
+        let json_value: Value = {
+            let mut buf = Vec::new();
+            let mut writer = WriterBuilder::new()
+                .with_encoder_factory(Arc::new(IntArrayBinaryEncoderFactory))
+                .build::<_, JsonArray>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+            writer.finish().unwrap();
+            serde_json::from_slice(&buf).unwrap()
+        };
+
+        let expected = json!([
+            {"bytes": [97], "float": 1.0},
+            {"float": 2.3},
+            {"bytes": [98]},
+        ]);
+
+        assert_eq!(json_value, expected);
+    }
+
+    #[test]
+    fn test_encoder_factory_customize_dictionary() {
+        // Test that we can customize the encoding of T even when it shows up as Dictionary<_, T>.
+
+        // No particular reason to choose this example.
+        // Just trying to add some variety to the test cases and demonstrate use cases of the encoder factory.
+        struct PaddedInt32Encoder {
+            array: Int32Array,
+        }
+
+        impl Encoder for PaddedInt32Encoder {
+            fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+                let value = self.array.value(idx);
+                write!(out, "\"{value:0>8}\"").unwrap();
+            }
+        }
+
+        #[derive(Debug)]
+        struct CustomEncoderFactory;
+
+        impl EncoderFactory for CustomEncoderFactory {
+            fn make_default_encoder<'a>(
+                &self,
+                field: &'a FieldRef,
+                array: &'a dyn Array,
+                _options: &'a EncoderOptions,
+            ) -> Result<Option<NullableEncoder<'a>>, ArrowError> {
+                // The point here is:
+                // 1. You can use information from Field to determine how to do the encoding.
+                // 2. For dictionary arrays the Field is always the outer field but the array may be the keys or values array
+                //    and thus the data type of `field` may not match the data type of `array`.
+                let padded = field
+                    .metadata()
+                    .get("padded")
+                    .map(|v| v == "true")
+                    .unwrap_or_default();
+                match (array.data_type(), padded) {
+                    (DataType::Int32, true) => {
+                        let array = array.as_primitive::<Int32Type>();
+                        let nulls = array.nulls().cloned();
+                        let encoder = PaddedInt32Encoder {
+                            array: array.clone(),
+                        };
+                        let array_encoder = Box::new(encoder) as Box<dyn Encoder + 'a>;
+                        Ok(Some(NullableEncoder::new(array_encoder, nulls)))
+                    }
+                    _ => Ok(None),
+                }
+            }
+        }
+
+        let to_json = |batch| {
+            let mut buf = Vec::new();
+            let mut writer = WriterBuilder::new()
+                .with_encoder_factory(Arc::new(CustomEncoderFactory))
+                .build::<_, JsonArray>(&mut buf);
+            writer.write_batches(&[batch]).unwrap();
+            writer.finish().unwrap();
+            serde_json::from_slice::<Value>(&buf).unwrap()
+        };
+
+        // Control case: no dictionary wrapping works as expected.
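+        // The "padded" metadata key is a convention invented by this test:
+        // the factory reads it from the outer field to decide whether to
+        // zero-pad the encoded integers.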
+        let array = Int32Array::from(vec![Some(1), None, Some(2)]);
+        let field = Arc::new(Field::new("int", DataType::Int32, true).with_metadata(
+            HashMap::from_iter(vec![("padded".to_string(), "true".to_string())]),
+        ));
+        let batch = RecordBatch::try_new(
+            Arc::new(Schema::new(vec![field.clone()])),
+            vec![Arc::new(array)],
+        )
+        .unwrap();
+
+        let json_value = to_json(&batch);
+
+        let expected = json!([
+            {"int": "00000001"},
+            {},
+            {"int": "00000002"},
+        ]);
+
+        assert_eq!(json_value, expected);
+
+        // Now make a dictionary batch
+        let mut array_builder = PrimitiveDictionaryBuilder::<UInt16Type, Int32Type>::new();
+        array_builder.append_value(1);
+        array_builder.append_null();
+        array_builder.append_value(1);
+        let array = array_builder.finish();
+        let field = Field::new(
+            "int",
+            DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Int32)),
+            true,
+        )
+        .with_metadata(HashMap::from_iter(vec![(
+            "padded".to_string(),
+            "true".to_string(),
+        )]));
+        let batch = RecordBatch::try_new(Arc::new(Schema::new(vec![field])), vec![Arc::new(array)])
+            .unwrap();
+
+        let json_value = to_json(&batch);
+
+        let expected = json!([
+            {"int": "00000001"},
+            {},
+            {"int": "00000001"},
+        ]);
+
+        assert_eq!(json_value, expected);
+    }
 }
diff --git a/arrow-ord/Cargo.toml b/arrow-ord/Cargo.toml
index 8d74d2f97d72..560b19b3f609 100644
--- a/arrow-ord/Cargo.toml
+++ b/arrow-ord/Cargo.toml
@@ -42,4 +42,4 @@ arrow-select = { workspace = true }
 [dev-dependencies]
 half = { version = "2.1", default-features = false, features = ["num-traits"] }
-rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
+rand = { version = "0.9", default-features = false, features = ["std", "std_rng"] }
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index a6804cd6661f..7894999157c7 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -4151,7 +4151,7 @@ mod tests {
     fn test_partial_rand_sort() {
         let size = 1000u32;
         let mut rng = StdRng::seed_from_u64(42);
-        let mut before: Vec<u32> = (0..size).map(|_| rng.gen::<u32>()).collect();
+        let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
         let mut d = before.clone();
         let last = (rng.next_u32() % size) as usize;
         d.sort_unstable();
diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml
index 4ead95fcb912..72603b5d527d 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -25,7 +25,7 @@ authors = ["Apache Arrow <dev@arrow.apache.org>"]
 license = "Apache-2.0"
 keywords = [ "arrow" ]
 edition = "2021"
-rust-version = "1.70"
+rust-version = "1.81"
 publish = false

 [lib]
@@ -34,4 +34,4 @@ crate-type = ["cdylib"]

 [dependencies]
 arrow = { path = "../arrow", features = ["pyarrow"] }
-pyo3 = { version = "0.23", features = ["extension-module"] }
+pyo3 = { version = "0.24.1", features = ["extension-module"] }
diff --git a/arrow-row/Cargo.toml b/arrow-row/Cargo.toml
index 90d99684d265..1bb63e93a40b 100644
--- a/arrow-row/Cargo.toml
+++ b/arrow-row/Cargo.toml
@@ -44,7 +44,5 @@ half = { version = "2.1", default-features = false }
 [dev-dependencies]
 arrow-cast = { workspace = true }
 arrow-ord = { workspace = true }
-rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
-
-[features]
+rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index d0fad12210db..0e075d587454 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1433,9 +1433,9 @@
unsafe fn decode_column( #[cfg(test)] mod tests { - use rand::distributions::uniform::SampleUniform; - use rand::distributions::{Distribution, Standard}; - use rand::{thread_rng, Rng}; + use rand::distr::uniform::SampleUniform; + use rand::distr::{Distribution, StandardUniform}; + use rand::{rng, Rng}; use arrow_array::builder::*; use arrow_array::types::*; @@ -2193,11 +2193,11 @@ mod tests { fn generate_primitive_array(len: usize, valid_percent: f64) -> PrimitiveArray where K: ArrowPrimitiveType, - Standard: Distribution, + StandardUniform: Distribution, { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) - .map(|_| rng.gen_bool(valid_percent).then(|| rng.gen())) + .map(|_| rng.random_bool(valid_percent).then(|| rng.random())) .collect() } @@ -2205,12 +2205,12 @@ mod tests { len: usize, valid_percent: f64, ) -> GenericStringArray { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) .map(|_| { - rng.gen_bool(valid_percent).then(|| { - let len = rng.gen_range(0..100); - let bytes = (0..len).map(|_| rng.gen_range(0..128)).collect(); + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes = (0..len).map(|_| rng.random_range(0..128)).collect(); String::from_utf8(bytes).unwrap() }) }) @@ -2218,12 +2218,12 @@ mod tests { } fn generate_string_view(len: usize, valid_percent: f64) -> StringViewArray { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) .map(|_| { - rng.gen_bool(valid_percent).then(|| { - let len = rng.gen_range(0..100); - let bytes = (0..len).map(|_| rng.gen_range(0..128)).collect(); + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes = (0..len).map(|_| rng.random_range(0..128)).collect(); String::from_utf8(bytes).unwrap() }) }) @@ -2231,12 +2231,12 @@ mod tests { } fn generate_byte_view(len: usize, valid_percent: f64) -> BinaryViewArray { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) .map(|_| { - rng.gen_bool(valid_percent).then(|| { - let len = rng.gen_range(0..100); - let bytes: Vec<_> = (0..len).map(|_| rng.gen_range(0..128)).collect(); + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes: Vec<_> = (0..len).map(|_| rng.random_range(0..128)).collect(); bytes }) }) @@ -2252,13 +2252,13 @@ mod tests { K: ArrowDictionaryKeyType, K::Native: SampleUniform, { - let mut rng = thread_rng(); + let mut rng = rng(); let min_key = K::Native::from_usize(0).unwrap(); let max_key = K::Native::from_usize(values.len()).unwrap(); let keys: PrimitiveArray = (0..len) .map(|_| { - rng.gen_bool(valid_percent) - .then(|| rng.gen_range(min_key..max_key)) + rng.random_bool(valid_percent) + .then(|| rng.random_range(min_key..max_key)) }) .collect(); @@ -2277,15 +2277,15 @@ mod tests { } fn generate_fixed_size_binary(len: usize, valid_percent: f64) -> FixedSizeBinaryArray { - let mut rng = thread_rng(); - let width = rng.gen_range(0..20); + let mut rng = rng(); + let width = rng.random_range(0..20); let mut builder = FixedSizeBinaryBuilder::new(width); let mut b = vec![0; width as usize]; for _ in 0..len { - match rng.gen_bool(valid_percent) { + match rng.random_bool(valid_percent) { true => { - b.iter_mut().for_each(|x| *x = rng.gen()); + b.iter_mut().for_each(|x| *x = rng.random()); builder.append_value(&b).unwrap(); } false => builder.append_null(), @@ -2296,8 +2296,8 @@ mod tests { } fn generate_struct(len: usize, valid_percent: f64) -> StructArray { - let mut rng = thread_rng(); - let nulls = NullBuffer::from_iter((0..len).map(|_| 
rng.gen_bool(valid_percent))); + let mut rng = rng(); + let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent))); let a = generate_primitive_array::(len, valid_percent); let b = generate_strings::(len, valid_percent); let fields = Fields::from(vec![ @@ -2312,18 +2312,18 @@ mod tests { where F: FnOnce(usize) -> ArrayRef, { - let mut rng = thread_rng(); - let offsets = OffsetBuffer::::from_lengths((0..len).map(|_| rng.gen_range(0..10))); + let mut rng = rng(); + let offsets = OffsetBuffer::::from_lengths((0..len).map(|_| rng.random_range(0..10))); let values_len = offsets.last().unwrap().to_usize().unwrap(); let values = values(values_len); - let nulls = NullBuffer::from_iter((0..len).map(|_| rng.gen_bool(valid_percent))); + let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent))); let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); ListArray::new(field, offsets, values, Some(nulls)) } fn generate_column(len: usize) -> ArrayRef { - let mut rng = thread_rng(); - match rng.gen_range(0..16) { + let mut rng = rng(); + match rng.random_range(0..16) { 0 => Arc::new(generate_primitive_array::(len, 0.8)), 1 => Arc::new(generate_primitive_array::(len, 0.8)), 2 => Arc::new(generate_primitive_array::(len, 0.8)), @@ -2333,14 +2333,14 @@ mod tests { 6 => Arc::new(generate_strings::(len, 0.8)), 7 => Arc::new(generate_dictionary::( // Cannot test dictionaries containing null values because of #2687 - Arc::new(generate_strings::(rng.gen_range(1..len), 1.0)), + Arc::new(generate_strings::(rng.random_range(1..len), 1.0)), len, 0.8, )), 8 => Arc::new(generate_dictionary::( // Cannot test dictionaries containing null values because of #2687 Arc::new(generate_primitive_array::( - rng.gen_range(1..len), + rng.random_range(1..len), 1.0, )), len, @@ -2390,15 +2390,15 @@ mod tests { #[cfg_attr(miri, ignore)] fn fuzz_test() { for _ in 0..100 { - let mut rng = thread_rng(); - let num_columns = rng.gen_range(1..5); - let len = rng.gen_range(5..100); + let mut rng = rng(); + let num_columns = rng.random_range(1..5); + let len = rng.random_range(5..100); let arrays: Vec<_> = (0..num_columns).map(|_| generate_column(len)).collect(); let options: Vec<_> = (0..num_columns) .map(|_| SortOptions { - descending: rng.gen_bool(0.5), - nulls_first: rng.gen_bool(0.5), + descending: rng.random_bool(0.5), + nulls_first: rng.random_bool(0.5), }) .collect(); diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index ffea42db6653..3b8b7d91e7be 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -26,7 +26,7 @@ license = { workspace = true } keywords = { workspace = true } include = { workspace = true } edition = { workspace = true } -rust-version = "1.64" +rust-version = { workspace = true } [lib] name = "arrow_schema" diff --git a/arrow-schema/src/extension/canonical/bool8.rs b/arrow-schema/src/extension/canonical/bool8.rs index 3f6c50cb3e5e..fdd25677ed0e 100644 --- a/arrow-schema/src/extension/canonical/bool8.rs +++ b/arrow-schema/src/extension/canonical/bool8.rs @@ -47,7 +47,7 @@ impl ExtensionType for Bool8 { } fn deserialize_metadata(metadata: Option<&str>) -> Result { - if metadata.map_or(false, str::is_empty) { + if metadata.is_some_and(str::is_empty) { Ok("") } else { Err(ArrowError::InvalidArgumentError( diff --git a/arrow-select/Cargo.toml b/arrow-select/Cargo.toml index 023788799c94..0d412d15126b 100644 --- a/arrow-select/Cargo.toml +++ b/arrow-select/Cargo.toml @@ -45,4 +45,4 @@ ahash = { version = "0.8", 
default-features = false} default = [] [dev-dependencies] -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index c91732848653..7bb140d37f51 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -864,8 +864,10 @@ mod tests { use arrow_array::builder::*; use arrow_array::cast::as_run_array; use arrow_array::types::*; - use rand::distributions::{Alphanumeric, Standard}; + use rand::distr::uniform::{UniformSampler, UniformUsize}; + use rand::distr::{Alphanumeric, StandardUniform}; use rand::prelude::*; + use rand::rng; use super::*; @@ -1475,9 +1477,9 @@ mod tests { } fn test_slices_fuzz(mask_len: usize, offset: usize, truncate: usize) { - let mut rng = thread_rng(); + let mut rng = rng(); - let bools: Vec = std::iter::from_fn(|| Some(rng.gen())) + let bools: Vec = std::iter::from_fn(|| Some(rng.random())) .take(mask_len) .collect(); @@ -1516,15 +1518,19 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] fn fuzz_test_slices_iterator() { - let mut rng = thread_rng(); + let mut rng = rng(); + let uusize = UniformUsize::new(usize::MIN, usize::MAX).unwrap(); for _ in 0..100 { - let mask_len = rng.gen_range(0..1024); + let mask_len = rng.random_range(0..1024); let max_offset = 64.min(mask_len); - let offset = rng.gen::().checked_rem(max_offset).unwrap_or(0); + let offset = uusize.sample(&mut rng).checked_rem(max_offset).unwrap_or(0); let max_truncate = 128.min(mask_len - offset); - let truncate = rng.gen::().checked_rem(max_truncate).unwrap_or(0); + let truncate = uusize + .sample(&mut rng) + .checked_rem(max_truncate) + .unwrap_or(0); test_slices_fuzz(mask_len, offset, truncate); } @@ -1549,11 +1555,11 @@ mod tests { /// Generates an array of length `len` with `valid_percent` non-null values fn gen_primitive(len: usize, valid_percent: f64) -> Vec> where - Standard: Distribution, + StandardUniform: Distribution, { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) - .map(|_| rng.gen_bool(valid_percent).then(|| rng.gen())) + .map(|_| rng.random_bool(valid_percent).then(|| rng.random())) .collect() } @@ -1563,11 +1569,11 @@ mod tests { valid_percent: f64, str_len_range: std::ops::Range, ) -> Vec> { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) .map(|_| { - rng.gen_bool(valid_percent).then(|| { - let len = rng.gen_range(str_len_range.clone()); + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(str_len_range.clone()); (0..len) .map(|_| char::from(rng.sample(Alphanumeric))) .collect() @@ -1584,24 +1590,24 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] fn fuzz_filter() { - let mut rng = thread_rng(); + let mut rng = rng(); for i in 0..100 { let filter_percent = match i { 0..=4 => 1., 5..=10 => 0., - _ => rng.gen_range(0.0..1.0), + _ => rng.random_range(0.0..1.0), }; - let valid_percent = rng.gen_range(0.0..1.0); + let valid_percent = rng.random_range(0.0..1.0); - let array_len = rng.gen_range(32..256); - let array_offset = rng.gen_range(0..10); + let array_len = rng.random_range(32..256); + let array_offset = rng.random_range(0..10); // Construct a predicate - let filter_offset = rng.gen_range(0..10); - let filter_truncate = rng.gen_range(0..10); - let bools: Vec<_> = std::iter::from_fn(|| Some(rng.gen_bool(filter_percent))) + let filter_offset = rng.random_range(0..10); + let filter_truncate = rng.random_range(0..10); + let bools: 
Vec<_> = std::iter::from_fn(|| Some(rng.random_bool(filter_percent))) .take(array_len + filter_offset - filter_truncate) .collect(); diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs index 4b90114a4bbc..dc729da7e6c3 100644 --- a/arrow-select/src/nullif.rs +++ b/arrow-select/src/nullif.rs @@ -120,7 +120,7 @@ mod tests { use arrow_array::{Int32Array, NullArray, StringArray, StructArray}; use arrow_data::ArrayData; use arrow_schema::{Field, Fields}; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; #[test] fn test_nullif_int_array() { @@ -497,11 +497,13 @@ mod tests { #[test] fn nullif_fuzz() { - let mut rng = thread_rng(); + let mut rng = rng(); let arrays = [ Int32Array::from(vec![0; 128]), - (0..128).map(|_| rng.gen_bool(0.5).then_some(0)).collect(), + (0..128) + .map(|_| rng.random_bool(0.5).then_some(0)) + .collect(), ]; for a in arrays { @@ -511,11 +513,11 @@ mod tests { let a = a.slice(a_offset, a_length); for i in 1..65 { - let b_start_offset = rng.gen_range(0..i); - let b_end_offset = rng.gen_range(0..i); + let b_start_offset = rng.random_range(0..i); + let b_end_offset = rng.random_range(0..i); let b: BooleanArray = (0..a_length + b_start_offset + b_end_offset) - .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5))) + .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5))) .collect(); let b = b.slice(b_start_offset, a_length); diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 88231b7f6160..1c6dca1a40c0 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -53,8 +53,8 @@ arrow-schema = { workspace = true } arrow-select = { workspace = true } arrow-string = { workspace = true } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } -pyo3 = { version = "0.23", default-features = false, optional = true } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"], optional = true } +pyo3 = { version = "0.24.1", default-features = false, optional = true } half = { version = "2.1", default-features = false, optional = true } [package.metadata.docs.rs] @@ -86,7 +86,7 @@ canonical_extension_types = ["arrow-schema/canonical_extension_types"] chrono = { workspace = true } criterion = { version = "0.5", default-features = false } half = { version = "2.1", default-features = false } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } serde = { version = "1.0", default-features = false, features = ["derive"] } # used in examples memmap2 = "0.9.3" diff --git a/arrow/benches/aggregate_kernels.rs b/arrow/benches/aggregate_kernels.rs index 6e224a48c4e6..25dbe3548496 100644 --- a/arrow/benches/aggregate_kernels.rs +++ b/arrow/benches/aggregate_kernels.rs @@ -18,7 +18,7 @@ #[macro_use] extern crate criterion; use criterion::{Criterion, Throughput}; -use rand::distributions::{Distribution, Standard}; +use rand::distr::{Distribution, StandardUniform}; extern crate arrow; @@ -31,7 +31,7 @@ const BATCH_SIZE: usize = 64 * 1024; fn primitive_benchmark(c: &mut Criterion, name: &str) where - Standard: Distribution, + StandardUniform: Distribution, { let nonnull_array = create_primitive_array::(BATCH_SIZE, 0.0); let nullable_array = create_primitive_array::(BATCH_SIZE, 0.5); diff --git a/arrow/benches/array_from_vec.rs b/arrow/benches/array_from_vec.rs index fd83ad5c2a10..c256d1523b06 100644 --- a/arrow/benches/array_from_vec.rs +++ 
b/arrow/benches/array_from_vec.rs @@ -99,10 +99,10 @@ fn decimal_benchmark(c: &mut Criterion) { // bench decimal128 array // create option array let size: usize = 1 << 15; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut array = vec![]; for _ in 0..size { - array.push(Some(rng.gen_range::(0..9999999999))); + array.push(Some(rng.random_range::(0..9999999999))); } c.bench_function("decimal128_array_from_vec 32768", |b| { b.iter(|| decimal128_array_from_vec(array.as_slice())) @@ -112,9 +112,9 @@ fn decimal_benchmark(c: &mut Criterion) { // create option> array let size = 1 << 10; let mut array = vec![]; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); for _ in 0..size { - let decimal = i256::from_i128(rng.gen_range::(0..9999999999999)); + let decimal = i256::from_i128(rng.random_range::(0..9999999999999)); array.push(Some(decimal)); } diff --git a/arrow/benches/boolean_append_packed.rs b/arrow/benches/boolean_append_packed.rs index 40873422dbd5..508720eb346f 100644 --- a/arrow/benches/boolean_append_packed.rs +++ b/arrow/benches/boolean_append_packed.rs @@ -17,22 +17,22 @@ use arrow::array::BooleanBufferBuilder; use criterion::{criterion_group, criterion_main, Criterion}; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; fn rand_bytes(len: usize) -> Vec { - let mut rng = thread_rng(); + let mut rng = rng(); let mut buf = vec![0_u8; len]; rng.fill(buf.as_mut_slice()); buf } fn boolean_append_packed(c: &mut Criterion) { - let mut rng = thread_rng(); + let mut rng = rng(); let source = rand_bytes(1024); let ranges: Vec<_> = (0..100) .map(|_| { - let start: usize = rng.gen_range(0..1024 * 8); - let end: usize = rng.gen_range(start..1024 * 8); + let start: usize = rng.random_range(0..1024 * 8); + let end: usize = rng.random_range(start..1024 * 8); start..end }) .collect(); diff --git a/arrow/benches/buffer_create.rs b/arrow/benches/buffer_create.rs index e7d24c2166d7..f6199ccab55c 100644 --- a/arrow/benches/buffer_create.rs +++ b/arrow/benches/buffer_create.rs @@ -19,7 +19,7 @@ extern crate criterion; use arrow::util::test_util::seedable_rng; use criterion::Criterion; -use rand::distributions::Uniform; +use rand::distr::Uniform; use rand::Rng; extern crate arrow; @@ -110,7 +110,7 @@ fn from_slice(data: &[Vec], capacity: usize) -> Buffer { fn create_data(size: usize) -> Vec> { let rng = &mut seedable_rng(); - let range = Uniform::new(0, 33); + let range = Uniform::new(0, 33).unwrap(); (0..size) .map(|_| { @@ -125,7 +125,7 @@ fn create_data(size: usize) -> Vec> { fn create_data_bool(size: usize) -> Vec> { let rng = &mut seedable_rng(); - let range = Uniform::new(0, 33); + let range = Uniform::new(0, 33).unwrap(); (0..size) .map(|_| { diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs index 166e97bfad81..4f5f38eadfcb 100644 --- a/arrow/benches/builder.rs +++ b/arrow/benches/builder.rs @@ -22,7 +22,7 @@ extern crate rand; use std::mem::size_of; use criterion::*; -use rand::distributions::Standard; +use rand::distr::StandardUniform; use arrow::array::*; use arrow::util::test_util::seedable_rng; @@ -68,7 +68,7 @@ fn bench_primitive_nulls(c: &mut Criterion) { fn bench_bool(c: &mut Criterion) { let data: Vec = seedable_rng() - .sample_iter(&Standard) + .sample_iter(&StandardUniform) .take(BATCH_SIZE) .collect(); let data_len = data.len(); @@ -110,10 +110,10 @@ fn bench_string(c: &mut Criterion) { fn bench_decimal128(c: &mut Criterion) { c.bench_function("bench_decimal128_builder", |b| { b.iter(|| { - let mut rng = rand::thread_rng(); + let mut rng = 
rand::rng(); let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE); for _ in 0..BATCH_SIZE { - decimal_builder.append_value(rng.gen_range::(0..9999999999)); + decimal_builder.append_value(rng.random_range::(0..9999999999)); } black_box( decimal_builder @@ -128,11 +128,11 @@ fn bench_decimal128(c: &mut Criterion) { fn bench_decimal256(c: &mut Criterion) { c.bench_function("bench_decimal256_builder", |b| { b.iter(|| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut decimal_builder = Decimal256Builder::with_capacity(BATCH_SIZE); for _ in 0..BATCH_SIZE { decimal_builder - .append_value(i256::from_i128(rng.gen_range::(0..99999999999))); + .append_value(i256::from_i128(rng.random_range::(0..99999999999))); } black_box( decimal_builder diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs index 934a595850d8..a68fc48d30d7 100644 --- a/arrow/benches/cast_kernels.rs +++ b/arrow/benches/cast_kernels.rs @@ -18,7 +18,7 @@ #[macro_use] extern crate criterion; use criterion::Criterion; -use rand::distributions::{Distribution, Standard, Uniform}; +use rand::distr::{Distribution, StandardUniform, Uniform}; use rand::Rng; use chrono::DateTime; @@ -34,7 +34,7 @@ use arrow::util::test_util::seedable_rng; fn build_array(size: usize) -> ArrayRef where - Standard: Distribution, + StandardUniform: Distribution, { let array = create_primitive_array::(size, 0.1); Arc::new(array) @@ -46,10 +46,10 @@ fn build_utf8_date_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching let mut rng = seedable_rng(); let mut builder = StringBuilder::new(); - let range = Uniform::new(0, 737776); + let range = Uniform::new(0, 737776).unwrap(); for _ in 0..size { - if with_nulls && rng.gen::() > 0.8 { + if with_nulls && rng.random::() > 0.8 { builder.append_null(); } else { let string = NaiveDate::from_num_days_from_ce_opt(rng.sample(range)) @@ -66,10 +66,10 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching let mut rng = seedable_rng(); let mut builder = StringBuilder::new(); - let range = Uniform::new(0, 1608071414123); + let range = Uniform::new(0, 1608071414123).unwrap(); for _ in 0..size { - if with_nulls && rng.gen::() > 0.8 { + if with_nulls && rng.random::() > 0.8 { builder.append_null(); } else { let string = DateTime::from_timestamp(rng.sample(range), 0) @@ -87,7 +87,7 @@ fn build_decimal128_array(size: usize, precision: u8, scale: i8) -> ArrayRef { let mut builder = Decimal128Builder::with_capacity(size); for _ in 0..size { - builder.append_value(rng.gen_range::(0..1000000000)); + builder.append_value(rng.random_range::(0..1000000000)); } Arc::new( builder @@ -102,7 +102,7 @@ fn build_decimal256_array(size: usize, precision: u8, scale: i8) -> ArrayRef { let mut builder = Decimal256Builder::with_capacity(size); let mut bytes = [0; 32]; for _ in 0..size { - let num = rng.gen_range::(0..1000000000); + let num = rng.random_range::(0..1000000000); bytes[0..16].clone_from_slice(&num.to_le_bytes()); builder.append_value(i256::from_le_bytes(bytes)); } diff --git a/arrow/benches/comparison_kernels.rs b/arrow/benches/comparison_kernels.rs index 4c4a63a775a7..84fd47acc1b2 100644 --- a/arrow/benches/comparison_kernels.rs +++ b/arrow/benches/comparison_kernels.rs @@ -72,8 +72,8 @@ fn bench_string_regexp_is_match_scalar(arr_a: &StringArray, value_b: &str) { fn make_string_array(size: usize, rng: 
&mut StdRng) -> impl Iterator> + '_ { (0..size).map(|_| { - let len = rng.gen_range(0..64); - let bytes = (0..len).map(|_| rng.gen_range(0..128)).collect(); + let len = rng.random_range(0..64); + let bytes = (0..len).map(|_| rng.random_range(0..128)).collect(); Some(String::from_utf8(bytes).unwrap()) }) } diff --git a/arrow/benches/csv_reader.rs b/arrow/benches/csv_reader.rs index 74a47ef892e0..331ff9edd5b9 100644 --- a/arrow/benches/csv_reader.rs +++ b/arrow/benches/csv_reader.rs @@ -61,45 +61,45 @@ fn criterion_benchmark(c: &mut Criterion) { let mut rng = seedable_rng(); // Single Primitive Column tests - let values = Int32Array::from_iter_values((0..4096).map(|_| rng.gen_range(0..1024))); + let values = Int32Array::from_iter_values((0..4096).map(|_| rng.random_range(0..1024))); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 i32_small(0)", cols); - let values = Int32Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = Int32Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 i32(0)", cols); - let values = UInt64Array::from_iter_values((0..4096).map(|_| rng.gen_range(0..1024))); + let values = UInt64Array::from_iter_values((0..4096).map(|_| rng.random_range(0..1024))); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 u64_small(0)", cols); - let values = UInt64Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = UInt64Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 u64(0)", cols); - let values = Int64Array::from_iter_values((0..4096).map(|_| rng.gen_range(0..1024) - 512)); + let values = Int64Array::from_iter_values((0..4096).map(|_| rng.random_range(0..1024) - 512)); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 i64_small(0)", cols); - let values = Int64Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = Int64Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 i64(0)", cols); let cols = vec![Arc::new(Float32Array::from_iter_values( - (0..4096).map(|_| rng.gen_range(0..1024000) as f32 / 1000.), + (0..4096).map(|_| rng.random_range(0..1024000) as f32 / 1000.), )) as _]; do_bench(c, "4096 f32_small(0)", cols); - let values = Float32Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = Float32Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 f32(0)", cols); let cols = vec![Arc::new(Float64Array::from_iter_values( - (0..4096).map(|_| rng.gen_range(0..1024000) as f64 / 1000.), + (0..4096).map(|_| rng.random_range(0..1024000) as f64 / 1000.), )) as _]; do_bench(c, "4096 f64_small(0)", cols); - let values = Float64Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = Float64Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 f64(0)", cols); diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs index be812a225ca2..dfa4f5992023 100644 --- a/arrow/benches/decimal_validate.rs +++ b/arrow/benches/decimal_validate.rs @@ -35,11 +35,11 @@ fn validate_decimal256_array(array: Decimal256Array) { } fn validate_decimal128_benchmark(c: &mut Criterion) { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let size: i128 = 20000; let mut decimal_builder = 
Decimal128Builder::with_capacity(size as usize); for _ in 0..size { - decimal_builder.append_value(rng.gen_range::(0..999999999999)); + decimal_builder.append_value(rng.random_range::(0..999999999999)); } let decimal_array = decimal_builder .finish() @@ -55,11 +55,11 @@ fn validate_decimal128_benchmark(c: &mut Criterion) { } fn validate_decimal256_benchmark(c: &mut Criterion) { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let size: i128 = 20000; let mut decimal_builder = Decimal256Builder::with_capacity(size as usize); for _ in 0..size { - let v = rng.gen_range::(0..999999999999999); + let v = rng.random_range::(0..999999999999999); let decimal = i256::from_i128(v); decimal_builder.append_value(decimal); } diff --git a/arrow/benches/interleave_kernels.rs b/arrow/benches/interleave_kernels.rs index 0941f1e3fd33..ed7ac12379d4 100644 --- a/arrow/benches/interleave_kernels.rs +++ b/arrow/benches/interleave_kernels.rs @@ -54,8 +54,8 @@ fn bench_values(c: &mut Criterion, name: &str, len: usize, values: &[&dyn Array] let mut rng = seedable_rng(); let indices: Vec<_> = (0..len) .map(|_| { - let array_idx = rng.gen_range(0..values.len()); - let value_idx = rng.gen_range(0..values[array_idx].len()); + let array_idx = rng.random_range(0..values.len()); + let value_idx = rng.random_range(0..values[array_idx].len()); (array_idx, value_idx) }) .collect(); diff --git a/arrow/benches/json_writer.rs b/arrow/benches/json_writer.rs index 48be0bccb462..ff76ecdd6253 100644 --- a/arrow/benches/json_writer.rs +++ b/arrow/benches/json_writer.rs @@ -61,7 +61,7 @@ fn create_mixed(len: usize) -> RecordBatch { fn create_nulls(len: usize) -> NullBuffer { let mut rng = seedable_rng(); - BooleanBuffer::from_iter((0..len).map(|_| rng.gen_bool(0.2))).into() + BooleanBuffer::from_iter((0..len).map(|_| rng.random_bool(0.2))).into() } fn create_offsets(len: usize) -> (usize, OffsetBuffer) { @@ -70,7 +70,7 @@ fn create_offsets(len: usize) -> (usize, OffsetBuffer) { let mut offsets = Vec::with_capacity(len + 1); offsets.push(0); for _ in 0..len { - let len = rng.gen_range(0..10); + let len = rng.random_range(0..10); offsets.push(last_offset + len); last_offset += len; } diff --git a/arrow/benches/mutable_array.rs b/arrow/benches/mutable_array.rs index b04e5cd84926..67591194ae6d 100644 --- a/arrow/benches/mutable_array.rs +++ b/arrow/benches/mutable_array.rs @@ -31,8 +31,8 @@ fn create_slices(size: usize) -> Vec<(usize, usize)> { (0..size) .map(|_| { - let start = rng.gen_range(0..size / 2); - let end = rng.gen_range(start + 1..size); + let start = rng.random_range(0..size / 2); + let end = rng.random_range(start + 1..size); (start, end) }) .collect() diff --git a/arrow/benches/partition_kernels.rs b/arrow/benches/partition_kernels.rs index fce8634a10a0..e6a067def292 100644 --- a/arrow/benches/partition_kernels.rs +++ b/arrow/benches/partition_kernels.rs @@ -27,12 +27,12 @@ use arrow::{ datatypes::{Float64Type, UInt8Type}, }; use arrow_ord::partition::partition; -use rand::distributions::{Distribution, Standard}; +use rand::distr::{Distribution, StandardUniform}; use std::iter; fn create_array(size: usize, with_nulls: bool) -> ArrayRef where - Standard: Distribution, + StandardUniform: Distribution, { let null_density = if with_nulls { 0.5 } else { 0.0 }; let array = create_primitive_array::(size, null_density); diff --git a/arrow/benches/primitive_run_take.rs b/arrow/benches/primitive_run_take.rs index c10c16bfee3a..cabf9c118f97 100644 --- a/arrow/benches/primitive_run_take.rs +++ 
b/arrow/benches/primitive_run_take.rs @@ -28,10 +28,10 @@ fn create_random_index(size: usize, null_density: f32, max_value: usize) -> UInt let mut rng = seedable_rng(); let mut builder = UInt32Builder::with_capacity(size); for _ in 0..size { - if rng.gen::() < null_density { + if rng.random::() < null_density { builder.append_null(); } else { - let value = rng.gen_range::(0u32..max_value as u32); + let value = rng.random_range::(0u32..max_value as u32); builder.append_value(value); } } diff --git a/arrow/benches/string_dictionary_builder.rs b/arrow/benches/string_dictionary_builder.rs index 424400674cd8..a39fd5d03847 100644 --- a/arrow/benches/string_dictionary_builder.rs +++ b/arrow/benches/string_dictionary_builder.rs @@ -18,17 +18,17 @@ use arrow::array::StringDictionaryBuilder; use arrow::datatypes::Int32Type; use criterion::{criterion_group, criterion_main, Criterion}; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; /// Note: this is best effort, not all keys are necessarily present or unique fn build_strings(dict_size: usize, total_size: usize, key_len: usize) -> Vec { - let mut rng = thread_rng(); + let mut rng = rng(); let values: Vec = (0..dict_size) - .map(|_| (0..key_len).map(|_| rng.gen::()).collect()) + .map(|_| (0..key_len).map(|_| rng.random::()).collect()) .collect(); (0..total_size) - .map(|_| values[rng.gen_range(0..dict_size)].clone()) + .map(|_| values[rng.random_range(0..dict_size)].clone()) .collect() } diff --git a/arrow/benches/string_run_iterator.rs b/arrow/benches/string_run_iterator.rs index ac5cf7838408..32088573dc25 100644 --- a/arrow/benches/string_run_iterator.rs +++ b/arrow/benches/string_run_iterator.rs @@ -18,17 +18,17 @@ use arrow::array::{Int32RunArray, StringArray, StringRunBuilder}; use arrow::datatypes::Int32Type; use criterion::{criterion_group, criterion_main, Criterion}; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; fn build_strings_runs( physical_array_len: usize, logical_array_len: usize, string_len: usize, ) -> Int32RunArray { - let mut rng = thread_rng(); + let mut rng = rng(); let run_len = logical_array_len / physical_array_len; let mut values: Vec = (0..physical_array_len) - .map(|_| (0..string_len).map(|_| rng.gen::()).collect()) + .map(|_| (0..string_len).map(|_| rng.random::()).collect()) .flat_map(|s| std::iter::repeat(s).take(run_len)) .collect(); while values.len() < logical_array_len { diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs index 77ec54c97bc5..a09064839f8a 100644 --- a/arrow/benches/take_kernels.rs +++ b/arrow/benches/take_kernels.rs @@ -32,10 +32,10 @@ fn create_random_index(size: usize, null_density: f32) -> UInt32Array { let mut rng = seedable_rng(); let mut builder = UInt32Builder::with_capacity(size); for _ in 0..size { - if rng.gen::() < null_density { + if rng.random::() < null_density { builder.append_null(); } else { - let value = rng.gen_range::(0u32..size as u32); + let value = rng.random_range::(0u32..size as u32); builder.append_value(value); } } diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index 53e01034122b..387d9b973a9c 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -22,12 +22,12 @@ use crate::datatypes::*; use crate::util::test_util::seedable_rng; use arrow_buffer::{Buffer, IntervalMonthDayNano}; use half::f16; -use rand::distributions::uniform::SampleUniform; -use rand::thread_rng; +use rand::distr::uniform::SampleUniform; +use rand::rng; use rand::Rng; use rand::SeedableRng; use rand::{ - 
distributions::{Alphanumeric, Distribution, Standard}, + distr::{Alphanumeric, Distribution, StandardUniform}, prelude::StdRng, }; use std::ops::Range; @@ -36,16 +36,16 @@ use std::ops::Range; pub fn create_primitive_array(size: usize, null_density: f32) -> PrimitiveArray where T: ArrowPrimitiveType, - Standard: Distribution, + StandardUniform: Distribution, { let mut rng = seedable_rng(); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - Some(rng.gen()) + Some(rng.random()) } }) .collect() @@ -60,16 +60,16 @@ pub fn create_primitive_array_with_seed( ) -> PrimitiveArray where T: ArrowPrimitiveType, - Standard: Distribution, + StandardUniform: Distribution, { let mut rng = StdRng::seed_from_u64(seed); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - Some(rng.gen()) + Some(rng.random()) } }) .collect() @@ -86,10 +86,14 @@ pub fn create_month_day_nano_array_with_seed( (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - Some(IntervalMonthDayNano::new(rng.gen(), rng.gen(), rng.gen())) + Some(IntervalMonthDayNano::new( + rng.random(), + rng.random(), + rng.random(), + )) } }) .collect() @@ -98,15 +102,15 @@ pub fn create_month_day_nano_array_with_seed( /// Creates a random (but fixed-seeded) array of a given size and null density pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray where - Standard: Distribution, + StandardUniform: Distribution, { let mut rng = seedable_rng(); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - let value = rng.gen::() < true_density; + let value = rng.random::() < true_density; Some(value) } }) @@ -134,10 +138,10 @@ fn create_string_array_with_max_len( let rng = &mut seedable_rng(); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - let str_len = rng.gen_range(0..max_str_len); + let str_len = rng.random_range(0..max_str_len); let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); let value = String::from_utf8(value).unwrap(); Some(value) @@ -156,7 +160,7 @@ pub fn create_string_array_with_len( (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); @@ -183,10 +187,10 @@ fn create_string_view_array_with_max_len( let rng = &mut seedable_rng(); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - let str_len = rng.gen_range(0..max_str_len); + let str_len = rng.random_range(0..max_str_len); let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); let value = String::from_utf8(value).unwrap(); Some(value) @@ -209,10 +213,10 @@ pub fn create_string_view_array_with_len( // if mixed, we creates first half that string length small than 12 bytes and second half large than 12 bytes if mixed { for _ in 0..size / 2 { - lengths.push(rng.gen_range(1..12)); + lengths.push(rng.random_range(1..12)); } for _ in size / 2..size { - lengths.push(rng.gen_range(12..=std::cmp::max(30, str_len))); + lengths.push(rng.random_range(12..=std::cmp::max(30, str_len))); } } else { lengths.resize(size, str_len); @@ -221,7 +225,7 @@ pub fn create_string_view_array_with_len( lengths .into_iter() .map(|len| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None 
             } else {
                 let value: Vec<u8> = rng.sample_iter(&Alphanumeric).take(len).collect();
@@ -242,7 +246,7 @@ pub fn create_string_dict_array(
     let data: Vec<_> = (0..size)
         .map(|_| {
-            if rng.gen::<f32>() < null_density {
+            if rng.random::<f32>() < null_density {
                 None
             } else {
                 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
@@ -296,7 +300,7 @@ pub fn create_string_array_for_runs(
     string_len: usize,
 ) -> Vec<String> {
     assert!(logical_array_len >= physical_array_len);
-    let mut rng = thread_rng();
+    let mut rng = rng();
 
     // typical length of each run
     let run_len = logical_array_len / physical_array_len;
@@ -305,7 +309,7 @@
     let mut run_len_extra = logical_array_len % physical_array_len;
 
     let mut values: Vec<String> = (0..physical_array_len)
-        .map(|_| (0..string_len).map(|_| rng.gen::<char>()).collect())
+        .map(|_| (0..string_len).map(|_| rng.random::<char>()).collect())
        .flat_map(|s| {
             let mut take_len = run_len;
             if run_len_extra > 0 {
@@ -332,12 +336,12 @@ pub fn create_binary_array(
 
     (0..size)
         .map(|_| {
-            if rng.gen::<f32>() < null_density {
+            if rng.random::<f32>() < null_density {
                 None
             } else {
                 let value = rng
-                    .sample_iter::<u8, _>(Standard)
-                    .take(range_rng.gen_range(0..8))
+                    .sample_iter::<u8, _>(StandardUniform)
+                    .take(range_rng.random_range(0..8))
                     .collect::<Vec<u8>>();
                 Some(value)
             }
@@ -351,11 +355,11 @@ pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> Fix
     FixedSizeBinaryArray::try_from_sparse_iter_with_size(
         (0..size).map(|_| {
-            if rng.gen::<f32>() < null_density {
+            if rng.random::<f32>() < null_density {
                 None
             } else {
                 let value = rng
-                    .sample_iter::<u8, _>(Standard)
+                    .sample_iter::<u8, _>(StandardUniform)
                     .take(value_len)
                     .collect::<Vec<u8>>();
                 Some(value)
@@ -375,7 +379,7 @@ pub fn create_dict_from_values(
 ) -> DictionaryArray<K>
 where
     K: ArrowDictionaryKeyType,
-    Standard: Distribution<K::Native>,
+    StandardUniform: Distribution<K::Native>,
     K::Native: SampleUniform,
 {
     let min_key = K::Native::from_usize(0).unwrap();
@@ -393,7 +397,7 @@ pub fn create_sparse_dict_from_values(
 ) -> DictionaryArray<K>
 where
     K: ArrowDictionaryKeyType,
-    Standard: Distribution<K::Native>,
+    StandardUniform: Distribution<K::Native>,
     K::Native: SampleUniform,
 {
     let mut rng = seedable_rng();
@@ -401,11 +405,14 @@ where
         DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
 
     let keys: Buffer = (0..size)
-        .map(|_| rng.gen_range(key_range.clone()))
+        .map(|_| rng.random_range(key_range.clone()))
         .collect();
 
-    let nulls: Option<Buffer> =
-        (null_density != 0.).then(|| (0..size).map(|_| rng.gen_bool(null_density as _)).collect());
+    let nulls: Option<Buffer> = (null_density != 0.).then(|| {
+        (0..size)
+            .map(|_| rng.random_bool(null_density as _))
+            .collect()
+    });
 
     let data = ArrayDataBuilder::new(data_type)
         .len(size)
@@ -424,10 +431,10 @@ pub fn create_f16_array(size: usize, nan_density: f32) -> Float16Array {
     (0..size)
         .map(|_| {
-            if rng.gen::<f32>() < nan_density {
+            if rng.random::<f32>() < nan_density {
                 Some(f16::NAN)
             } else {
-                Some(f16::from_f32(rng.gen()))
+                Some(f16::from_f32(rng.random()))
             }
         })
         .collect()
@@ -439,10 +446,10 @@ pub fn create_f32_array(size: usize, nan_density: f32) -> Float32Array {
     (0..size)
         .map(|_| {
-            if rng.gen::<f32>() < nan_density {
+            if rng.random::<f32>() < nan_density {
                 Some(f32::NAN)
             } else {
-                Some(rng.gen())
+                Some(rng.random())
             }
         })
         .collect()
@@ -454,10 +461,10 @@ pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array {
     (0..size)
         .map(|_| {
-            if rng.gen::<f32>() < nan_density {
+            if rng.random::<f32>() < nan_density {
                 Some(f64::NAN)
             } else {
-                Some(rng.gen())
+                Some(rng.random())
             }
         })
         .collect()
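Aside for reviewers (illustrative only, not a hunk of this patch): the bench_util.rs changes above are purely mechanical renames from the rand 0.8 API to rand 0.9. A minimal, self-contained sketch of the mapping, assuming `rand = "0.9"`; the old 0.8 spellings are shown in comments:

```rust
// Illustrative sketch only, not part of this diff.
use rand::{
    distr::{Alphanumeric, Distribution, StandardUniform},
    rng, Rng,
};

fn main() {
    // rand 0.8: let mut r = rand::thread_rng();
    let mut r = rng();

    // rand 0.8: r.gen::<f32>(), r.gen_range(0..10), r.gen_bool(0.5)
    let x: f32 = r.random();
    let n = r.random_range(0..10);
    let heads = r.random_bool(0.5);

    // rand 0.8: rand::distributions::Standard
    let bytes: Vec<u8> = StandardUniform.sample_iter(&mut r).take(8).collect();

    // Alphanumeric moved from rand::distributions to rand::distr
    let word: String = (&mut r)
        .sample_iter(&Alphanumeric)
        .take(12)
        .map(char::from)
        .collect();

    println!("{x} {n} {heads} {bytes:?} {word}");
}
```

The `thread_rng` cargo feature added in the Cargo.toml hunks exists because rand 0.9 gates `rand::rng()` behind that feature when default features are disabled.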
diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs
index d980f749852f..42a0798f5540 100644
--- a/arrow/src/util/data_gen.rs
+++ b/arrow/src/util/data_gen.rs
@@ -19,8 +19,10 @@
 use std::sync::Arc;
 
-use rand::distributions::uniform::SampleRange;
-use rand::{distributions::uniform::SampleUniform, Rng};
+use rand::{
+    distr::uniform::{SampleRange, SampleUniform},
+    Rng,
+};
 
 use crate::array::*;
 use crate::error::{ArrowError, Result};
@@ -380,7 +382,7 @@ fn create_random_offsets(
     offsets.push(current_offset);
 
     (0..size).for_each(|_| {
-        current_offset += rng.gen_range(min..max);
+        current_offset += rng.random_range(min..max);
         offsets.push(current_offset);
     });
 
@@ -393,7 +395,7 @@ fn create_random_null_buffer(size: usize, null_density: f32) -> Buffer {
     {
         let mut_slice = mut_buf.as_slice_mut();
         (0..size).for_each(|i| {
-            if rng.gen::<f32>() >= null_density {
+            if rng.random::<f32>() >= null_density {
                 bit_util::set_bit(mut_slice, i)
             }
         })
@@ -412,7 +414,7 @@ pub trait RandomTemporalValue: ArrowTemporalType {
     where
         Self::Native: SampleUniform,
     {
-        rng.gen_range(Self::value_range())
+        rng.random_range(Self::value_range())
     }
 
     /// Generate a random value of the type
@@ -513,7 +515,7 @@ where
     (0..size)
         .map(|_| {
-            if rng.gen::<f32>() < null_density {
+            if rng.random::<f32>() < null_density {
                 None
             } else {
                 Some(T::random(&mut rng))
diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs
index 2d718d392baf..566ccc6ab536 100644
--- a/arrow/src/util/test_util.rs
+++ b/arrow/src/util/test_util.rs
@@ -25,7 +25,7 @@ pub fn random_bytes(n: usize) -> Vec<u8> {
     let mut result = vec![];
     let mut rng = seedable_rng();
     for _ in 0..n {
-        result.push(rng.gen_range(0..255));
+        result.push(rng.random_range(0..255));
     }
     result
 }
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index e0a9be67919b..4cb0c4c4e325 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -83,7 +83,7 @@ zstd = { version = "0.13", default-features = false }
 serde_json = { version = "1.0", features = ["std"], default-features = false }
 arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "json"] }
 tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread", "io-util", "fs"] }
-rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
+rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
 object_store = { version = "0.11.0", default-features = false, features = ["azure"] }
 
 [package.metadata.docs.rs]
diff --git a/parquet/benches/arrow_reader.rs b/parquet/benches/arrow_reader.rs
index e5165fee212c..0e887c31c594 100644
--- a/parquet/benches/arrow_reader.rs
+++ b/parquet/benches/arrow_reader.rs
@@ -37,7 +37,7 @@ use parquet::{
     data_type::{ByteArrayType, Int32Type, Int64Type},
     schema::types::{ColumnDescPtr, SchemaDescPtr},
 };
-use rand::distributions::uniform::SampleUniform;
+use rand::distr::uniform::SampleUniform;
 use rand::{rngs::StdRng, Rng, SeedableRng};
 use std::{collections::VecDeque, sync::Arc};
 
@@ -119,14 +119,14 @@ where
         let mut values = Vec::with_capacity(VALUES_PER_PAGE);
         let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE);
         for _k in 0..VALUES_PER_PAGE {
-            let def_level = if rng.gen::<f32>() < null_density {
+            let def_level = if rng.random::<f32>() < null_density {
                 max_def_level - 1
             } else {
                 max_def_level
             };
             if def_level == max_def_level {
                 // create the Float16 value
-                let value = f16::from_f32(rng.gen_range(min..max));
+                let value = f16::from_f32(rng.random_range(min..max));
                 // Float16 in parquet is stored little-endian
                 let bytes = match column_desc.physical_type() {
                     Type::FIXED_LEN_BYTE_ARRAY => {
@@ -177,14 +177,14 @@ where
         let mut values = Vec::with_capacity(VALUES_PER_PAGE);
         let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE);
         for _k in 0..VALUES_PER_PAGE {
-            let def_level = if rng.gen::<f32>() < null_density {
+            let def_level = if rng.random::<f32>() < null_density {
                 max_def_level - 1
             } else {
                 max_def_level
             };
             if def_level == max_def_level {
                 // create the decimal value
-                let value = rng.gen_range(min..max);
+                let value = rng.random_range(min..max);
                 // decimal of parquet use the big-endian to store
                 let bytes = match column_desc.physical_type() {
                     Type::BYTE_ARRAY => {
@@ -235,14 +235,14 @@ fn build_encoded_flba_bytes_page_iterator(
         let mut values = Vec::with_capacity(VALUES_PER_PAGE);
         let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE);
         for _k in 0..VALUES_PER_PAGE {
-            let def_level = if rng.gen::<f32>() < null_density {
+            let def_level = if rng.random::<f32>() < null_density {
                 max_def_level - 1
             } else {
                 max_def_level
             };
             if def_level == max_def_level {
                 // create the FLBA(BYTE_LENGTH) value
-                let value = (0..BYTE_LENGTH).map(|_| rng.gen()).collect::<Vec<u8>>();
+                let value = (0..BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
                 let value = <T as DataType>::T::from(value);
                 values.push(value);
@@ -284,13 +284,13 @@ where
         let mut values = Vec::with_capacity(VALUES_PER_PAGE);
         let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE);
         for _k in 0..VALUES_PER_PAGE {
-            let def_level = if rng.gen::<f32>() < null_density {
+            let def_level = if rng.random::<f32>() < null_density {
                 max_def_level - 1
             } else {
                 max_def_level
             };
             if def_level == max_def_level {
-                let value = FromPrimitive::from_usize(rng.gen_range(min..max)).unwrap();
+                let value = FromPrimitive::from_usize(rng.random_range(min..max)).unwrap();
                 values.push(value);
             }
             def_levels.push(def_level);
@@ -336,14 +336,14 @@ where
         let mut values = Vec::with_capacity(VALUES_PER_PAGE);
         let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE);
         for _k in 0..VALUES_PER_PAGE {
-            let def_level = if rng.gen::<f32>() < null_density {
+            let def_level = if rng.random::<f32>() < null_density {
                 max_def_level - 1
             } else {
                 max_def_level
             };
             if def_level == max_def_level {
                 // select random value from list of unique values
-                let value = unique_values[rng.gen_range(0..NUM_UNIQUE_VALUES)];
+                let value = unique_values[rng.random_range(0..NUM_UNIQUE_VALUES)];
                 values.push(value);
             }
             def_levels.push(def_level);
@@ -393,7 +393,7 @@ fn build_plain_encoded_byte_array_page_iterator_inner(
         let mut values = Vec::with_capacity(VALUES_PER_PAGE);
         let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE);
         for k in 0..VALUES_PER_PAGE {
-            let def_level = if rng.gen::<f32>() < null_density {
+            let def_level = if rng.random::<f32>() < null_density {
                 max_def_level - 1
             } else {
                 max_def_level
@@ -452,14 +452,15 @@ fn build_dictionary_encoded_string_page_iterator(
         let mut values = Vec::with_capacity(VALUES_PER_PAGE);
         let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE);
         for _k in 0..VALUES_PER_PAGE {
-            let def_level = if rng.gen::<f32>() < null_density {
+            let def_level = if rng.random::<f32>() < null_density {
                 max_def_level - 1
             } else {
                 max_def_level
             };
             if def_level == max_def_level {
                 // select random value from list of unique values
-                let string_value = unique_values[rng.gen_range(0..NUM_UNIQUE_VALUES)].as_str();
+                let string_value =
+                    unique_values[rng.random_range(0..NUM_UNIQUE_VALUES)].as_str();
                 values.push(parquet::data_type::ByteArray::from(string_value));
             }
             def_levels.push(def_level);
@@ -512,12 +513,12 @@ fn build_string_list_page_iterator(
         let mut rep_levels = Vec::with_capacity(VALUES_PER_PAGE * MAX_LIST_LEN);
         for k in 0..VALUES_PER_PAGE {
             rep_levels.push(0);
-            if rng.gen::<f32>() < null_density {
+            if rng.random::<f32>() < null_density {
                 // Null list
                 def_levels.push(0);
                 continue;
             }
-            let len = rng.gen_range(0..MAX_LIST_LEN);
+            let len = rng.random_range(0..MAX_LIST_LEN);
             if len == 0 {
                 // Empty list
                 def_levels.push(1);
@@ -527,7 +528,7 @@
             (1..len).for_each(|_| rep_levels.push(1));
 
             for l in 0..len {
-                if rng.gen::<f32>() < null_density {
+                if rng.random::<f32>() < null_density {
                     // Null element
                     def_levels.push(2);
                 } else {
diff --git a/parquet/benches/compression.rs b/parquet/benches/compression.rs
index 2275a89405d9..5c9b0d1f7549 100644
--- a/parquet/benches/compression.rs
+++ b/parquet/benches/compression.rs
@@ -18,7 +18,7 @@
 use criterion::*;
 use parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel};
 use parquet::compression::create_codec;
-use rand::distributions::Alphanumeric;
+use rand::distr::Alphanumeric;
 use rand::prelude::*;
 
 fn do_bench(c: &mut Criterion, name: &str, uncompressed: &[u8]) {
@@ -76,7 +76,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     // Create a collection of 64 words
     let words: Vec<Vec<u8>> = (0..64)
         .map(|_| {
-            let len = rng.gen_range(1..12);
+            let len = rng.random_range(1..12);
             rng.sample_iter(&Alphanumeric).take(len).collect()
         })
         .collect();
@@ -84,7 +84,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     // Build data by concatenating these words randomly together
     let mut uncompressed = Vec::with_capacity(DATA_SIZE);
     while uncompressed.len() < DATA_SIZE {
-        let word = &words[rng.gen_range(0..words.len())];
+        let word = &words[rng.random_range(0..words.len())];
         uncompressed.extend_from_slice(&word[..word.len().min(DATA_SIZE - uncompressed.len())])
     }
     assert_eq!(uncompressed.len(), DATA_SIZE);
diff --git a/parquet/benches/encoding.rs b/parquet/benches/encoding.rs
index 8e61666e6345..68f215d4ea78 100644
--- a/parquet/benches/encoding.rs
+++ b/parquet/benches/encoding.rs
@@ -86,12 +86,12 @@ fn criterion_benchmark(c: &mut Criterion) {
     let mut d128s = Vec::new();
     for _ in 0..n {
         f16s.push(FixedLenByteArray::from(
-            f16::from_f32(rng.gen::<f32>()).to_le_bytes().to_vec(),
+            f16::from_f32(rng.random::<f32>()).to_le_bytes().to_vec(),
         ));
-        f32s.push(rng.gen::<f32>());
-        f64s.push(rng.gen::<f64>());
+        f32s.push(rng.random::<f32>());
+        f64s.push(rng.random::<f64>());
         d128s.push(FixedLenByteArray::from(
-            rng.gen::<i128>().to_be_bytes().to_vec(),
+            rng.random::<i128>().to_be_bytes().to_vec(),
         ));
     }
diff --git a/parquet/benches/row_selector.rs b/parquet/benches/row_selector.rs
index 32f0d6a56064..9fa634d09dbc 100644
--- a/parquet/benches/row_selector.rs
+++ b/parquet/benches/row_selector.rs
@@ -31,9 +31,9 @@ use rand::Rng;
 ///
 /// * A `BooleanArray` instance with randomly selected rows based on the provided ratio.
 fn generate_random_row_selection(total_rows: usize, selection_ratio: f64) -> BooleanArray {
-    let mut rng = rand::thread_rng();
+    let mut rng = rand::rng();
     let bools: Vec<bool> = (0..total_rows)
-        .map(|_| rng.gen_bool(selection_ratio))
+        .map(|_| rng.random_bool(selection_ratio))
         .collect();
     BooleanArray::from(bools)
 }
diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs
index 8df659060040..6d6bbdc7b804 100644
--- a/parquet/src/arrow/array_reader/byte_view_array.rs
+++ b/parquet/src/arrow/array_reader/byte_view_array.rs
@@ -329,7 +329,7 @@ impl ByteViewArrayDecoderPlain {
 
         let to_read = len.min(self.max_remaining_values);
 
-        let buf = self.buf.as_ref();
+        let buf: &[u8] = self.buf.as_ref();
         let mut read = 0;
         output.views.reserve(to_read);
@@ -405,7 +405,7 @@ impl ByteViewArrayDecoderPlain {
     pub fn skip(&mut self, to_skip: usize) -> Result<usize> {
         let to_skip = to_skip.min(self.max_remaining_values);
         let mut skip = 0;
-        let buf = self.buf.as_ref();
+        let buf: &[u8] = self.buf.as_ref();
 
         while self.offset < self.buf.len() && skip != to_skip {
             if self.offset + 4 > buf.len() {
diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index 709d0f8bb16e..c76c41db312d 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -361,7 +361,7 @@ mod tests {
     use arrow_array::{Array, Date32Array, PrimitiveArray};
 
     use arrow::datatypes::DataType::{Date32, Decimal128};
-    use rand::distributions::uniform::SampleUniform;
+    use rand::distr::uniform::SampleUniform;
     use std::collections::VecDeque;
 
     #[allow(clippy::too_many_arguments)]
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 6eba04c86f91..0365bd1a4d15 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -926,7 +926,7 @@ mod tests {
     use bytes::Bytes;
     use half::f16;
     use num::PrimInt;
-    use rand::{thread_rng, Rng, RngCore};
+    use rand::{rng, Rng, RngCore};
     use tempfile::tempfile;
 
     use arrow_array::builder::*;
@@ -1403,7 +1403,7 @@ mod tests {
     impl RandGen<FixedLenByteArrayType> for RandFixedLenGen {
         fn gen(len: i32) -> FixedLenByteArray {
             let mut v = vec![0u8; len as usize];
-            thread_rng().fill_bytes(&mut v);
+            rng().fill_bytes(&mut v);
             ByteArray::from(v).into()
         }
     }
@@ -2058,10 +2058,13 @@ mod tests {
         fn with_row_selections(self) -> Self {
             assert!(self.row_filter.is_none(), "Must set row selection first");
 
-            let mut rng = thread_rng();
-            let step = rng.gen_range(self.record_batch_size..self.num_rows);
-            let row_selections =
-                create_test_selection(step, self.num_row_groups * self.num_rows, rng.gen::<bool>());
+            let mut rng = rng();
+            let step = rng.random_range(self.record_batch_size..self.num_rows);
+            let row_selections = create_test_selection(
+                step,
+                self.num_row_groups * self.num_rows,
+                rng.random::<bool>(),
+            );
             Self {
                 row_selections: Some(row_selections),
                 ..self
@@ -2074,9 +2077,9 @@
                 None => self.num_row_groups * self.num_rows,
             };
 
-            let mut rng = thread_rng();
+            let mut rng = rng();
             Self {
-                row_filter: Some((0..row_count).map(|_| rng.gen_bool(0.9)).collect()),
+                row_filter: Some((0..row_count).map(|_| rng.random_bool(0.9)).collect()),
                 ..self
             }
         }
@@ -2290,7 +2293,7 @@ mod tests {
         //according to null_percent generate def_levels
         let (repetition, def_levels) = match opts.null_percent.as_ref() {
             Some(null_percent) => {
-                let mut rng = thread_rng();
+                let mut rng = rng();
                 let def_levels: Vec<Vec<i16>> = (0..opts.num_row_groups)
                     .map(|_| {
@@ -4134,7 +4137,7 @@ mod tests {
 
     #[test]
     fn test_list_selection_fuzz() {
-        let mut rng = thread_rng();
+        let mut rng = rng();
         let schema = Arc::new(Schema::new(vec![Field::new_list(
             "list",
             Field::new_list(
@@ -4150,26 +4153,26 @@ mod tests {
         let mut list_a_builder = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
 
         for _ in 0..2048 {
-            if rng.gen_bool(0.2) {
+            if rng.random_bool(0.2) {
                 list_a_builder.append(false);
                 continue;
             }
 
-            let list_a_len = rng.gen_range(0..10);
+            let list_a_len = rng.random_range(0..10);
             let list_b_builder = list_a_builder.values();
 
             for _ in 0..list_a_len {
-                if rng.gen_bool(0.2) {
+                if rng.random_bool(0.2) {
                     list_b_builder.append(false);
                     continue;
                 }
 
-                let list_b_len = rng.gen_range(0..10);
+                let list_b_len = rng.random_range(0..10);
                 let int_builder = list_b_builder.values();
                 for _ in 0..list_b_len {
-                    match rng.gen_bool(0.2) {
+                    match rng.random_bool(0.2) {
                         true => int_builder.append_null(),
-                        false => int_builder.append_value(rng.gen()),
+                        false => int_builder.append_value(rng.random()),
                     }
                 }
                 list_b_builder.append(true)
diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs
index 378d2253f19a..ffcf39df0e23 100644
--- a/parquet/src/arrow/arrow_reader/selection.rs
+++ b/parquet/src/arrow/arrow_reader/selection.rs
@@ -641,7 +641,7 @@ fn union_row_selections(left: &[RowSelector], right: &[RowSelector]) -> RowSelec
 mod tests {
     use super::*;
     use crate::format::PageLocation;
-    use rand::{thread_rng, Rng};
+    use rand::{rng, Rng};
 
     #[test]
     fn test_from_filters() {
@@ -1013,14 +1013,14 @@ mod tests {
 
     #[test]
     fn test_and_fuzz() {
-        let mut rand = thread_rng();
+        let mut rand = rng();
         for _ in 0..100 {
-            let a_len = rand.gen_range(10..100);
-            let a_bools: Vec<_> = (0..a_len).map(|_| rand.gen_bool(0.2)).collect();
+            let a_len = rand.random_range(10..100);
+            let a_bools: Vec<_> = (0..a_len).map(|_| rand.random_bool(0.2)).collect();
             let a = RowSelection::from_filters(&[BooleanArray::from(a_bools.clone())]);
 
             let b_len: usize = a_bools.iter().map(|x| *x as usize).sum();
-            let b_bools: Vec<_> = (0..b_len).map(|_| rand.gen_bool(0.8)).collect();
+            let b_bools: Vec<_> = (0..b_len).map(|_| rand.random_bool(0.8)).collect();
             let b = RowSelection::from_filters(&[BooleanArray::from(b_bools.clone())]);
 
             let mut expected_bools = vec![false; a_len];
diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs
index 2c8a59399de1..2b7856e9dc31 100644
--- a/parquet/src/arrow/async_reader/mod.rs
+++ b/parquet/src/arrow/async_reader/mod.rs
@@ -765,9 +765,8 @@ where
                     self.batch_size,
                 )
                 .await
-                .map_err(|err| {
+                .inspect_err(|_| {
                     self.state = StreamState::Error;
-                    err
                 })?;
 
                 self.reader = Some(reader_factory);
@@ -1070,7 +1069,7 @@ mod tests {
     };
     use arrow_schema::{DataType, Field, Schema};
     use futures::{StreamExt, TryStreamExt};
-    use rand::{thread_rng, Rng};
+    use rand::{rng, Rng};
     use std::collections::HashMap;
     use std::sync::{Arc, Mutex};
     use tempfile::tempfile;
@@ -1400,7 +1399,7 @@
 
         assert_eq!(metadata.num_row_groups(), 1);
 
-        let mut rand = thread_rng();
+        let mut rand = rng();
 
         for _ in 0..100 {
             let mut expected_rows = 0;
@@ -1409,7 +1408,7 @@
             let mut selectors = vec![];
 
             while total_rows < 7300 {
-                let row_count: usize = rand.gen_range(1..100);
+                let row_count: usize = rand.random_range(1..100);
 
                 let row_count = row_count.min(7300 - total_rows);
@@ -1436,7 +1435,7 @@
             .await
             .unwrap();
 
-            let col_idx: usize = rand.gen_range(0..13);
+            let col_idx: usize = rand.random_range(0..13);
 
             let mask = ProjectionMask::leaves(builder.parquet_schema(), vec![col_idx]);
 
             let stream = builder
@@ -1467,7 +1466,7 @@
 
         assert_eq!(metadata.num_row_groups(), 1);
 
-        let mut rand = thread_rng();
+        let mut rand = rng();
         let mut expected_rows = 0;
         let mut total_rows = 0;
@@ -1480,7 +1479,7 @@
         });
 
         while total_rows < 7300 {
-            let row_count: usize = rand.gen_range(1..100);
+            let row_count: usize = rand.random_range(1..100);
 
             let row_count = row_count.min(7300 - total_rows);
@@ -1507,7 +1506,7 @@
             .await
             .unwrap();
 
-        let col_idx: usize = rand.gen_range(0..13);
+        let col_idx: usize = rand.random_range(0..13);
 
         let mask = ProjectionMask::leaves(builder.parquet_schema(), vec![col_idx]);
 
         let stream = builder
diff --git a/parquet/src/arrow/buffer/bit_util.rs b/parquet/src/arrow/buffer/bit_util.rs
index e7aea56a7f05..1d2c953abcbb 100644
--- a/parquet/src/arrow/buffer/bit_util.rs
+++ b/parquet/src/arrow/buffer/bit_util.rs
@@ -65,12 +65,12 @@ pub fn sign_extend_be<const N: usize>(b: &[u8]) -> [u8; N] {
 mod tests {
     use super::*;
     use arrow_array::builder::BooleanBufferBuilder;
-    use rand::prelude::*;
+    use rand::{prelude::*, rng};
 
     #[test]
     fn test_bit_fns() {
-        let mut rng = thread_rng();
-        let mask_length = rng.gen_range(1..1024);
+        let mut rng = rng();
+        let mask_length = rng.random_range(1..1024);
         let bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
             .take(mask_length)
             .collect();
@@ -92,8 +92,8 @@ mod tests {
         assert_eq!(count_set_bits(&[0xFF], 1..1), 0);
 
         for _ in 0..20 {
-            let start = rng.gen_range(0..bools.len());
-            let end = rng.gen_range(start..bools.len());
+            let start = rng.random_range(0..bools.len());
+            let end = rng.random_range(start..bools.len());
 
             let actual = count_set_bits(nulls.as_slice(), start..end);
             let expected = bools[start..end].iter().filter(|x| **x).count();
diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs
index fcd04fbb9bbe..a90b3c4ec795 100644
--- a/parquet/src/arrow/record_reader/definition_levels.rs
+++ b/parquet/src/arrow/record_reader/definition_levels.rs
@@ -351,17 +351,17 @@ mod tests {
 
     use super::*;
     use crate::encodings::rle::RleEncoder;
-    use rand::{thread_rng, Rng};
+    use rand::{rng, Rng};
 
     #[test]
     fn test_packed_decoder() {
-        let mut rng = thread_rng();
-        let len: usize = rng.gen_range(512..1024);
+        let mut rng = rng();
+        let len: usize = rng.random_range(512..1024);
 
         let mut expected = BooleanBufferBuilder::new(len);
         let mut encoder = RleEncoder::new(1, 1024);
         for _ in 0..len {
-            let bool = rng.gen_bool(0.8);
+            let bool = rng.random_bool(0.8);
             encoder.put(bool as u64);
             expected.append(bool);
         }
@@ -379,7 +379,7 @@ mod tests {
                 break;
             }
 
-            let to_read = rng.gen_range(1..=remaining);
+            let to_read = rng.random_range(1..=remaining);
             decoder.read(&mut decoded, to_read).unwrap();
         }
@@ -389,15 +389,15 @@ mod tests {
 
     #[test]
     fn test_packed_decoder_skip() {
-        let mut rng = thread_rng();
-        let len: usize = rng.gen_range(512..1024);
+        let mut rng = rng();
+        let len: usize = rng.random_range(512..1024);
 
         let mut expected = BooleanBufferBuilder::new(len);
         let mut encoder = RleEncoder::new(1, 1024);
 
         let mut total_value = 0;
         for _ in 0..len {
-            let bool = rng.gen_bool(0.8);
+            let bool = rng.random_bool(0.8);
             encoder.put(bool as u64);
             expected.append(bool);
             if bool {
@@ -421,8 +421,8 @@ mod tests {
             if remaining_levels == 0 {
                 break;
             }
-            let to_read_or_skip_level = rng.gen_range(1..=remaining_levels);
-            if rng.gen_bool(0.5) {
+            let to_read_or_skip_level = rng.random_range(1..=remaining_levels);
+            if rng.random_bool(0.5) {
                 let (skip_val_num, skip_level_num) = decoder.skip(to_read_or_skip_level).unwrap();
                 skip_value += skip_val_num;
                 skip_level += skip_level_num
diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs
index cb68351d06fb..b6998057845d 100644
--- a/parquet/src/column/reader.rs
+++ b/parquet/src/column/reader.rs
@@ -583,7 +583,7 @@ fn parse_v1_level(
 mod tests {
     use super::*;
 
-    use rand::distributions::uniform::SampleUniform;
+    use rand::distr::uniform::SampleUniform;
     use std::{collections::VecDeque, sync::Arc};
 
     use crate::basic::Type as PhysicalType;
diff --git a/parquet/src/column/reader/decoder.rs b/parquet/src/column/reader/decoder.rs
index afd58b3cd195..a8766e82114b 100644
--- a/parquet/src/column/reader/decoder.rs
+++ b/parquet/src/column/reader/decoder.rs
@@ -484,7 +484,7 @@ impl RepetitionLevelDecoder for RepetitionLevelDecoderImpl {
 mod tests {
     use super::*;
     use crate::encodings::rle::RleEncoder;
-    use rand::prelude::*;
+    use rand::{prelude::*, rng};
 
     #[test]
     fn test_skip_padding() {
@@ -509,9 +509,9 @@ mod tests {
     #[test]
     fn test_skip_rep_levels() {
         for _ in 0..10 {
-            let mut rng = thread_rng();
+            let mut rng = rng();
             let total_len = 10000_usize;
-            let mut encoded: Vec<i16> = (0..total_len).map(|_| rng.gen_range(0..5)).collect();
+            let mut encoded: Vec<i16> = (0..total_len).map(|_| rng.random_range(0..5)).collect();
             encoded[0] = 0;
             let mut encoder = RleEncoder::new(3, 1024);
             for v in &encoded {
@@ -526,8 +526,8 @@ mod tests {
             let mut remaining_records = total_records;
             let mut remaining_levels = encoded.len();
             loop {
-                let skip = rng.gen_bool(0.5);
-                let records = rng.gen_range(1..=remaining_records.min(5));
+                let skip = rng.random_bool(0.5);
+                let records = rng.random_range(1..=remaining_records.min(5));
                 let (records_read, levels_read) = if skip {
                     decoder.skip_rep_levels(records, remaining_levels).unwrap()
                 } else {
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 5f34f34cbb7a..ae418237515e 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -1528,7 +1528,7 @@ mod tests {
         schema::parser::parse_message_type,
     };
     use core::str;
-    use rand::distributions::uniform::SampleUniform;
+    use rand::distr::uniform::SampleUniform;
     use std::{fs::File, sync::Arc};
 
     use crate::column::{
diff --git a/parquet/src/encodings/rle.rs b/parquet/src/encodings/rle.rs
index 67d23ce03ca2..d6e32600d321 100644
--- a/parquet/src/encodings/rle.rs
+++ b/parquet/src/encodings/rle.rs
@@ -528,7 +528,7 @@ mod tests {
     use super::*;
 
     use crate::util::bit_util::ceil;
-    use rand::{self, distributions::Standard, thread_rng, Rng, SeedableRng};
+    use rand::{self, distr::StandardUniform, rng, Rng, SeedableRng};
 
     const MAX_WIDTH: usize = 32;
@@ -1019,15 +1019,18 @@ mod tests {
 
         for _ in 0..niters {
             values.clear();
-            let rng = thread_rng();
-            let seed_vec: Vec<u8> = rng.sample_iter::<u8, _>(&Standard).take(seed_len).collect();
+            let rng = rng();
+            let seed_vec: Vec<u8> = rng
+                .sample_iter::<u8, _>(&StandardUniform)
+                .take(seed_len)
+                .collect();
             let mut seed = [0u8; 32];
             seed.copy_from_slice(&seed_vec[0..seed_len]);
             let mut gen = rand::rngs::StdRng::from_seed(seed);
 
             let mut parity = false;
             for _ in 0..ngroups {
-                let mut group_size = gen.gen_range(1..20);
+                let mut group_size = gen.random_range(1..20);
                 if group_size > max_group_size {
                     group_size = 1;
                 }
diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs
index b4c929ce0186..8f6c2d8f8184 100644
--- a/parquet/src/util/bit_util.rs
+++ b/parquet/src/util/bit_util.rs
@@ -716,7 +716,7 @@ mod tests {
     use super::*;
     use crate::util::test_common::rand_gen::random_numbers;
-    use rand::distributions::{Distribution, Standard};
+    use rand::distr::{Distribution, StandardUniform};
     use std::fmt::Debug;
 
     #[test]
@@ -1066,7 +1066,7 @@ mod tests {
     fn test_put_aligned_rand_numbers<T>(total: usize, num_bits: usize)
     where
         T: Copy + FromBytes + AsBytes + Debug + PartialEq,
-        Standard: Distribution<T>,
+        StandardUniform: Distribution<T>,
     {
         assert!(num_bits <= 32);
         assert!(total % 2 == 0);
diff --git a/parquet/src/util/test_common/rand_gen.rs b/parquet/src/util/test_common/rand_gen.rs
index ec80d3a593ae..36a7e4b52a1c 100644
--- a/parquet/src/util/test_common/rand_gen.rs
+++ b/parquet/src/util/test_common/rand_gen.rs
@@ -19,8 +19,8 @@ use crate::basic::Encoding;
 use crate::column::page::Page;
 use bytes::Bytes;
 use rand::{
-    distributions::{uniform::SampleUniform, Distribution, Standard},
-    thread_rng, Rng,
+    distr::{uniform::SampleUniform, Distribution, StandardUniform},
+    rng, Rng,
 };
 use std::collections::VecDeque;
@@ -44,51 +44,55 @@ pub trait RandGen<T: DataType> {
 
 impl RandGen<BoolType> for BoolType {
     fn gen(_: i32) -> bool {
-        thread_rng().gen::<bool>()
+        rng().random::<bool>()
     }
 }
 
 impl RandGen<Int32Type> for Int32Type {
     fn gen(_: i32) -> i32 {
-        thread_rng().gen::<i32>()
+        rng().random::<i32>()
     }
 }
 
 impl RandGen<Int64Type> for Int64Type {
     fn gen(_: i32) -> i64 {
-        thread_rng().gen::<i64>()
+        rng().random::<i64>()
     }
 }
 
 impl RandGen<Int96Type> for Int96Type {
     fn gen(_: i32) -> Int96 {
-        let mut rng = thread_rng();
+        let mut rng = rng();
         let mut result = Int96::new();
-        result.set_data(rng.gen::<u32>(), rng.gen::<u32>(), rng.gen::<u32>());
+        result.set_data(
+            rng.random::<u32>(),
+            rng.random::<u32>(),
+            rng.random::<u32>(),
+        );
         result
     }
 }
 
 impl RandGen<FloatType> for FloatType {
     fn gen(_: i32) -> f32 {
-        thread_rng().gen::<f32>()
+        rng().random::<f32>()
     }
 }
 
 impl RandGen<DoubleType> for DoubleType {
     fn gen(_: i32) -> f64 {
-        thread_rng().gen::<f64>()
+        rng().random::<f64>()
     }
 }
 
 impl RandGen<ByteArrayType> for ByteArrayType {
     fn gen(_: i32) -> ByteArray {
-        let mut rng = thread_rng();
+        let mut rng = rng();
         let mut result = ByteArray::new();
         let mut value = vec![];
-        let len = rng.gen_range(0..128);
+        let len = rng.random_range(0..128);
         for _ in 0..len {
-            value.push(rng.gen_range(0..255));
+            value.push(rng.random_range(0..255));
         }
         result.set_data(Bytes::from(value));
         result
@@ -105,28 +109,28 @@ impl RandGen<FixedLenByteArrayType> for FixedLenByteArrayType {
 
 pub fn random_bytes(n: usize) -> Vec<u8> {
     let mut result = vec![];
-    let mut rng = thread_rng();
+    let mut rng = rng();
     for _ in 0..n {
-        result.push(rng.gen_range(0..255));
+        result.push(rng.random_range(0..255));
     }
     result
 }
 
 pub fn random_numbers<T>(n: usize) -> Vec<T>
 where
-    Standard: Distribution<T>,
+    StandardUniform: Distribution<T>,
 {
-    let mut rng = thread_rng();
-    Standard.sample_iter(&mut rng).take(n).collect()
+    let mut rng = rng();
+    StandardUniform.sample_iter(&mut rng).take(n).collect()
 }
 
 pub fn random_numbers_range<T>(n: usize, low: T, high: T, result: &mut Vec<T>)
 where
     T: PartialOrd + SampleUniform + Copy,
 {
-    let mut rng = thread_rng();
+    let mut rng = rng();
     for _ in 0..n {
-        result.push(rng.gen_range(low..high));
+        result.push(rng.random_range(low..high));
     }
 }
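Closing aside (again illustrative, not part of the patch): the rand_gen.rs helpers above keep the generic pattern where a type is sampleable whenever the standard distribution implements `Distribution<T>`; only the bound's name changes in rand 0.9. A sketch of a hypothetical seeded variant, where `random_numbers_seeded` is an invented name rather than a helper in this codebase:

```rust
// Illustrative sketch only. `random_numbers_seeded` is a hypothetical,
// deterministic cousin of the `random_numbers` helper above.
use rand::{
    distr::{Distribution, StandardUniform},
    rngs::StdRng,
    SeedableRng,
};

fn random_numbers_seeded<T>(n: usize, seed: u64) -> Vec<T>
where
    StandardUniform: Distribution<T>, // rand 0.8 spelled this `Standard: Distribution<T>`
{
    let mut rng = StdRng::seed_from_u64(seed);
    StandardUniform.sample_iter(&mut rng).take(n).collect()
}

fn main() {
    // Same seed, same sequence: reproducible data for benches and tests.
    let a: Vec<i32> = random_numbers_seeded(4, 42);
    let b: Vec<i32> = random_numbers_seeded(4, 42);
    assert_eq!(a, b);
    println!("{a:?}");
}
```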