Skip to content

Commit 613e93e

Browse files
alamb, BugenZhao, XiangpengHao, Rachelint, Jesse-Bakker
authored
Merge 53.0.0-dev dev branch to main (#6126)
* bump `tonic` to 0.12 and `prost` to 0.13 for `arrow-flight` (#6041) * bump `tonic` to 0.12 and `prost` to 0.13 for `arrow-flight` Signed-off-by: Bugen Zhao <[email protected]> * fix example tests Signed-off-by: Bugen Zhao <[email protected]> --------- Signed-off-by: Bugen Zhao <[email protected]> * Remove `impl<T: AsRef<[u8]>> From<T> for Buffer` that easily accidentally copies data (#6043) * deprecate auto copy, ask explicit reference * update comments * make cargo doc happy * Make display of interval types more pretty (#6006) * improve dispaly for interval. * update test in pretty, and fix display problem. * tmp * fix tests in arrow-cast. * fix tests in pretty. * fix style. * Update snafu (#5930) * Update Parquet thrift generated structures (#6045) * update to latest thrift (as of 11 Jul 2024) from parquet-format * pass None for optional size statistics * escape HTML tags * don't need to escape brackets in arrays * Revert "Revert "Write Bloom filters between row groups instead of the end (#…" (#5933) This reverts commit 22e0b44. * Revert "Update snafu (#5930)" (#6069) This reverts commit 756b1fb. * Update pyo3 requirement from 0.21.1 to 0.22.1 (fixed) (#6075) * Update pyo3 requirement from 0.21.1 to 0.22.1 Updates the requirements on [pyo3](https://github.com/pyo3/pyo3) to permit the latest version. - [Release notes](https://github.com/pyo3/pyo3/releases) - [Changelog](https://github.com/PyO3/pyo3/blob/main/CHANGELOG.md) - [Commits](PyO3/pyo3@v0.21.1...v0.22.1) --- updated-dependencies: - dependency-name: pyo3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] <[email protected]> * refactor: remove deprecated `FromPyArrow::from_pyarrow` "GIL Refs" are being phased out. * chore: update `pyo3` in integration tests --------- Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * remove repeated codes to make the codes more concise. 
(#6080) * Add `unencoded_byte_array_data_bytes` to `ParquetMetaData` (#6068) * update to latest thrift (as of 11 Jul 2024) from parquet-format * pass None for optional size statistics * escape HTML tags * don't need to escape brackets in arrays * add support for unencoded_byte_array_data_bytes * add comments * change sig of ColumnMetrics::update_variable_length_bytes() * rename ParquetOffsetIndex to OffsetSizeIndex * rename some functions * suggestion from review Co-authored-by: Andrew Lamb <[email protected]> * add Default trait to ColumnMetrics as suggested in review * rename OffsetSizeIndex to OffsetIndexMetaData --------- Co-authored-by: Andrew Lamb <[email protected]> * Update pyo3 requirement from 0.21.1 to 0.22.2 (#6085) Updates the requirements on [pyo3](https://github.com/pyo3/pyo3) to permit the latest version. - [Release notes](https://github.com/pyo3/pyo3/releases) - [Changelog](https://github.com/PyO3/pyo3/blob/v0.22.2/CHANGELOG.md) - [Commits](PyO3/pyo3@v0.21.1...v0.22.2) --- updated-dependencies: - dependency-name: pyo3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Deprecate read_page_locations() and simplify offset index in `ParquetMetaData` (#6095) * deprecate read_page_locations * add to_thrift() to OffsetIndexMetaData * Update parquet/src/column/writer/mod.rs Co-authored-by: Ed Seidl <[email protected]> --------- Signed-off-by: Bugen Zhao <[email protected]> Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: Bugen Zhao <[email protected]> Co-authored-by: Xiangpeng Hao <[email protected]> Co-authored-by: kamille <[email protected]> Co-authored-by: Jesse <[email protected]> Co-authored-by: Ed Seidl <[email protected]> Co-authored-by: Marco Neumann <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
1 parent 1ff4e21 commit 613e93e

File tree

37 files changed

+1245
-374
lines changed

37 files changed

+1245
-374
lines changed

arrow-buffer/src/buffer/immutable.rs

+23-10
Original file line numberDiff line numberDiff line change
@@ -356,16 +356,29 @@ impl Buffer {
356356
}
357357
}
358358

359-
/// Creating a `Buffer` instance by copying the memory from a `AsRef<[u8]>` into a newly
360-
/// allocated memory region.
361-
impl<T: AsRef<[u8]>> From<T> for Buffer {
362-
fn from(p: T) -> Self {
363-
// allocate aligned memory buffer
364-
let slice = p.as_ref();
365-
let len = slice.len();
366-
let mut buffer = MutableBuffer::new(len);
367-
buffer.extend_from_slice(slice);
368-
buffer.into()
359+
/// Note that here we deliberately do not implement
360+
/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
361+
/// because it would accept `Buffer::from(vec![...])`, which would cause an unexpected copy.
362+
/// Instead, we ask the user to be explicit when a copy occurs, e.g., `Buffer::from(vec![...].to_byte_slice())`.
363+
/// For a zero-copy conversion, the user should use `Buffer::from_vec(vec![...])`.
364+
///
365+
/// Since we removed the blanket impl for `AsRef<[u8]>`, we added the following three specific implementations to reduce API breakage.
366+
/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
367+
impl From<&[u8]> for Buffer {
368+
fn from(p: &[u8]) -> Self {
369+
Self::from_slice_ref(p)
370+
}
371+
}
372+
373+
impl<const N: usize> From<[u8; N]> for Buffer {
374+
fn from(p: [u8; N]) -> Self {
375+
Self::from_slice_ref(p)
376+
}
377+
}
378+
379+
impl<const N: usize> From<&[u8; N]> for Buffer {
380+
fn from(p: &[u8; N]) -> Self {
381+
Self::from_slice_ref(p)
369382
}
370383
}
371384

arrow-cast/src/cast/mod.rs

+13-13
Original file line numberDiff line numberDiff line change
@@ -4409,8 +4409,8 @@ mod tests {
44094409
IntervalUnit::YearMonth,
44104410
IntervalYearMonthArray,
44114411
vec![
4412-
Some("1 years 1 mons 0 days 0 hours 0 mins 0.00 secs"),
4413-
Some("2 years 7 mons 0 days 0 hours 0 mins 0.00 secs"),
4412+
Some("1 years 1 mons"),
4413+
Some("2 years 7 mons"),
44144414
None,
44154415
None,
44164416
None,
@@ -4433,9 +4433,9 @@ mod tests {
44334433
IntervalUnit::DayTime,
44344434
IntervalDayTimeArray,
44354435
vec![
4436-
Some("0 years 0 mons 390 days 0 hours 0 mins 0.000 secs"),
4437-
Some("0 years 0 mons 930 days 0 hours 0 mins 0.000 secs"),
4438-
Some("0 years 0 mons 30 days 0 hours 0 mins 0.000 secs"),
4436+
Some("390 days"),
4437+
Some("930 days"),
4438+
Some("30 days"),
44394439
None,
44404440
None,
44414441
]
@@ -4461,16 +4461,16 @@ mod tests {
44614461
IntervalUnit::MonthDayNano,
44624462
IntervalMonthDayNanoArray,
44634463
vec![
4464-
Some("0 years 13 mons 1 days 0 hours 0 mins 0.000000000 secs"),
4464+
Some("13 mons 1 days"),
44654465
None,
4466-
Some("0 years 31 mons 35 days 0 hours 0 mins 0.001400000 secs"),
4467-
Some("0 years 0 mons 3 days 0 hours 0 mins 0.000000000 secs"),
4468-
Some("0 years 0 mons 0 days 0 hours 0 mins 8.000000000 secs"),
4466+
Some("31 mons 35 days 0.001400000 secs"),
4467+
Some("3 days"),
4468+
Some("8.000000000 secs"),
44694469
None,
4470-
Some("0 years 0 mons 1 days 0 hours 0 mins 29.800000000 secs"),
4471-
Some("0 years 3 mons 0 days 0 hours 0 mins 1.000000000 secs"),
4472-
Some("0 years 0 mons 0 days 0 hours 8 mins 0.000000000 secs"),
4473-
Some("0 years 63 mons 9 days 19 hours 9 mins 2.222000000 secs"),
4470+
Some("1 days 29.800000000 secs"),
4471+
Some("3 mons 1.000000000 secs"),
4472+
Some("8 mins"),
4473+
Some("63 mons 9 days 19 hours 9 mins 2.222000000 secs"),
44744474
None,
44754475
]
44764476
);

arrow-cast/src/display.rs

+115-40
Original file line numberDiff line numberDiff line change
@@ -654,73 +654,148 @@ impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalYearMonthType> {
654654
let years = (interval / 12_f64).floor();
655655
let month = interval - (years * 12_f64);
656656

657-
write!(
658-
f,
659-
"{years} years {month} mons 0 days 0 hours 0 mins 0.00 secs",
660-
)?;
657+
write!(f, "{years} years {month} mons",)?;
661658
Ok(())
662659
}
663660
}
664661

665662
impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalDayTimeType> {
666663
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
667664
let value = self.value(idx);
665+
let mut prefix = "";
668666

669-
let secs = value.milliseconds / 1_000;
667+
if value.days != 0 {
668+
write!(f, "{prefix}{} days", value.days)?;
669+
prefix = " ";
670+
}
671+
672+
if value.milliseconds != 0 {
673+
let millis_fmt = MillisecondsFormatter {
674+
milliseconds: value.milliseconds,
675+
prefix,
676+
};
677+
678+
f.write_fmt(format_args!("{millis_fmt}"))?;
679+
}
680+
681+
Ok(())
682+
}
683+
}
684+
685+
impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalMonthDayNanoType> {
686+
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
687+
let value = self.value(idx);
688+
let mut prefix = "";
689+
690+
if value.months != 0 {
691+
write!(f, "{prefix}{} mons", value.months)?;
692+
prefix = " ";
693+
}
694+
695+
if value.days != 0 {
696+
write!(f, "{prefix}{} days", value.days)?;
697+
prefix = " ";
698+
}
699+
700+
if value.nanoseconds != 0 {
701+
let nano_fmt = NanosecondsFormatter {
702+
nanoseconds: value.nanoseconds,
703+
prefix,
704+
};
705+
f.write_fmt(format_args!("{nano_fmt}"))?;
706+
}
707+
708+
Ok(())
709+
}
710+
}
711+
712+
struct NanosecondsFormatter<'a> {
713+
nanoseconds: i64,
714+
prefix: &'a str,
715+
}
716+
717+
impl<'a> Display for NanosecondsFormatter<'a> {
718+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
719+
let mut prefix = self.prefix;
720+
721+
let secs = self.nanoseconds / 1_000_000_000;
670722
let mins = secs / 60;
671723
let hours = mins / 60;
672724

673725
let secs = secs - (mins * 60);
674726
let mins = mins - (hours * 60);
675727

676-
let milliseconds = value.milliseconds % 1_000;
728+
let nanoseconds = self.nanoseconds % 1_000_000_000;
677729

678-
let secs_sign = if secs < 0 || milliseconds < 0 {
679-
"-"
680-
} else {
681-
""
682-
};
730+
if hours != 0 {
731+
write!(f, "{prefix}{} hours", hours)?;
732+
prefix = " ";
733+
}
734+
735+
if mins != 0 {
736+
write!(f, "{prefix}{} mins", mins)?;
737+
prefix = " ";
738+
}
739+
740+
if secs != 0 || nanoseconds != 0 {
741+
let secs_sign = if secs < 0 || nanoseconds < 0 { "-" } else { "" };
742+
write!(
743+
f,
744+
"{prefix}{}{}.{:09} secs",
745+
secs_sign,
746+
secs.abs(),
747+
nanoseconds.abs()
748+
)?;
749+
}
683750

684-
write!(
685-
f,
686-
"0 years 0 mons {} days {} hours {} mins {}{}.{:03} secs",
687-
value.days,
688-
hours,
689-
mins,
690-
secs_sign,
691-
secs.abs(),
692-
milliseconds.abs(),
693-
)?;
694751
Ok(())
695752
}
696753
}
697754

698-
impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalMonthDayNanoType> {
699-
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
700-
let value = self.value(idx);
755+
struct MillisecondsFormatter<'a> {
756+
milliseconds: i32,
757+
prefix: &'a str,
758+
}
759+
760+
impl<'a> Display for MillisecondsFormatter<'a> {
761+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
762+
let mut prefix = self.prefix;
701763

702-
let secs = value.nanoseconds / 1_000_000_000;
764+
let secs = self.milliseconds / 1_000;
703765
let mins = secs / 60;
704766
let hours = mins / 60;
705767

706768
let secs = secs - (mins * 60);
707769
let mins = mins - (hours * 60);
708770

709-
let nanoseconds = value.nanoseconds % 1_000_000_000;
710-
711-
let secs_sign = if secs < 0 || nanoseconds < 0 { "-" } else { "" };
712-
713-
write!(
714-
f,
715-
"0 years {} mons {} days {} hours {} mins {}{}.{:09} secs",
716-
value.months,
717-
value.days,
718-
hours,
719-
mins,
720-
secs_sign,
721-
secs.abs(),
722-
nanoseconds.abs(),
723-
)?;
771+
let milliseconds = self.milliseconds % 1_000;
772+
773+
if hours != 0 {
774+
write!(f, "{prefix}{} hours", hours,)?;
775+
prefix = " ";
776+
}
777+
778+
if mins != 0 {
779+
write!(f, "{prefix}{} mins", mins,)?;
780+
prefix = " ";
781+
}
782+
783+
if secs != 0 || milliseconds != 0 {
784+
let secs_sign = if secs < 0 || milliseconds < 0 {
785+
"-"
786+
} else {
787+
""
788+
};
789+
790+
write!(
791+
f,
792+
"{prefix}{}{}.{:03} secs",
793+
secs_sign,
794+
secs.abs(),
795+
milliseconds.abs()
796+
)?;
797+
}
798+
724799
Ok(())
725800
}
726801
}

arrow-cast/src/pretty.rs

+27-27
Original file line numberDiff line numberDiff line change
@@ -986,16 +986,16 @@ mod tests {
986986
let table = pretty_format_batches(&[batch]).unwrap().to_string();
987987

988988
let expected = vec![
989-
"+----------------------------------------------------+",
990-
"| IntervalDayTime |",
991-
"+----------------------------------------------------+",
992-
"| 0 years 0 mons -1 days 0 hours -10 mins 0.000 secs |",
993-
"| 0 years 0 mons 0 days 0 hours 0 mins -1.001 secs |",
994-
"| 0 years 0 mons 0 days 0 hours 0 mins -0.001 secs |",
995-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.001 secs |",
996-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.010 secs |",
997-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.100 secs |",
998-
"+----------------------------------------------------+",
989+
"+------------------+",
990+
"| IntervalDayTime |",
991+
"+------------------+",
992+
"| -1 days -10 mins |",
993+
"| -1.001 secs |",
994+
"| -0.001 secs |",
995+
"| 0.001 secs |",
996+
"| 0.010 secs |",
997+
"| 0.100 secs |",
998+
"+------------------+",
999999
];
10001000

10011001
let actual: Vec<&str> = table.lines().collect();
@@ -1032,23 +1032,23 @@ mod tests {
10321032
let table = pretty_format_batches(&[batch]).unwrap().to_string();
10331033

10341034
let expected = vec![
1035-
"+-----------------------------------------------------------+",
1036-
"| IntervalMonthDayNano |",
1037-
"+-----------------------------------------------------------+",
1038-
"| 0 years -1 mons -1 days 0 hours -10 mins 0.000000000 secs |",
1039-
"| 0 years 0 mons 0 days 0 hours 0 mins -1.000000001 secs |",
1040-
"| 0 years 0 mons 0 days 0 hours 0 mins -0.000000001 secs |",
1041-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.000000001 secs |",
1042-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.000000010 secs |",
1043-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.000000100 secs |",
1044-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.000001000 secs |",
1045-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.000010000 secs |",
1046-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.000100000 secs |",
1047-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.001000000 secs |",
1048-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.010000000 secs |",
1049-
"| 0 years 0 mons 0 days 0 hours 0 mins 0.100000000 secs |",
1050-
"| 0 years 0 mons 0 days 0 hours 0 mins 1.000000000 secs |",
1051-
"+-----------------------------------------------------------+",
1035+
"+--------------------------+",
1036+
"| IntervalMonthDayNano |",
1037+
"+--------------------------+",
1038+
"| -1 mons -1 days -10 mins |",
1039+
"| -1.000000001 secs |",
1040+
"| -0.000000001 secs |",
1041+
"| 0.000000001 secs |",
1042+
"| 0.000000010 secs |",
1043+
"| 0.000000100 secs |",
1044+
"| 0.000001000 secs |",
1045+
"| 0.000010000 secs |",
1046+
"| 0.000100000 secs |",
1047+
"| 0.001000000 secs |",
1048+
"| 0.010000000 secs |",
1049+
"| 0.100000000 secs |",
1050+
"| 1.000000000 secs |",
1051+
"+--------------------------+",
10521052
];
10531053

10541054
let actual: Vec<&str> = table.lines().collect();

arrow-flight/Cargo.toml

+6-5
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ bytes = { version = "1", default-features = false }
4444
futures = { version = "0.3", default-features = false, features = ["alloc"] }
4545
once_cell = { version = "1", optional = true }
4646
paste = { version = "1.0" }
47-
prost = { version = "0.12.3", default-features = false, features = ["prost-derive"] }
47+
prost = { version = "0.13.1", default-features = false, features = ["prost-derive"] }
4848
# For Timestamp type
49-
prost-types = { version = "0.12.3", default-features = false }
49+
prost-types = { version = "0.13.1", default-features = false }
5050
tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"] }
51-
tonic = { version = "0.11.0", default-features = false, features = ["transport", "codegen", "prost"] }
51+
tonic = { version = "0.12.0", default-features = false, features = ["transport", "codegen", "prost"] }
5252

5353
# CLI-related dependencies
5454
anyhow = { version = "1.0", optional = true }
@@ -70,8 +70,9 @@ cli = ["anyhow", "arrow-cast/prettyprint", "clap", "tracing-log", "tracing-subsc
7070
[dev-dependencies]
7171
arrow-cast = { workspace = true, features = ["prettyprint"] }
7272
assert_cmd = "2.0.8"
73-
http = "0.2.9"
74-
http-body = "0.4.5"
73+
http = "1.1.0"
74+
http-body = "1.0.0"
75+
hyper-util = "0.1"
7576
pin-project-lite = "0.2"
7677
tempfile = "3.3"
7778
tokio-stream = { version = "0.1", features = ["net"] }

arrow-flight/examples/flight_sql_server.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,8 @@ impl ProstMessageExt for FetchResults {
783783
#[cfg(test)]
784784
mod tests {
785785
use super::*;
786-
use futures::TryStreamExt;
786+
use futures::{TryFutureExt, TryStreamExt};
787+
use hyper_util::rt::TokioIo;
787788
use std::fs;
788789
use std::future::Future;
789790
use std::net::SocketAddr;
@@ -843,7 +844,8 @@ mod tests {
843844
.serve_with_incoming(stream);
844845

845846
let request_future = async {
846-
let connector = service_fn(move |_| UnixStream::connect(path.clone()));
847+
let connector =
848+
service_fn(move |_| UnixStream::connect(path.clone()).map_ok(TokioIo::new));
847849
let channel = Endpoint::try_from("http://example.com")
848850
.unwrap()
849851
.connect_with_connector(connector)

0 commit comments

Comments
 (0)