From 6086695f920e24d26236edacb6d3f369f064541f Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 27 Jul 2024 18:43:11 -0400 Subject: [PATCH 1/4] update --- arrow-row/src/lib.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index a6fd03b5bcec..3e2b04fe5ad9 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -835,10 +835,17 @@ impl Rows { /// Returns the row at index `row` pub fn row(&self, row: usize) -> Row<'_> { - let end = self.offsets[row + 1]; - let start = self.offsets[row]; + assert!(row + 1 < self.offsets.len()); + unsafe { self.row_unchecked(row) } + } + + /// Returns the row at index `row` without bounds checking + pub unsafe fn row_unchecked(&self, row: usize) -> Row<'_> { + let end = unsafe { self.offsets.get_unchecked(row + 1) }; + let start = unsafe { self.offsets.get_unchecked(row) }; + let data = unsafe { self.buffer.get_unchecked(*start..*end) }; Row { - data: &self.buffer[start..end], + data, config: &self.config, } } @@ -898,7 +905,7 @@ impl<'a> Iterator for RowsIter<'a> { if self.end == self.start { return None; } - let row = self.rows.row(self.start); + let row = unsafe { self.rows.row_unchecked(self.start) }; self.start += 1; Some(row) } From 5a15a786485d32e9452edef2b3fa374dcb51c994 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 27 Jul 2024 19:24:54 -0400 Subject: [PATCH 2/4] update comment --- arrow-row/src/lib.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 3e2b04fe5ad9..2d9af757550e 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -839,10 +839,13 @@ impl Rows { unsafe { self.row_unchecked(row) } } - /// Returns the row at index `row` without bounds checking - pub unsafe fn row_unchecked(&self, row: usize) -> Row<'_> { - let end = unsafe { self.offsets.get_unchecked(row + 1) }; - let start = unsafe { self.offsets.get_unchecked(row) }; + /// Returns the row at `index` without bounds checking + /// + /// # Safety + /// Caller must ensure that `index` is less than the number of offsets (#rows + 1) + pub unsafe fn row_unchecked(&self, index: usize) -> Row<'_> { + let end = unsafe { self.offsets.get_unchecked(index + 1) }; + let start = unsafe { self.offsets.get_unchecked(index) }; let data = unsafe { self.buffer.get_unchecked(*start..*end) }; Row { data, @@ -905,6 +908,8 @@ impl<'a> Iterator for RowsIter<'a> { if self.end == self.start { return None; } + + // SAFETY: We have checked that `start` is less than `end` let row = unsafe { self.rows.row_unchecked(self.start) }; self.start += 1; Some(row) @@ -927,7 +932,8 @@ impl<'a> DoubleEndedIterator for RowsIter<'a> { if self.end == self.start { return None; } - let row = self.rows.row(self.end); + // Safety: We have checked that `start` is less than `end` + let row = unsafe { self.rows.row_unchecked(self.end) }; self.end -= 1; Some(row) } From f8aa2e86c9ba33eb0d41d23246dccdfc57e3eb7c Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sun, 4 Aug 2024 21:11:20 -0400 Subject: [PATCH 3/4] update row-iter bench --- arrow/benches/row_format.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs index 0fb63b5b3240..79f91e2b4c6d 100644 --- a/arrow/benches/row_format.rs +++ b/arrow/benches/row_format.rs @@ -56,6 +56,24 @@ fn do_bench(c: &mut Criterion, name: &str, cols: Vec) { }); } +fn bench_iter(c: &mut Criterion) { + let col = create_string_view_array_with_len(40960, 0., 100, false); + let converter = RowConverter::new(vec![SortField::new(col.data_type().clone())]).unwrap(); + let rows = converter + .convert_columns(&vec![Arc::new(col) as ArrayRef]) + .unwrap(); + + c.bench_function(&"iterate rows", |b| { + b.iter(|| { + black_box({ + for r in rows.iter() { + std::hint::black_box(r.as_ref()); + } + }) + }) + }); +} + fn row_bench(c: &mut Criterion) { let cols = vec![Arc::new(create_primitive_array::(4096, 0.)) as ArrayRef]; do_bench(c, "4096 u64(0)", cols); @@ -145,6 +163,8 @@ fn row_bench(c: &mut Criterion) { Arc::new(create_primitive_array::(4096, 0.)) as ArrayRef, ]; do_bench(c, "4096 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)", cols); + + bench_iter(c); } criterion_group!(benches, row_bench); From b6635a1b5bcb30e9b3de0068eea0fa7c79e2645d Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sun, 4 Aug 2024 21:40:04 -0400 Subject: [PATCH 4/4] make clippy happy --- arrow/benches/row_format.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs index 79f91e2b4c6d..773cdc652097 100644 --- a/arrow/benches/row_format.rs +++ b/arrow/benches/row_format.rs @@ -60,16 +60,14 @@ fn bench_iter(c: &mut Criterion) { let col = create_string_view_array_with_len(40960, 0., 100, false); let converter = RowConverter::new(vec![SortField::new(col.data_type().clone())]).unwrap(); let rows = converter - .convert_columns(&vec![Arc::new(col) as ArrayRef]) + .convert_columns(&[Arc::new(col) as ArrayRef]) .unwrap(); - c.bench_function(&"iterate rows", |b| { + c.bench_function("iterate rows", |b| { b.iter(|| { - black_box({ - for r in rows.iter() { - std::hint::black_box(r.as_ref()); - } - }) + for r in rows.iter() { + std::hint::black_box(r.as_ref()); + } }) }); }