Skip to content

Commit de6a759

Browse files
authored
chore: add docs, part of #37 (apache#6433)
* chore: add docs, part of #37
  - Add the lint attribute `#![warn(missing_docs)]` to the following crates:
    - `arrow-array`
    - `arrow-cast`
    - `arrow-csv`
    - `arrow-data`
    - `arrow-json`
    - `arrow-ord`
    - `arrow-pyarrow-integration-testing`
    - `arrow-row`
    - `arrow-schema`
    - `arrow-select`
    - `arrow-string`
    - `arrow`
    - `parquet_derive`
  - Add docs to the items that generated lint warnings.
  - Remove the `bitflags` workaround in `arrow-schema`. At some point, a change in `bitflags v2.3.0` started generating lint warnings in `arrow-schema`. This was handled using a [workaround](apache#4233) (see the upstream [issue](bitflags/bitflags#356)). `bitflags v2.3.1` fixed the issue, so the workaround is no longer needed.
* fix: resolve comments on PR apache#6433
1 parent 7191f4d commit de6a759

File tree

30 files changed

+147
-35
lines changed

30 files changed

+147
-35
lines changed

arrow-array/src/builder/generic_bytes_builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,7 @@ mod tests {
537537
write!(builder, "buz").unwrap();
538538
builder.append_value("");
539539
let a = builder.finish();
540-
let r: Vec<_> = a.iter().map(|x| x.unwrap()).collect();
540+
let r: Vec<_> = a.iter().flatten().collect();
541541
assert_eq!(r, &["foo", "bar\n", "fizbuz"])
542542
}
543543
}

arrow-array/src/builder/generic_bytes_view_builder.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,8 @@ pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {
555555

556556
#[cfg(test)]
557557
mod tests {
558+
use core::str;
559+
558560
use super::*;
559561
use crate::Array;
560562

@@ -642,7 +644,7 @@ mod tests {
642644
let array = v.finish_cloned();
643645
array.to_data().validate_full().unwrap();
644646
assert_eq!(array.data_buffers().len(), 5);
645-
let actual: Vec<_> = array.iter().map(Option::unwrap).collect();
647+
let actual: Vec<_> = array.iter().flatten().collect();
646648
assert_eq!(
647649
actual,
648650
&[
@@ -692,13 +694,13 @@ mod tests {
692694
let mut exp_builder = StringViewBuilder::new();
693695
let mut fixed_builder = StringViewBuilder::new().with_fixed_block_size(STARTING_BLOCK_SIZE);
694696

695-
let long_string = String::from_utf8(vec![b'a'; STARTING_BLOCK_SIZE as usize]).unwrap();
697+
let long_string = str::from_utf8(&[b'a'; STARTING_BLOCK_SIZE as usize]).unwrap();
696698

697699
for i in 0..9 {
698700
// 8k, 16k, 32k, 64k, 128k, 256k, 512k, 1M, 2M
699701
for _ in 0..(2_u32.pow(i)) {
700-
exp_builder.append_value(&long_string);
701-
fixed_builder.append_value(&long_string);
702+
exp_builder.append_value(long_string);
703+
fixed_builder.append_value(long_string);
702704
}
703705
exp_builder.flush_in_progress();
704706
fixed_builder.flush_in_progress();
@@ -721,7 +723,7 @@ mod tests {
721723
}
722724

723725
// Add one more value, and the buffer stops growing.
724-
exp_builder.append_value(&long_string);
726+
exp_builder.append_value(long_string);
725727
exp_builder.flush_in_progress();
726728
assert_eq!(
727729
exp_builder.completed.last().unwrap().capacity(),

arrow-cast/src/cast/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9394,7 +9394,7 @@ mod tests {
93949394
Some(vec![Some(0), None, Some(2)]),
93959395
]);
93969396
let a = cast_with_options(&array, &DataType::Utf8, &options).unwrap();
9397-
let r: Vec<_> = a.as_string::<i32>().iter().map(|x| x.unwrap()).collect();
9397+
let r: Vec<_> = a.as_string::<i32>().iter().flatten().collect();
93989398
assert_eq!(r, &["[0, 1, 2]", "[0, null, 2]"]);
93999399
}
94009400
#[test]

arrow-csv/src/writer.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,7 @@ mod tests {
442442
};
443443
use arrow_array::types::*;
444444
use arrow_buffer::i256;
445+
use core::str;
445446
use std::io::{Cursor, Read, Seek};
446447
use std::sync::Arc;
447448

@@ -508,7 +509,7 @@ Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
508509
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes
509510
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
510511
"#;
511-
assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
512+
assert_eq!(expected, str::from_utf8(&buffer).unwrap());
512513
}
513514

514515
#[test]
@@ -558,7 +559,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
558559
,
559560
0.290472,0.290472
560561
"#;
561-
assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
562+
assert_eq!(expected, str::from_utf8(&buffer).unwrap());
562563
}
563564

564565
#[test]

arrow-data/src/byte_view.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ pub struct ByteView {
4040

4141
impl ByteView {
4242
#[inline(always)]
43+
/// Convert `ByteView` to `u128` by concatenating the fields
4344
pub fn as_u128(self) -> u128 {
4445
(self.length as u128)
4546
| ((self.prefix as u128) << 32)

arrow-data/src/data.rs

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ pub struct ArrayData {
231231
nulls: Option<NullBuffer>,
232232
}
233233

234+
/// A thread-safe, shared reference to the Arrow array data.
234235
pub type ArrayDataRef = Arc<ArrayData>;
235236

236237
impl ArrayData {
@@ -1747,7 +1748,12 @@ pub enum BufferSpec {
17471748
/// for array slicing and interoperability with `Vec`, which cannot be over-aligned.
17481749
///
17491750
/// Note that these alignment requirements will vary between architectures
1750-
FixedWidth { byte_width: usize, alignment: usize },
1751+
FixedWidth {
1752+
/// The width of each element in bytes
1753+
byte_width: usize,
1754+
/// The alignment required by Rust for an array of the corresponding primitive
1755+
alignment: usize,
1756+
},
17511757
/// Variable width, such as string data for utf8 data
17521758
VariableWidth,
17531759
/// Buffer holds a bitmap.
@@ -1783,6 +1789,7 @@ pub struct ArrayDataBuilder {
17831789

17841790
impl ArrayDataBuilder {
17851791
#[inline]
1792+
/// Creates a new array data builder
17861793
pub const fn new(data_type: DataType) -> Self {
17871794
Self {
17881795
data_type,
@@ -1796,61 +1803,72 @@ impl ArrayDataBuilder {
17961803
}
17971804
}
17981805

1806+
/// Creates a new array data builder from an existing one, changing the data type
17991807
pub fn data_type(self, data_type: DataType) -> Self {
18001808
Self { data_type, ..self }
18011809
}
18021810

18031811
#[inline]
18041812
#[allow(clippy::len_without_is_empty)]
1813+
/// Sets the length of the [ArrayData]
18051814
pub const fn len(mut self, n: usize) -> Self {
18061815
self.len = n;
18071816
self
18081817
}
18091818

1819+
/// Sets the null buffer of the [ArrayData]
18101820
pub fn nulls(mut self, nulls: Option<NullBuffer>) -> Self {
18111821
self.nulls = nulls;
18121822
self.null_count = None;
18131823
self.null_bit_buffer = None;
18141824
self
18151825
}
18161826

1827+
/// Sets the null count of the [ArrayData]
18171828
pub fn null_count(mut self, null_count: usize) -> Self {
18181829
self.null_count = Some(null_count);
18191830
self
18201831
}
18211832

1833+
/// Sets the `null_bit_buffer` of the [ArrayData]
18221834
pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
18231835
self.nulls = None;
18241836
self.null_bit_buffer = buf;
18251837
self
18261838
}
18271839

1840+
/// Sets the offset of the [ArrayData]
18281841
#[inline]
18291842
pub const fn offset(mut self, n: usize) -> Self {
18301843
self.offset = n;
18311844
self
18321845
}
18331846

1847+
/// Sets the buffers of the [ArrayData]
18341848
pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
18351849
self.buffers = v;
18361850
self
18371851
}
18381852

1853+
/// Adds a single buffer to the [ArrayData]'s buffers
18391854
pub fn add_buffer(mut self, b: Buffer) -> Self {
18401855
self.buffers.push(b);
18411856
self
18421857
}
18431858

1844-
pub fn add_buffers(mut self, bs: Vec<Buffer>) -> Self {
1859+
/// Adds multiple buffers to the [ArrayData]'s buffers
1860+
pub fn add_buffers<I: IntoIterator<Item = Buffer>>(mut self, bs: I) -> Self {
18451861
self.buffers.extend(bs);
18461862
self
18471863
}
18481864

1865+
/// Sets the child data of the [ArrayData]
18491866
pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
18501867
self.child_data = v;
18511868
self
18521869
}
18531870

1871+
/// Adds a single child data to the [ArrayData]'s child data
18541872
pub fn add_child_data(mut self, r: ArrayData) -> Self {
18551873
self.child_data.push(r);
18561874
self
@@ -1873,22 +1891,25 @@ impl ArrayDataBuilder {
18731891

18741892
/// Same as [`Self::build_unchecked`] but ignoring `force_validate` feature flag
18751893
unsafe fn build_impl(self) -> ArrayData {
1876-
let nulls = self.nulls.or_else(|| {
1877-
let buffer = self.null_bit_buffer?;
1878-
let buffer = BooleanBuffer::new(buffer, self.offset, self.len);
1879-
Some(match self.null_count {
1880-
Some(n) => NullBuffer::new_unchecked(buffer, n),
1881-
None => NullBuffer::new(buffer),
1894+
let nulls = self
1895+
.nulls
1896+
.or_else(|| {
1897+
let buffer = self.null_bit_buffer?;
1898+
let buffer = BooleanBuffer::new(buffer, self.offset, self.len);
1899+
Some(match self.null_count {
1900+
Some(n) => NullBuffer::new_unchecked(buffer, n),
1901+
None => NullBuffer::new(buffer),
1902+
})
18821903
})
1883-
});
1904+
.filter(|b| b.null_count() != 0);
18841905

18851906
ArrayData {
18861907
data_type: self.data_type,
18871908
len: self.len,
18881909
offset: self.offset,
18891910
buffers: self.buffers,
18901911
child_data: self.child_data,
1891-
nulls: nulls.filter(|b| b.null_count() != 0),
1912+
nulls,
18921913
}
18931914
}
18941915

arrow-data/src/decimal.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! Defines maximum and minimum values for `decimal256` and `decimal128` types for varying precisions.
19+
//!
20+
//! Also provides functions to validate if a given decimal value is within the valid range of the decimal type.
21+
1822
use arrow_buffer::i256;
1923
use arrow_schema::ArrowError;
2024

arrow-data/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
//!
2020
//! For a higher-level, strongly-typed interface see [arrow_array](https://docs.rs/arrow_array)
2121
22+
#![warn(missing_docs)]
2223
mod data;
2324
pub use data::*;
2425

arrow-data/src/transform/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! Low-level array data abstractions.
19+
//!
20+
//! Provides utilities for creating, manipulating, and converting Arrow arrays
21+
//! made of primitive types, strings, and nested types.
22+
1823
use super::{data::new_buffers, ArrayData, ArrayDataBuilder, ByteView};
1924
use crate::bit_mask::set_bits;
2025
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};

arrow-json/src/reader/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ mod tests {
10071007
let map_values = map.values().as_list::<i32>();
10081008
assert_eq!(map.value_offsets(), &[0, 1, 3, 5]);
10091009

1010-
let k: Vec<_> = map_keys.iter().map(|x| x.unwrap()).collect();
1010+
let k: Vec<_> = map_keys.iter().flatten().collect();
10111011
assert_eq!(&k, &["a", "a", "b", "c", "a"]);
10121012

10131013
let list_values = map_values.values().as_string::<i32>();

arrow-ord/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
//! ```
4444
//!
4545
46+
#![warn(missing_docs)]
4647
pub mod cmp;
4748
#[doc(hidden)]
4849
pub mod comparison;

arrow-ord/src/rank.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! Provides the `rank` function, which assigns a rank to each value in an array
19+
1820
use arrow_array::cast::AsArray;
1921
use arrow_array::types::*;
2022
use arrow_array::{downcast_primitive_array, Array, ArrowNativeTypeOp, GenericByteArray};

arrow-ord/src/sort.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,9 @@ where
635635
/// One column to be used in lexicographical sort
636636
#[derive(Clone, Debug)]
637637
pub struct SortColumn {
638+
/// The column to sort
638639
pub values: ArrayRef,
640+
/// Sort options for this column
639641
pub options: Option<SortOptions>,
640642
}
641643

arrow-pyarrow-integration-testing/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
//! This library demonstrates a minimal usage of Rust's C data interface to pass
1919
//! arrays from and to Python.
2020
21+
#![warn(missing_docs)]
2122
use std::sync::Arc;
2223

2324
use arrow::array::new_empty_array;

arrow-row/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
//! [compared]: PartialOrd
126126
//! [compare]: PartialOrd
127127
128+
#![warn(missing_docs)]
128129
use std::cmp::Ordering;
129130
use std::hash::{Hash, Hasher};
130131
use std::sync::Arc;

arrow-schema/src/datatype.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -420,11 +420,13 @@ pub enum IntervalUnit {
420420
MonthDayNano,
421421
}
422422

423-
// Sparse or Dense union layouts
423+
/// Sparse or Dense union layouts
424424
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)]
425425
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
426426
pub enum UnionMode {
427+
/// Sparse union layout
427428
Sparse,
429+
/// Dense union layout
428430
Dense,
429431
}
430432

arrow-schema/src/error.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,39 @@ use std::error::Error;
2626
pub enum ArrowError {
2727
/// Returned when functionality is not yet available.
2828
NotYetImplemented(String),
29+
/// Wraps an external error.
2930
ExternalError(Box<dyn Error + Send + Sync>),
31+
/// Error during casting from one type to another.
3032
CastError(String),
33+
/// Memory or buffer error.
3134
MemoryError(String),
35+
/// Error during parsing from a string.
3236
ParseError(String),
37+
/// Error during schema-related operations.
3338
SchemaError(String),
39+
/// Error during computation.
3440
ComputeError(String),
41+
/// Error when attempting to divide by zero.
3542
DivideByZero,
43+
/// Error when an arithmetic operation overflows.
3644
ArithmeticOverflow(String),
45+
/// Error during CSV-related operations.
3746
CsvError(String),
47+
/// Error during JSON-related operations.
3848
JsonError(String),
49+
/// Error during IO operations.
3950
IoError(String, std::io::Error),
51+
/// Error during IPC operations in `arrow-ipc` or `arrow-flight`.
4052
IpcError(String),
53+
/// Error indicating that an unexpected or bad argument was passed to a function.
4154
InvalidArgumentError(String),
55+
/// Error during Parquet operations.
4256
ParquetError(String),
4357
/// Error during import or export to/from the C Data Interface
4458
CDataInterface(String),
59+
/// Error when a dictionary key is too large to be represented by the key type
4560
DictionaryKeyOverflowError,
61+
/// Error when the run end index in a REE array is bigger than the array length
4662
RunEndIndexOverflowError,
4763
}
4864

0 commit comments

Comments
 (0)