Skip to content

Commit 3ebb033

Browse files
authored
Fix Clippy for the Rust 1.80 release (#6116)
* Fix clippy lints in arrow-data * Fix clippy errors in arrow-array * fix clippy in concat * Clippy in arrow-string * remove unnecessary feature in arrow-array * fix clippy in arrow-cast * Fix clippy in parquet crate * Fix clippy in arrow-flight
1 parent fa2fbfd commit 3ebb033

File tree

21 files changed

+108
-147
lines changed

21 files changed

+108
-147
lines changed

arrow-array/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ hashbrown = { version = "0.14.2", default-features = false }
5252

5353
[features]
5454
ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
55+
force_validate = []
5556

5657
[dev-dependencies]
5758
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }

arrow-array/src/array/byte_view_array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ use super::ByteArrayType;
6666
/// * Strings with length <= 12 are stored directly in the view.
6767
///
6868
/// * Strings with length > 12: The first four bytes are stored inline in the
69-
/// view and the entire string is stored in one of the buffers.
69+
/// view and the entire string is stored in one of the buffers.
7070
///
7171
/// Unlike [`GenericByteArray`], there are no constraints on the offsets other
7272
/// than they must point into a valid buffer. However, they can be out of order,

arrow-array/src/array/string_array.rs

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -376,70 +376,6 @@ mod tests {
376376
.expect("All null array has valid array data");
377377
}
378378

379-
#[cfg(feature = "test_utils")]
380-
#[test]
381-
fn bad_size_collect_string() {
382-
use crate::util::test_util::BadIterator;
383-
let data = vec![Some("foo"), None, Some("bar")];
384-
let expected: StringArray = data.clone().into_iter().collect();
385-
386-
// Iterator reports too many items
387-
let arr: StringArray = BadIterator::new(3, 10, data.clone()).collect();
388-
assert_eq!(expected, arr);
389-
390-
// Iterator reports too few items
391-
let arr: StringArray = BadIterator::new(3, 1, data.clone()).collect();
392-
assert_eq!(expected, arr);
393-
}
394-
395-
#[cfg(feature = "test_utils")]
396-
#[test]
397-
fn bad_size_collect_large_string() {
398-
use crate::util::test_util::BadIterator;
399-
let data = vec![Some("foo"), None, Some("bar")];
400-
let expected: LargeStringArray = data.clone().into_iter().collect();
401-
402-
// Iterator reports too many items
403-
let arr: LargeStringArray = BadIterator::new(3, 10, data.clone()).collect();
404-
assert_eq!(expected, arr);
405-
406-
// Iterator reports too few items
407-
let arr: LargeStringArray = BadIterator::new(3, 1, data.clone()).collect();
408-
assert_eq!(expected, arr);
409-
}
410-
411-
#[cfg(feature = "test_utils")]
412-
#[test]
413-
fn bad_size_iter_values_string() {
414-
use crate::util::test_util::BadIterator;
415-
let data = vec!["foo", "bar", "baz"];
416-
let expected: StringArray = data.clone().into_iter().map(Some).collect();
417-
418-
// Iterator reports too many items
419-
let arr = StringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
420-
assert_eq!(expected, arr);
421-
422-
// Iterator reports too few items
423-
let arr = StringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
424-
assert_eq!(expected, arr);
425-
}
426-
427-
#[cfg(feature = "test_utils")]
428-
#[test]
429-
fn bad_size_iter_values_large_string() {
430-
use crate::util::test_util::BadIterator;
431-
let data = vec!["foo", "bar", "baz"];
432-
let expected: LargeStringArray = data.clone().into_iter().map(Some).collect();
433-
434-
// Iterator reports too many items
435-
let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
436-
assert_eq!(expected, arr);
437-
438-
// Iterator reports too few items
439-
let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
440-
assert_eq!(expected, arr);
441-
}
442-
443379
fn _test_generic_string_array_from_list_array<O: OffsetSizeTrait>() {
444380
let values = b"HelloArrowAndParquet";
445381
// "ArrowAndParquet"

arrow-cast/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ features = ["prettyprint"]
3838

3939
[features]
4040
prettyprint = ["comfy-table"]
41+
force_validate = []
4142

4243
[dependencies]
4344
arrow-array = { workspace = true }

arrow-cast/src/cast/mod.rs

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -568,28 +568,27 @@ fn timestamp_to_date32<T: ArrowTimestampType>(
568568
/// Accepts [`CastOptions`] to specify cast behavior. See also [`cast()`].
569569
///
570570
/// # Behavior
571-
/// * Boolean to Utf8: `true` => '1', `false` => `0`
572-
/// * Utf8 to boolean: `true`, `yes`, `on`, `1` => `true`, `false`, `no`, `off`, `0` => `false`,
571+
/// * `Boolean` to `Utf8`: `true` => '1', `false` => `0`
572+
/// * `Utf8` to `Boolean`: `true`, `yes`, `on`, `1` => `true`, `false`, `no`, `off`, `0` => `false`,
573573
/// short variants are accepted, other strings return null or error
574-
/// * Utf8 to numeric: strings that can't be parsed to numbers return null, float strings
574+
/// * `Utf8` to Numeric: strings that can't be parsed to numbers return null, float strings
575575
/// in integer casts return null
576-
/// * Numeric to boolean: 0 returns `false`, any other value returns `true`
577-
/// * List to List: the underlying data type is cast
578-
/// * List to FixedSizeList: the underlying data type is cast. If safe is true and a list element
579-
/// has the wrong length it will be replaced with NULL, otherwise an error will be returned
580-
/// * Primitive to List: a list array with 1 value per slot is created
581-
/// * Date32 and Date64: precision lost when going to higher interval
582-
/// * Time32 and Time64: precision lost when going to higher interval
583-
/// * Timestamp and Date{32|64}: precision lost when going to higher interval
584-
/// * Temporal to/from backing primitive: zero-copy with data type change
585-
/// * Casting from `float32/float64` to `Decimal(precision, scale)` rounds to the `scale` decimals
586-
/// (i.e. casting `6.4999` to Decimal(10, 1) becomes `6.5`). Prior to version `26.0.0`,
587-
/// casting would truncate instead (i.e. outputs `6.4` instead)
576+
/// * Numeric to `Boolean`: 0 returns `false`, any other value returns `true`
577+
/// * `List` to `List`: the underlying data type is cast
578+
/// * `List` to `FixedSizeList`: the underlying data type is cast. If safe is true and a list element
579+
/// has the wrong length it will be replaced with NULL, otherwise an error will be returned
580+
/// * Primitive to `List`: a list array with 1 value per slot is created
581+
/// * `Date32` and `Date64`: precision lost when going to higher interval
582+
/// * `Time32` and `Time64`: precision lost when going to higher interval
583+
/// * `Timestamp` and `Date{32|64}`: precision lost when going to higher interval
584+
/// * Temporal to/from backing Primitive: zero-copy with data type change
585+
/// * `Float32/Float64` to `Decimal(precision, scale)` rounds to the `scale` decimals
586+
/// (i.e. casting `6.4999` to `Decimal(10, 1)` becomes `6.5`).
588587
///
589588
/// Unsupported Casts (check with `can_cast_types` before calling):
590589
/// * To or from `StructArray`
591-
/// * List to primitive
592-
/// * Interval and duration
590+
/// * `List` to `Primitive`
591+
/// * `Interval` and `Duration`
593592
///
594593
/// # Timestamps and Timezones
595594
///

arrow-cast/src/parse.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,12 +285,12 @@ fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
285285
/// variants and converts it to nanoseconds since midnight.
286286
///
287287
/// Examples of accepted inputs:
288+
///
288289
/// * `09:26:56.123 AM`
289290
/// * `23:59:59`
290291
/// * `6:00 pm`
291-
//
292-
/// Internally, this function uses the `chrono` library for the
293-
/// time parsing
292+
///
293+
/// Internally, this function uses the `chrono` library for the time parsing
294294
///
295295
/// ## Timezone / Offset Handling
296296
///

arrow-data/src/data.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,8 +1179,10 @@ impl ArrayData {
11791179
///
11801180
/// Does not (yet) check
11811181
/// 1. Union type_ids are valid see [#85](https://github.com/apache/arrow-rs/issues/85)
1182-
/// Validates the the null count is correct and that any
1183-
/// nullability requirements of its children are correct
1182+
/// 2. the null count is correct
1183+
/// 3. any nullability requirements of its children are correct
1184+
///
1185+
/// [#85]: https://github.com/apache/arrow-rs/issues/85
11841186
pub fn validate_nulls(&self) -> Result<(), ArrowError> {
11851187
if let Some(nulls) = &self.nulls {
11861188
let actual = nulls.len() - nulls.inner().count_set_bits();

arrow-data/src/equal/mod.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,17 +138,24 @@ fn equal_range(
138138
}
139139

140140
/// Logically compares two [ArrayData].
141+
///
141142
/// Two arrays are logically equal if and only if:
142143
/// * their data types are equal
143144
/// * their lengths are equal
144145
/// * their null counts are equal
145146
/// * their null bitmaps are equal
146147
/// * each of their items are equal
147-
/// two items are equal when their in-memory representation is physically equal (i.e. same bit content).
148+
///
149+
/// Two items are equal when their in-memory representation is physically equal
150+
/// (i.e. has the same bit content).
151+
///
148152
/// The physical comparison depend on the data type.
153+
///
149154
/// # Panics
150-
/// This function may panic whenever any of the [ArrayData] does not follow the Arrow specification.
151-
/// (e.g. wrong number of buffers, buffer `len` does not correspond to the declared `len`)
155+
///
156+
/// This function may panic whenever any of the [ArrayData] does not follow the
157+
/// Arrow specification. (e.g. wrong number of buffers, buffer `len` does not
158+
/// correspond to the declared `len`)
152159
pub fn equal(lhs: &ArrayData, rhs: &ArrayData) -> bool {
153160
utils::base_equal(lhs, rhs)
154161
&& lhs.null_count() == rhs.null_count()

arrow-flight/src/decode.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,8 @@ impl futures::Stream for FlightRecordBatchStream {
225225
/// Example usecases
226226
///
227227
/// 1. Using this low level stream it is possible to receive a stream
228-
/// of RecordBatches in FlightData that have different schemas by
229-
/// handling multiple schema messages separately.
228+
/// of RecordBatches in FlightData that have different schemas by
229+
/// handling multiple schema messages separately.
230230
pub struct FlightDataDecoder {
231231
/// Underlying data stream
232232
response: BoxStream<'static, Result<FlightData>>,

arrow-flight/src/encode.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,14 @@ use futures::{ready, stream::BoxStream, Stream, StreamExt};
3838
/// several have already been successfully produced.
3939
///
4040
/// # Caveats
41-
/// 1. When [`DictionaryHandling`] is [`DictionaryHandling::Hydrate`], [`DictionaryArray`](arrow_array::array::DictionaryArray)s
42-
/// are converted to their underlying types prior to transport.
43-
/// When [`DictionaryHandling`] is [`DictionaryHandling::Resend`], Dictionary [`FlightData`] is sent with every
44-
/// [`RecordBatch`] that contains a [`DictionaryArray`](arrow_array::array::DictionaryArray).
45-
/// See <https://github.com/apache/arrow-rs/issues/3389>.
41+
/// 1. When [`DictionaryHandling`] is [`DictionaryHandling::Hydrate`],
42+
/// [`DictionaryArray`]s are converted to their underlying types prior to
43+
/// transport.
44+
/// When [`DictionaryHandling`] is [`DictionaryHandling::Resend`], Dictionary [`FlightData`] is sent with every
45+
/// [`RecordBatch`] that contains a [`DictionaryArray`](arrow_array::array::DictionaryArray).
46+
/// See <https://github.com/apache/arrow-rs/issues/3389>.
47+
///
48+
/// [`DictionaryArray`]: arrow_array::array::DictionaryArray
4649
///
4750
/// # Example
4851
/// ```no_run

arrow-flight/src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@
2626
//! This crate contains:
2727
//!
2828
//! 1. Low level [prost] generated structs
29-
//! for Flight gRPC protobuf messages, such as [`FlightData`], [`FlightInfo`],
30-
//! [`Location`] and [`Ticket`].
29+
//! for Flight gRPC protobuf messages, such as [`FlightData`], [`FlightInfo`],
30+
//! [`Location`] and [`Ticket`].
3131
//!
3232
//! 2. Low level [tonic] generated [`flight_service_client`] and
33-
//! [`flight_service_server`].
33+
//! [`flight_service_server`].
3434
//!
3535
//! 3. Experimental support for [Flight SQL] in [`sql`]. Requires the
36-
//! `flight-sql-experimental` feature of this crate to be activated.
36+
//! `flight-sql-experimental` feature of this crate to be activated.
3737
//!
3838
//! [Flight SQL]: https://arrow.apache.org/docs/format/FlightSql.html
3939
#![allow(rustdoc::invalid_html_tags)]

arrow-select/src/concat.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -612,13 +612,13 @@ mod tests {
612612
fn test_string_dictionary_merge() {
613613
let mut builder = StringDictionaryBuilder::<Int32Type>::new();
614614
for i in 0..20 {
615-
builder.append(&i.to_string()).unwrap();
615+
builder.append(i.to_string()).unwrap();
616616
}
617617
let input_1 = builder.finish();
618618

619619
let mut builder = StringDictionaryBuilder::<Int32Type>::new();
620620
for i in 0..30 {
621-
builder.append(&i.to_string()).unwrap();
621+
builder.append(i.to_string()).unwrap();
622622
}
623623
let input_2 = builder.finish();
624624

arrow-string/src/substring.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ use std::sync::Arc;
3434
/// # Arguments
3535
///
3636
/// * `start` - The start index of all substrings.
37-
/// If `start >= 0`, then count from the start of the string,
38-
/// otherwise count from the end of the string.
37+
/// If `start >= 0`, then count from the start of the string,
38+
/// otherwise count from the end of the string.
3939
///
4040
/// * `length`(option) - The length of all substrings.
41-
/// If `length` is [None], then the substring is from `start` to the end of the string.
41+
/// If `length` is [None], then the substring is from `start` to the end of the string.
4242
///
4343
/// Attention: Both `start` and `length` are counted by byte, not by char.
4444
///
@@ -53,10 +53,13 @@ use std::sync::Arc;
5353
/// ```
5454
///
5555
/// # Error
56-
/// - The function errors when the passed array is not a [`GenericStringArray`], [`GenericBinaryArray`], [`FixedSizeBinaryArray`]
57-
/// or [`DictionaryArray`] with supported array type as its value type.
58-
/// - The function errors if the offset of a substring in the input array is at invalid char boundary (only for \[Large\]String array).
59-
/// It is recommended to use [`substring_by_char`] if the input array may contain non-ASCII chars.
56+
/// - The function errors when the passed array is not a [`GenericStringArray`],
57+
/// [`GenericBinaryArray`], [`FixedSizeBinaryArray`] or [`DictionaryArray`]
58+
/// with supported array type as its value type.
59+
/// - The function errors if the offset of a substring in the input array is
60+
/// at invalid char boundary (only for \[Large\]String array).
61+
/// It is recommended to use [`substring_by_char`] if the input array may
62+
/// contain non-ASCII chars.
6063
///
6164
/// ## Example of trying to get an invalid utf-8 format substring
6265
/// ```
@@ -155,22 +158,25 @@ pub fn substring(
155158
}
156159
}
157160

161+
/// Substrings based on character index
162+
///
158163
/// # Arguments
159164
/// * `array` - The input string array
160165
///
161166
/// * `start` - The start index of all substrings.
162-
/// If `start >= 0`, then count from the start of the string,
163-
/// otherwise count from the end of the string.
167+
/// If `start >= 0`, then count from the start of the string,
168+
/// otherwise count from the end of the string.
164169
///
165170
/// * `length`(option) - The length of all substrings.
166-
/// If `length` is `None`, then the substring is from `start` to the end of the string.
171+
/// If `length` is `None`, then the substring is from `start` to the end of the string.
167172
///
168173
/// Attention: Both `start` and `length` are counted by char.
169174
///
170175
/// # Performance
171-
/// This function is slower than [substring].
172-
/// Theoretically, the time complexity is `O(n)` where `n` is the length of the value buffer.
173-
/// It is recommended to use [substring] if the input array only contains ASCII chars.
176+
///
177+
/// This function is slower than [substring]. Theoretically, the time complexity
178+
/// is `O(n)` where `n` is the length of the value buffer. It is recommended to
179+
/// use [substring] if the input array only contains ASCII chars.
174180
///
175181
/// # Basic usage
176182
/// ```
@@ -396,7 +402,7 @@ mod tests {
396402
/// A helper macro to test the substring functions.
397403
/// # Arguments
398404
/// * `cases` - The test cases which is a vector of `(input, start, len, result)`.
399-
/// Please look at [`gen_test_cases`] to find how to generate it.
405+
/// Please look at [`gen_test_cases`] to find how to generate it.
400406
/// * `array_ty` - The array type.
401407
/// * `substring_fn` - Either [`substring`] or [`substring_by_char`].
402408
macro_rules! do_test {

arrow/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ pyarrow = ["pyo3", "ffi"]
7575
# force_validate runs full data validation for all arrays that are created
7676
# this is not enabled by default as it is too computationally expensive
7777
# but is run as part of our CI checks
78-
force_validate = ["arrow-data/force_validate"]
78+
force_validate = ["arrow-array/force_validate", "arrow-data/force_validate"]
7979
# Enable ffi support
8080
ffi = ["arrow-schema/ffi", "arrow-data/ffi", "arrow-array/ffi"]
8181
chrono-tz = ["arrow-array/chrono-tz"]

parquet/src/arrow/arrow_reader/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -353,11 +353,11 @@ impl ArrowReaderOptions {
353353
/// This structure allows
354354
///
355355
/// 1. Loading metadata for a file once and then using that same metadata to
356-
/// construct multiple separate readers, for example, to distribute readers
357-
/// across multiple threads
356+
/// construct multiple separate readers, for example, to distribute readers
357+
/// across multiple threads
358358
///
359359
/// 2. Using a cached copy of the [`ParquetMetadata`] rather than reading it
360-
/// from the file each time a reader is constructed.
360+
/// from the file each time a reader is constructed.
361361
///
362362
/// [`ParquetMetadata`]: crate::file::metadata::ParquetMetaData
363363
#[derive(Debug, Clone)]
@@ -553,10 +553,10 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
553553
/// This interface allows:
554554
///
555555
/// 1. Loading metadata once and using it to create multiple builders with
556-
/// potentially different settings or run on different threads
556+
/// potentially different settings or run on different threads
557557
///
558558
/// 2. Using a cached copy of the metadata rather than re-reading it from the
559-
/// file each time a reader is constructed.
559+
/// file each time a reader is constructed.
560560
///
561561
/// See the docs on [`ArrowReaderMetadata`] for more details
562562
///

0 commit comments

Comments
 (0)