Skip to content

Commit 5595019

Browse files
authored
chore: add docs, part of #37 (apache#6496)
- add pragma `#![warn(missing_docs)]` to `parquet`. This is the final component in the effort to make Arrow fully-documented. The entire project now generates warnings for missing docs, if any. - `arrow-flight`: replace `tonic`'s deprecated `compile_with_config` with the suggested method - new deprecation: The following types were not used anywhere and were possibly strays. They've been marked as deprecated and will be removed in future versions. - `parquet::data_types::SliceAsBytesDataType` - `parquet::column::writer::Level`
1 parent d030177 commit 5595019

File tree

24 files changed

+262
-22
lines changed

24 files changed

+262
-22
lines changed

arrow-flight/gen/src/main.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
2626
let proto_path = Path::new("../format/Flight.proto");
2727

2828
tonic_build::configure()
29-
// protoc in unbuntu builder needs this option
29+
// protoc in Ubuntu builder needs this option
3030
.protoc_arg("--experimental_allow_proto3_optional")
3131
.out_dir("src")
3232
.compile_protos_with_config(prost_config(), &[proto_path], &[proto_dir])?;
@@ -37,7 +37,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
3737
.open("src/arrow.flight.protocol.rs")?;
3838
let mut buffer = String::new();
3939
file.read_to_string(&mut buffer)?;
40-
// append warning that file was auto-generate
40+
// append warning that file was auto-generated
4141
let mut file = OpenOptions::new()
4242
.write(true)
4343
.truncate(true)
@@ -49,7 +49,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
4949
let proto_path = Path::new("../format/FlightSql.proto");
5050

5151
tonic_build::configure()
52-
// protoc in ubuntu builder needs this option
52+
// protoc in Ubuntu builder needs this option
5353
.protoc_arg("--experimental_allow_proto3_optional")
5454
.out_dir("src/sql")
5555
.compile_protos_with_config(prost_config(), &[proto_path], &[proto_dir])?;

parquet/src/arrow/async_reader/metadata.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ use std::ops::Range;
2929

3030
/// A data source that can be used with [`MetadataLoader`] to load [`ParquetMetaData`]
3131
pub trait MetadataFetch {
32+
/// Fetches a range of bytes asynchronously
3233
fn fetch(&mut self, range: Range<usize>) -> BoxFuture<'_, Result<Bytes>>;
3334
}
3435

parquet/src/basic.rs

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,21 @@ pub use crate::format::{
4747
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
4848
#[allow(non_camel_case_types)]
4949
pub enum Type {
50+
/// A boolean value.
5051
BOOLEAN,
52+
/// 32-bit signed integer.
5153
INT32,
54+
/// 64-bit signed integer.
5255
INT64,
56+
/// 96-bit signed integer for timestamps.
5357
INT96,
58+
/// IEEE 754 single-precision floating point value.
5459
FLOAT,
60+
/// IEEE 754 double-precision floating point value.
5561
DOUBLE,
62+
/// Arbitrary length byte array.
5663
BYTE_ARRAY,
64+
/// Fixed length byte array.
5765
FIXED_LEN_BYTE_ARRAY,
5866
}
5967

@@ -70,6 +78,7 @@ pub enum Type {
7078
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
7179
#[allow(non_camel_case_types)]
7280
pub enum ConvertedType {
81+
/// No type conversion.
7382
NONE,
7483
/// A BYTE_ARRAY actually contains UTF8 encoded chars.
7584
UTF8,
@@ -171,31 +180,53 @@ pub enum ConvertedType {
171180
/// [`ConvertedType`]. Please see the README.md for more details.
172181
#[derive(Debug, Clone, PartialEq, Eq)]
173182
pub enum LogicalType {
183+
/// A UTF8 encoded string.
174184
String,
185+
/// A map of key-value pairs.
175186
Map,
187+
/// A list of elements.
176188
List,
189+
/// A set of predefined values.
177190
Enum,
191+
/// A decimal value with a specified scale and precision.
178192
Decimal {
193+
/// The number of digits to the right of the decimal point.
179194
scale: i32,
195+
/// The total number of digits in the decimal.
180196
precision: i32,
181197
},
198+
/// A date stored as days since Unix epoch.
182199
Date,
200+
/// A time stored as [`TimeUnit`] since midnight.
183201
Time {
202+
/// Whether the time is adjusted to UTC.
184203
is_adjusted_to_u_t_c: bool,
204+
/// The unit of time.
185205
unit: TimeUnit,
186206
},
207+
/// A timestamp stored as [`TimeUnit`] since Unix epoch.
187208
Timestamp {
209+
/// Whether the timestamp is adjusted to UTC.
188210
is_adjusted_to_u_t_c: bool,
211+
/// The unit of time.
189212
unit: TimeUnit,
190213
},
214+
/// An integer with a specified bit width and signedness.
191215
Integer {
216+
/// The number of bits in the integer.
192217
bit_width: i8,
218+
/// Whether the integer is signed.
193219
is_signed: bool,
194220
},
221+
/// An unknown logical type.
195222
Unknown,
223+
/// A JSON document.
196224
Json,
225+
/// A BSON document.
197226
Bson,
227+
/// A UUID.
198228
Uuid,
229+
/// A 16-bit floating point number.
199230
Float16,
200231
}
201232

@@ -350,13 +381,21 @@ impl FromStr for Encoding {
350381
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
351382
#[allow(non_camel_case_types)]
352383
pub enum Compression {
384+
/// No compression.
353385
UNCOMPRESSED,
386+
/// [Snappy compression](https://en.wikipedia.org/wiki/Snappy_(compression))
354387
SNAPPY,
388+
/// [Gzip compression](https://www.ietf.org/rfc/rfc1952.txt)
355389
GZIP(GzipLevel),
390+
/// [LZO compression](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Oberhumer)
356391
LZO,
392+
/// [Brotli compression](https://datatracker.ietf.org/doc/html/rfc7932)
357393
BROTLI(BrotliLevel),
394+
/// [LZ4 compression](https://lz4.org/), [(deprecated)](https://issues.apache.org/jira/browse/PARQUET-2032)
358395
LZ4,
396+
/// [ZSTD compression](https://datatracker.ietf.org/doc/html/rfc8878)
359397
ZSTD(ZstdLevel),
398+
/// [LZ4 compression](https://lz4.org/).
360399
LZ4_RAW,
361400
}
362401

@@ -447,16 +486,20 @@ impl FromStr for Compression {
447486
}
448487

449488
// ----------------------------------------------------------------------
450-
// Mirrors `parquet::PageType`
451-
489+
/// Mirrors [`crate::format::PageType`]
490+
///
452491
/// Available data pages for Parquet file format.
453492
/// Note that some of the page types may not be supported.
454493
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
455494
#[allow(non_camel_case_types)]
456495
pub enum PageType {
496+
/// Data page Parquet 1.0
457497
DATA_PAGE,
498+
/// Index page
458499
INDEX_PAGE,
500+
/// Dictionary page
459501
DICTIONARY_PAGE,
502+
/// Data page Parquet 2.0
460503
DATA_PAGE_V2,
461504
}
462505

parquet/src/column/page.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,29 +31,51 @@ use crate::format::PageHeader;
3131
/// used to store uncompressed bytes of the page.
3232
#[derive(Clone)]
3333
pub enum Page {
34+
/// Data page Parquet format v1.
3435
DataPage {
36+
/// The underlying data buffer
3537
buf: Bytes,
38+
/// Number of values in this page
3639
num_values: u32,
40+
/// Encoding for values in this page
3741
encoding: Encoding,
42+
/// Definition level encoding
3843
def_level_encoding: Encoding,
44+
/// Repetition level encoding
3945
rep_level_encoding: Encoding,
46+
/// Optional statistics for this page
4047
statistics: Option<Statistics>,
4148
},
49+
/// Data page Parquet format v2.
4250
DataPageV2 {
51+
/// The underlying data buffer
4352
buf: Bytes,
53+
/// Number of values in this page
4454
num_values: u32,
55+
/// Encoding for values in this page
4556
encoding: Encoding,
57+
/// Number of null values in this page
4658
num_nulls: u32,
59+
/// Number of rows in this page
4760
num_rows: u32,
61+
/// Length of definition levels
4862
def_levels_byte_len: u32,
63+
/// Length of repetition levels
4964
rep_levels_byte_len: u32,
65+
/// Whether this page is compressed
5066
is_compressed: bool,
67+
/// Optional statistics for this page
5168
statistics: Option<Statistics>,
5269
},
70+
/// Dictionary page.
5371
DictionaryPage {
72+
/// The underlying data buffer
5473
buf: Bytes,
74+
/// Number of values in this page
5575
num_values: u32,
76+
/// Encoding for values in this page
5677
encoding: Encoding,
78+
/// Whether the dictionary page is sorted
5779
is_sorted: bool,
5880
},
5981
}
@@ -235,11 +257,17 @@ impl CompressedPage {
235257

236258
/// Contains page write metrics.
237259
pub struct PageWriteSpec {
260+
/// The type of page being written
238261
pub page_type: PageType,
262+
/// The total size of the page, before compression
239263
pub uncompressed_size: usize,
264+
/// The compressed size of the page
240265
pub compressed_size: usize,
266+
/// The number of values in the page
241267
pub num_values: u32,
268+
/// The offset of the page in the column chunk
242269
pub offset: u64,
270+
/// The number of bytes written to the underlying sink
243271
pub bytes_written: u64,
244272
}
245273

parquet/src/column/reader.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,21 @@ pub(crate) mod decoder;
3434

3535
/// Column reader for a Parquet type.
3636
pub enum ColumnReader {
37+
/// Column reader for boolean type
3738
BoolColumnReader(ColumnReaderImpl<BoolType>),
39+
/// Column reader for int32 type
3840
Int32ColumnReader(ColumnReaderImpl<Int32Type>),
41+
/// Column reader for int64 type
3942
Int64ColumnReader(ColumnReaderImpl<Int64Type>),
43+
/// Column reader for int96 type
4044
Int96ColumnReader(ColumnReaderImpl<Int96Type>),
45+
/// Column reader for float type
4146
FloatColumnReader(ColumnReaderImpl<FloatType>),
47+
/// Column reader for double type
4248
DoubleColumnReader(ColumnReaderImpl<DoubleType>),
49+
/// Column reader for byte array type
4350
ByteArrayColumnReader(ColumnReaderImpl<ByteArrayType>),
51+
/// Column reader for fixed length byte array type
4452
FixedLenByteArrayColumnReader(ColumnReaderImpl<FixedLenByteArrayType>),
4553
}
4654

parquet/src/column/writer/mod.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,21 @@ macro_rules! downcast_writer {
6161

6262
/// Column writer for a Parquet type.
6363
pub enum ColumnWriter<'a> {
64+
/// Column writer for boolean type
6465
BoolColumnWriter(ColumnWriterImpl<'a, BoolType>),
66+
/// Column writer for int32 type
6567
Int32ColumnWriter(ColumnWriterImpl<'a, Int32Type>),
68+
/// Column writer for int64 type
6669
Int64ColumnWriter(ColumnWriterImpl<'a, Int64Type>),
70+
/// Column writer for int96 (timestamp) type
6771
Int96ColumnWriter(ColumnWriterImpl<'a, Int96Type>),
72+
/// Column writer for float type
6873
FloatColumnWriter(ColumnWriterImpl<'a, FloatType>),
74+
/// Column writer for double type
6975
DoubleColumnWriter(ColumnWriterImpl<'a, DoubleType>),
76+
/// Column writer for byte array type
7077
ByteArrayColumnWriter(ColumnWriterImpl<'a, ByteArrayType>),
78+
/// Column writer for fixed length byte array type
7179
FixedLenByteArrayColumnWriter(ColumnWriterImpl<'a, FixedLenByteArrayType>),
7280
}
7381

@@ -90,6 +98,11 @@ impl<'a> ColumnWriter<'a> {
9098
}
9199
}
92100

101+
#[deprecated(
102+
since = "54.0.0",
103+
note = "Seems like a stray and nobody knows what's it for. Will be removed in the next release."
104+
)]
105+
#[allow(missing_docs)]
93106
pub enum Level {
94107
Page,
95108
Column,
@@ -309,6 +322,7 @@ impl<T: Default> ColumnMetrics<T> {
309322
/// Typed column writer for a primitive column.
310323
pub type ColumnWriterImpl<'a, T> = GenericColumnWriter<'a, ColumnValueEncoderImpl<T>>;
311324

325+
/// Generic column writer for a primitive column.
312326
pub struct GenericColumnWriter<'a, E: ColumnValueEncoder> {
313327
// Column writer properties
314328
descr: ColumnDescPtr,
@@ -344,6 +358,7 @@ pub struct GenericColumnWriter<'a, E: ColumnValueEncoder> {
344358
}
345359

346360
impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
361+
/// Returns a new instance of [`GenericColumnWriter`].
347362
pub fn new(
348363
descr: ColumnDescPtr,
349364
props: WriterPropertiesPtr,

0 commit comments

Comments
 (0)