Skip to content

Commit 059b4e4

Browse files
committed
Address MetadataLoader clippy errors, correct docs + add a suffix example
1 parent 76a4633 commit 059b4e4

File tree

3 files changed

+19
-8
lines changed

3 files changed

+19
-8
lines changed

parquet/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ serde_json = { version = "1.0", features = ["std"], default-features = false }
8282
arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "json"] }
8383
tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "io-util", "fs"] }
8484
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
85-
object_store = { version = "0.10.0", default-features = false, features = ["azure"] }
85+
object_store = { version = "0.10.0", default-features = false, features = ["azure", "aws"] }
8686

8787
# TODO: temporary to fix parquet wasm build
8888
# upstream issue: https://github.com/gyscos/zstd-rs/issues/269

parquet/src/arrow/async_reader/metadata.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ impl<F: MetadataFetch> MetadataLoader<F> {
6565
let metadata_offset = length + 8;
6666
let meta = fetch.fetch(GetRange::Suffix(metadata_offset)).await?;
6767
let slice = &meta[0..length];
68-
(decode_metadata(&slice)?, None)
68+
(decode_metadata(slice)?, None)
6969
} else {
7070
let metadata_offset = length + 8;
7171
let metadata_start = suffix_len - metadata_offset;
@@ -293,9 +293,7 @@ mod tests {
293293
let range = match range {
294294
GetRange::Bounded(range) => range,
295295
GetRange::Offset(offset) => offset..file_size,
296-
GetRange::Suffix(end_offset) => {
297-
file_size.saturating_sub(end_offset.try_into().unwrap())..file_size
298-
}
296+
GetRange::Suffix(end_offset) => file_size.saturating_sub(end_offset)..file_size,
299297
};
300298
file.seek(SeekFrom::Start(range.start as _))?;
301299
let len = range.end - range.start;

parquet/src/arrow/async_reader/store.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,22 +47,35 @@ impl From<GetRange> for object_store::GetRange {
4747
/// # use std::io::stdout;
4848
/// # use std::sync::Arc;
4949
/// # use object_store::azure::MicrosoftAzureBuilder;
50+
/// # use object_store::aws::AmazonS3Builder;
5051
/// # use object_store::ObjectStore;
5152
/// # use object_store::path::Path;
5253
/// # use parquet::arrow::async_reader::ParquetObjectReader;
5354
/// # use parquet::arrow::ParquetRecordBatchStreamBuilder;
5455
/// # use parquet::schema::printer::print_parquet_metadata;
5556
/// # async fn run() {
57+
/// // For object stores that support suffix range requests, no file size needs to be supplied:
58+
/// // Populate configuration from environment
59+
/// let storage_container = Arc::new(AmazonS3Builder::from_env().build().unwrap());
60+
/// let location = Path::from("path/to/blob.parquet");
61+
///
62+
/// // Show Parquet metadata
63+
/// let reader = ParquetObjectReader::new(storage_container, location);
64+
/// let builder = ParquetRecordBatchStreamBuilder::new(reader).await.unwrap();
65+
/// print_parquet_metadata(&mut stdout(), builder.metadata());
66+
/// # }
67+
/// # async fn run_non_suffixed() {
5668
/// // Populate configuration from environment
5769
/// let storage_container = Arc::new(MicrosoftAzureBuilder::from_env().build().unwrap());
5870
/// let location = Path::from("path/to/blob.parquet");
5971
/// let meta = storage_container.head(&location).await.unwrap();
6072
/// println!("Found Blob with {}B at {}", meta.size, meta.location);
6173
///
6274
/// // Show Parquet metadata
63-
/// let reader = ParquetObjectReader::new(storage_container, meta);
75+
/// let reader = ParquetObjectReader::new(storage_container, location).with_file_size(meta.size);
6476
/// let builder = ParquetRecordBatchStreamBuilder::new(reader).await.unwrap();
6577
/// print_parquet_metadata(&mut stdout(), builder.metadata());
78+
///
6679
/// # }
6780
/// ```
6881
#[derive(Clone, Debug)]
@@ -76,9 +89,9 @@ pub struct ParquetObjectReader {
7689
}
7790

7891
impl ParquetObjectReader {
79-
/// Creates a new [`ParquetObjectReader`] for the provided [`ObjectStore`] and [`ObjectMeta`]
92+
/// Creates a new [`ParquetObjectReader`] for the provided [`ObjectStore`] and [`Path`]
8093
///
81-
/// [`ObjectMeta`] can be obtained using [`ObjectStore::list`] or [`ObjectStore::head`]
94+
/// The file size can be obtained using [`ObjectStore::list`] or [`ObjectStore::head`] and supplied via [`Self::with_file_size`]
8295
pub fn new(store: Arc<dyn ObjectStore>, location: Path) -> Self {
8396
Self {
8497
store,

0 commit comments

Comments
 (0)