
Commit 2cc0c16

Deprecate methods from footer.rs in favor of ParquetMetaDataReader (#6451)
* deprecate methods from footer.rs
* remove mention of parse_metadata from docs for ParquetMetaData
1 parent 50e9e49 commit 2cc0c16
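
The callers changed below all follow the same pattern: the free functions `parse_metadata`, `decode_footer`, and `decode_metadata` from `parquet::file::footer` are replaced by the equivalent methods on `ParquetMetaDataReader`. A minimal sketch of the caller-side migration, grounded in the diffs below; the `read_metadata` helper is hypothetical and not part of the crate:

use bytes::Bytes;
use parquet::errors::Result;
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};

// Hypothetical helper illustrating the caller-side change.
fn read_metadata(data: &Bytes) -> Result<ParquetMetaData> {
    // Before this commit: parquet::file::footer::parse_metadata(data) (now deprecated).
    // After: ParquetMetaDataReader parses the footer and the metadata in one call.
    ParquetMetaDataReader::new().parse_and_finish(data)
}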

File tree: 8 files changed (+70 -225 lines)

parquet/src/arrow/async_reader/metadata.rs (+4 -5)
@@ -17,8 +17,7 @@
 
 use crate::arrow::async_reader::AsyncFileReader;
 use crate::errors::{ParquetError, Result};
-use crate::file::footer::{decode_footer, decode_metadata};
-use crate::file::metadata::ParquetMetaData;
+use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
 use crate::file::page_index::index::Index;
 use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index};
 use crate::file::FOOTER_SIZE;
@@ -76,7 +75,7 @@ impl<F: MetadataFetch> MetadataLoader<F> {
         let mut footer = [0; FOOTER_SIZE];
         footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
 
-        let length = decode_footer(&footer)?;
+        let length = ParquetMetaDataReader::decode_footer(&footer)?;
 
         if file_size < length + FOOTER_SIZE {
             return Err(ParquetError::EOF(format!(
@@ -90,13 +89,13 @@ impl<F: MetadataFetch> MetadataLoader<F> {
         let (metadata, remainder) = if length > suffix_len - FOOTER_SIZE {
             let metadata_start = file_size - length - FOOTER_SIZE;
             let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?;
-            (decode_metadata(&meta)?, None)
+            (ParquetMetaDataReader::decode_metadata(&meta)?, None)
         } else {
             let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
 
             let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
             (
-                decode_metadata(slice)?,
+                ParquetMetaDataReader::decode_metadata(slice)?,
                 Some((footer_start, suffix.slice(..metadata_start))),
             )
         };

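For reference, the two decode steps used by the loader above can also be sketched in isolation: read the 8-byte footer to learn the metadata length, then decode the metadata bytes that precede it. This is a minimal sketch assuming the whole file is already in memory; the `decode_from_buffer` helper is hypothetical and bounds checks are omitted:

use parquet::errors::Result;
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};

// Hypothetical helper: decode the metadata of a Parquet file that is fully buffered.
fn decode_from_buffer(data: &[u8]) -> Result<ParquetMetaData> {
    // The footer is the last 8 bytes: a 4-byte metadata length plus the "PAR1" magic.
    const FOOTER_SIZE: usize = 8;
    let mut footer = [0_u8; FOOTER_SIZE];
    footer.copy_from_slice(&data[data.len() - FOOTER_SIZE..]);
    let metadata_len = ParquetMetaDataReader::decode_footer(&footer)?;

    // The thrift-encoded metadata immediately precedes the footer (no bounds checking for brevity).
    let metadata_start = data.len() - FOOTER_SIZE - metadata_len;
    ParquetMetaDataReader::decode_metadata(&data[metadata_start..data.len() - FOOTER_SIZE])
}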
parquet/src/arrow/async_reader/mod.rs (+46 -19)
@@ -104,8 +104,7 @@ use crate::bloom_filter::{
 };
 use crate::column::page::{PageIterator, PageReader};
 use crate::errors::{ParquetError, Result};
-use crate::file::footer::{decode_footer, decode_metadata};
-use crate::file::metadata::{ParquetMetaData, RowGroupMetaData};
+use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData};
 use crate::file::page_index::offset_index::OffsetIndexMetaData;
 use crate::file::reader::{ChunkReader, Length, SerializedPageReader};
 use crate::file::FOOTER_SIZE;
@@ -186,14 +185,14 @@ impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
             let mut buf = [0_u8; FOOTER_SIZE];
             self.read_exact(&mut buf).await?;
 
-            let metadata_len = decode_footer(&buf)?;
+            let metadata_len = ParquetMetaDataReader::decode_footer(&buf)?;
             self.seek(SeekFrom::End(-FOOTER_SIZE_I64 - metadata_len as i64))
                 .await?;
 
             let mut buf = Vec::with_capacity(metadata_len);
             self.take(metadata_len as _).read_to_end(&mut buf).await?;
 
-            Ok(Arc::new(decode_metadata(&buf)?))
+            Ok(Arc::new(ParquetMetaDataReader::decode_metadata(&buf)?))
         }
         .boxed()
     }
@@ -909,7 +908,7 @@ mod tests {
     };
     use crate::arrow::schema::parquet_to_arrow_schema_and_fields;
     use crate::arrow::ArrowWriter;
-    use crate::file::footer::parse_metadata;
+    use crate::file::metadata::ParquetMetaDataReader;
    use crate::file::page_index::index_reader;
    use crate::file::properties::WriterProperties;
    use arrow::compute::kernels::cmp::eq;
@@ -952,7 +951,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_plain.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let metadata = Arc::new(metadata);
 
        assert_eq!(metadata.num_row_groups(), 1);
@@ -1007,7 +1008,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_tiny_pages_plain.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let metadata = Arc::new(metadata);
 
        assert_eq!(metadata.num_row_groups(), 1);
@@ -1073,7 +1076,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_tiny_pages_plain.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let metadata = Arc::new(metadata);
 
        assert_eq!(metadata.num_row_groups(), 1);
@@ -1117,7 +1122,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_tiny_pages_plain.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let metadata = Arc::new(metadata);
 
        assert_eq!(metadata.num_row_groups(), 1);
@@ -1173,7 +1180,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_tiny_pages_plain.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let metadata = Arc::new(metadata);
 
        assert_eq!(metadata.num_row_groups(), 1);
@@ -1238,7 +1247,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_tiny_pages_plain.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let metadata = Arc::new(metadata);
 
        assert_eq!(metadata.num_row_groups(), 1);
@@ -1317,7 +1328,9 @@ mod tests {
        writer.close().unwrap();
 
        let data: Bytes = buf.into();
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let parquet_schema = metadata.file_metadata().schema_descr_ptr();
 
        let test = TestReader {
@@ -1391,7 +1404,9 @@ mod tests {
        writer.close().unwrap();
 
        let data: Bytes = buf.into();
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
 
        assert_eq!(metadata.num_row_groups(), 2);
 
@@ -1479,7 +1494,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_tiny_pages_plain.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let parquet_schema = metadata.file_metadata().schema_descr_ptr();
        let metadata = Arc::new(metadata);
 
@@ -1529,7 +1546,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_tiny_pages.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
 
        let offset_index =
            index_reader::read_offset_indexes(&data, metadata.row_group(0).columns())
@@ -1619,7 +1638,9 @@ mod tests {
        let path = format!("{testdata}/alltypes_plain.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
 
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let file_rows = metadata.file_metadata().num_rows() as usize;
        let metadata = Arc::new(metadata);
 
@@ -1764,7 +1785,9 @@ mod tests {
        let testdata = arrow::util::test_util::parquet_test_data();
        let path = format!("{testdata}/data_index_bloom_encoding_stats.parquet");
        let data = Bytes::from(std::fs::read(path).unwrap());
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let metadata = Arc::new(metadata);
        let async_reader = TestReader {
            data: data.clone(),
@@ -1793,7 +1816,9 @@ mod tests {
    }
 
    async fn test_get_row_group_column_bloom_filter(data: Bytes, with_length: bool) {
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let metadata = Arc::new(metadata);
 
        assert_eq!(metadata.num_row_groups(), 1);
@@ -1933,7 +1958,9 @@ mod tests {
        writer.close().unwrap();
 
        let data: Bytes = buf.into();
-        let metadata = parse_metadata(&data).unwrap();
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
        let parquet_schema = metadata.file_metadata().schema_descr_ptr();
 
        let test = TestReader {

parquet/src/bin/parquet-concat.rs (+2 -1)
@@ -39,6 +39,7 @@
 use clap::Parser;
 use parquet::column::writer::ColumnCloseResult;
 use parquet::errors::{ParquetError, Result};
+use parquet::file::metadata::ParquetMetaDataReader;
 use parquet::file::properties::WriterProperties;
 use parquet::file::writer::SerializedFileWriter;
 use std::fs::File;
@@ -70,7 +71,7 @@ impl Args {
            .iter()
            .map(|x| {
                let reader = File::open(x)?;
-                let metadata = parquet::file::footer::parse_metadata(&reader)?;
+                let metadata = ParquetMetaDataReader::new().parse_and_finish(&reader)?;
                Ok((reader, metadata))
            })
            .collect::<Result<Vec<_>>>()?;

parquet/src/bin/parquet-layout.rs (+2 -1)
@@ -37,6 +37,7 @@ use std::fs::File;
 use std::io::Read;
 
 use clap::Parser;
+use parquet::file::metadata::ParquetMetaDataReader;
 use serde::Serialize;
 use thrift::protocol::TCompactInputProtocol;
 
@@ -79,7 +80,7 @@ struct Page {
 }
 
 fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
-    let metadata = parquet::file::footer::parse_metadata(reader)?;
+    let metadata = ParquetMetaDataReader::new().parse_and_finish(reader)?;
    let schema = metadata.file_metadata().schema_descr();
 
    let row_groups = (0..metadata.num_row_groups())
