Skip to content

Commit be347da

Browse files
committed
Raise an error for invalid repetition levels when delimiting records
1 parent f41f590 commit be347da

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

cpp/src/parquet/arrow/arrow_reader_writer_test.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3990,6 +3990,12 @@ TEST(TestArrowReaderAdHoc, CorruptedSchema) {
39903990
TryReadDataFile(path, ::arrow::StatusCode::IOError);
39913991
}
39923992

3993+
TEST(TestArrowReaderAdHoc, InvalidRepetitionLevels) {
3994+
// GH-45185 - Repetition levels start with 1 instead of 0.
// Regression test: the data file loaded below has repetition levels that
// begin with 1 rather than 0, and reading it is expected to fail with an
// IOError status.
3995+
auto path = test::get_data_file("ARROW-GH-45185.parquet", /*is_good=*/false);
3996+
TryReadDataFile(path, ::arrow::StatusCode::IOError);
3997+
}
3998+
39933999
TEST(TestArrowReaderAdHoc, LARGE_MEMORY_TEST(LargeStringColumn)) {
39944000
// ARROW-3762
39954001
::arrow::StringBuilder builder;

cpp/src/parquet/column_reader.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1603,7 +1603,12 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
16031603
// another record start or exhausting the ColumnChunk
16041604
int64_t level = levels_position_;
16051605
if (at_record_start_) {
1606-
ARROW_DCHECK_EQ(0, rep_levels[levels_position_]);
1606+
if (rep_levels[levels_position_] != 0) {
1607+
std::stringstream ss;
1608+
ss << "The repetition level at the start of a record must be 0 but got "
1609+
<< rep_levels[levels_position_];
1610+
throw ParquetException(ss.str());
1611+
}
16071612
++levels_position_;
16081613
// We have decided to consume the level at this position; therefore we
16091614
// must advance until we find another record boundary

0 commit comments

Comments
 (0)