diff --git a/velox/dwio/parquet/reader/PageReader.cpp b/velox/dwio/parquet/reader/PageReader.cpp index 142f1017287f..4153ad49edac 100644 --- a/velox/dwio/parquet/reader/PageReader.cpp +++ b/velox/dwio/parquet/reader/PageReader.cpp @@ -261,12 +261,11 @@ void PageReader::prepareDataPageV2(const PageHeader& pageHeader, int64_t row) { return; } - uint32_t defineLength = maxDefine_ > 0 - ? pageHeader.data_page_header_v2.definition_levels_byte_length - : 0; - uint32_t repeatLength = maxRepeat_ > 0 - ? pageHeader.data_page_header_v2.repetition_levels_byte_length - : 0; + uint32_t defineLength = + pageHeader.data_page_header_v2.definition_levels_byte_length; + uint32_t repeatLength = + pageHeader.data_page_header_v2.repetition_levels_byte_length; + auto bytes = pageHeader.compressed_page_size; pageData_ = readBytes(bytes, pageBuffer_); diff --git a/velox/dwio/parquet/tests/examples/v2_page.parquet b/velox/dwio/parquet/tests/examples/v2_page.parquet new file mode 100644 index 000000000000..4275539bc1cd Binary files /dev/null and b/velox/dwio/parquet/tests/examples/v2_page.parquet differ diff --git a/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp b/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp index 5488c0c7d3d9..1a535a148325 100644 --- a/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp +++ b/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp @@ -1156,3 +1156,28 @@ TEST_F(ParquetReaderTest, readVarbinaryFromFLBA) { result->as()->childAt(0)->asFlatVector()->valueAt( 0)); } + +TEST_F(ParquetReaderTest, testV2PageWithZeroMaxDefRep) { + // enum_type.parquet contains 1 column (ENUM) with 3 rows. + const std::string sample(getExampleFilePath("v2_page.parquet")); + + facebook::velox::dwio::common::ReaderOptions readerOptions{leafPool_.get()}; + auto reader = createReader(sample, readerOptions); + EXPECT_EQ(reader->numberOfRows(), 5ULL); + + auto rowType = reader->typeWithId(); + EXPECT_EQ(rowType->type()->kind(), TypeKind::ROW); + EXPECT_EQ(rowType->size(), 1ULL); + + EXPECT_EQ(rowType->childAt(0)->type()->kind(), TypeKind::BIGINT); + + auto outputRowType = ROW({"regionkey"}, {BIGINT()}); + auto rowReaderOpts = getReaderOpts(outputRowType); + rowReaderOpts.setScanSpec(makeScanSpec(outputRowType)); + auto rowReader = reader->createRowReader(rowReaderOpts); + + auto expected = makeRowVector({makeFlatVector({0, 1, 2, 3, 4})}); + + assertReadWithReaderAndExpected( + outputRowType, *rowReader, expected, *leafPool_); +}