Skip to content

Commit e47d8b3

Browse files
imsayari404sayarimukherjee
authored and
sayarimukherjee
committed
Correct boolean RLE test data generation
- This ensures that the BooleanRLEValuesDecoder is tested with valid data, and the test now passes.
1 parent 8bbb946 commit e47d8b3

File tree

3 files changed

+77
-26
lines changed

3 files changed

+77
-26
lines changed

presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/rle/BooleanRLEValuesDecoder.java

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@
1111
* See the License for the specific language governing permissions and
1212
* limitations under the License.
1313
*/
14+
1415
package com.facebook.presto.parquet.batchreader.decoders.rle;
1516

1617
import com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BooleanValuesDecoder;
1718
import org.apache.parquet.io.ParquetDecodingException;
1819
import org.openjdk.jol.info.ClassLayout;
1920

21+
import java.io.ByteArrayInputStream;
2022
import java.io.IOException;
2123
import java.nio.ByteBuffer;
2224

@@ -33,8 +35,10 @@ public class BooleanRLEValuesDecoder
3335

3436
/**
 * Creates a decoder that reads run headers from the remaining bytes of {@code inputBuffer}.
 *
 * <p>The superclass is handed a stream over those bytes because {@code decode()} reads
 * its headers from {@code inputStream}. The buffer's own position is not advanced here.
 *
 * @param inputBuffer encoded page bytes; must not be null
 * @throws NullPointerException if {@code inputBuffer} is null
 */
public BooleanRLEValuesDecoder(ByteBuffer inputBuffer)
{
    // The null check has to happen inside the super(...) argument: it is the first
    // expression that dereferences the buffer.
    super(Integer.MAX_VALUE, 1, toInputStream(requireNonNull(inputBuffer, "inputBuffer is null")));
    this.inputBuffer = inputBuffer;
    currentBuffer = null;
    mode = Mode.RLE;
}

/**
 * Wraps the remaining bytes of {@code buffer} in a stream without moving its position.
 * Buffers with no accessible backing array (direct or read-only) are copied, because
 * {@link ByteBuffer#array()} throws for them.
 */
private static ByteArrayInputStream toInputStream(ByteBuffer buffer)
{
    if (buffer.hasArray()) {
        return new ByteArrayInputStream(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
    }
    byte[] copy = new byte[buffer.remaining()];
    // duplicate() keeps the caller's position/limit untouched
    buffer.duplicate().get(copy);
    return new ByteArrayInputStream(copy);
}
3943

4044
@Override
@@ -56,23 +60,9 @@ public void readNext(byte[] values, int offset, int length)
5660

5761
int numEntriesToFill = Math.min(remainingToCopy, getCurrentCount());
5862
int endIndex = destinationIndex + numEntriesToFill;
59-
switch (getCurrentMode()) {
60-
case RLE: {
61-
byte rleValue = (byte) getDecodedInt();
62-
while (destinationIndex < endIndex) {
63-
values[destinationIndex++] = rleValue;
64-
}
65-
break;
66-
}
67-
case PACKED: {
68-
int[] decodedInts = getDecodedInts();
69-
for (int i = decodedInts.length - getCurrentCount(); destinationIndex < endIndex; i++) {
70-
values[destinationIndex++] = (byte) decodedInts[i];
71-
}
72-
break;
73-
}
74-
default:
75-
throw new ParquetDecodingException("not a valid mode " + getCurrentMode());
63+
byte rleValue = (byte) getDecodedInt();
64+
while (destinationIndex < endIndex) {
65+
values[destinationIndex++] = rleValue;
7666
}
7767
decrementCurrentCount(numEntriesToFill);
7868
remainingToCopy -= numEntriesToFill;

presto-parquet/src/main/java/com/facebook/presto/parquet/batchreader/decoders/rle/GenericRLEDictionaryValuesDecoder.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* See the License for the specific language governing permissions and
1212
* limitations under the License.
1313
*/
14+
1415
package com.facebook.presto.parquet.batchreader.decoders.rle;
1516

1617
import org.apache.parquet.io.ParquetDecodingException;
@@ -77,9 +78,22 @@ public long getRetainedSizeInBytes()
7778
return INSTANCE_SIZE + sizeOf(currentBuffer);
7879
}
7980

80-
protected boolean decode()
81-
throws IOException
81+
protected boolean decode() throws IOException
8282
{
83+
if (this instanceof BooleanRLEValuesDecoder) {
84+
// Boolean RLE specific logic
85+
if (inputStream.available() <= 0) {
86+
currentCount = 0;
87+
return false;
88+
}
89+
90+
int header = readUnsignedVarInt(inputStream);
91+
mode = RLE; // Boolean RLE is always RLE
92+
currentValue = (header & 1) == 1 ? 1 : 0;
93+
currentCount = header >>> 1;
94+
return true;
95+
}
96+
8397
if (rleOnlyMode) {
8498
// for RLE only mode there is nothing more to read
8599
return false;
@@ -117,6 +131,7 @@ protected boolean decode()
117131
throw new ParquetDecodingException("not a valid mode " + mode);
118132
}
119133
}
134+
120135
public int[] getDecodedInts()
121136
{
122137
return currentBuffer;
@@ -180,9 +195,8 @@ public void unpack8Values(byte[] input, int inputOffset, int[] output, int outpu
180195
}
181196
}
182197
}
183-
/**
 * Kind of run the decoder is currently positioned on.
 */
public enum Mode
{
    // A run described by a single repeated value.
    RLE,
    // A run whose values are unpacked individually into a buffer.
    PACKED
}
188202
}

presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestValuesDecoders.java

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.testng.annotations.Test;
4848

4949
import java.io.ByteArrayInputStream;
50+
import java.io.ByteArrayOutputStream;
5051
import java.io.IOException;
5152
import java.nio.ByteBuffer;
5253
import java.util.ArrayList;
@@ -595,14 +596,60 @@ public void testBooleanPlain()
595596
booleanBatchReadWithSkipHelper(89, 29, valueCount, booleanPlain(pageBytes), expectedValues);
596597
booleanBatchReadWithSkipHelper(1024, 1024, valueCount, booleanPlain(pageBytes), expectedValues);
597598
}
599+
private static byte[] generateBooleanRLEData(List<Integer> values) throws IOException
600+
{
601+
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
602+
int count = 0;
603+
boolean currentValue = false;
604+
605+
for (int value : values) {
606+
boolean newValue = value == 1;
607+
if (count == 0) {
608+
currentValue = newValue;
609+
count = 1;
610+
}
611+
else if (currentValue == newValue) {
612+
count++;
613+
}
614+
else {
615+
writeRLEBitPackedRun(outputStream, currentValue, count);
616+
currentValue = newValue;
617+
count = 1;
618+
}
619+
}
620+
writeRLEBitPackedRun(outputStream, currentValue, count);
621+
return outputStream.toByteArray();
622+
}
623+
624+
private static void writeRLEBitPackedRun(ByteArrayOutputStream outputStream, boolean value, int count) throws IOException
625+
{
626+
int header = (count << 1) | (value ? 1 : 0);
627+
while ((header & ~0x7F) != 0) {
628+
outputStream.write((header & 0x7F) | 0x80);
629+
header >>>= 7;
630+
}
631+
outputStream.write(header);
632+
}
633+
@Test
634+
public void testMinimalBooleanRLE() throws IOException
635+
{
636+
List<Integer> values = Arrays.asList(0, 1, 0);
637+
byte[] dataPage = generateBooleanRLEData(values);
638+
List<Object> expectedValues = new ArrayList<>(values);
639+
640+
booleanBatchReadWithSkipHelper(1, 0, 3, booleanRLE(dataPage), expectedValues);
641+
}
598642

599643
@Test
600-
public void testBooleanRLE()
644+
public void testBooleanRLE() throws IOException
601645
{
602646
int valueCount = 2048;
603647
List<Integer> values = new ArrayList<>();
648+
for (int i = 0; i < valueCount; i++) {
649+
values.add(i % 2); // Alternating 0s and 1s
650+
}
604651

605-
byte[] dataPage = generateDictionaryIdPage2048(1, values);
652+
byte[] dataPage = generateBooleanRLEData(values);
606653

607654
List<Object> expectedValues = new ArrayList<>(values);
608655

0 commit comments

Comments
 (0)