-
Notifications
You must be signed in to change notification settings - Fork 3.9k
GH-48194: [C++][Parquet] Fix arrow-bit-utility-test failure on s390x #48195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -365,6 +365,9 @@ inline bool BitReader::GetVlqInt(Int* v) { | |||||||||
| // In all case, we read a byte-aligned value, skipping remaining bits | ||||||||||
| const uint8_t* data = NULLPTR; | ||||||||||
| int max_size = 0; | ||||||||||
| #if ARROW_LITTLE_ENDIAN | ||||||||||
| // The data that we will pass to the LEB128 parser | ||||||||||
| // In all case, we read a byte-aligned value, skipping remaining bits | ||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
|
|
||||||||||
| // Number of bytes left in the buffered values, not including the current | ||||||||||
| // byte (i.e., there may be an additional fraction of a byte). | ||||||||||
|
|
@@ -381,6 +384,17 @@ inline bool BitReader::GetVlqInt(Int* v) { | |||||||||
| max_size = bytes_left(); | ||||||||||
| data = buffer_ + (max_bytes_ - max_size); | ||||||||||
| } | ||||||||||
| #else | ||||||||||
| // For VLQ reading, always read directly from buffer to avoid endianness issues | ||||||||||
| // with buffered_values_ on big-endian systems like s390x | ||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
| // Calculate current position in buffer accounting for bit offset | ||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
| const int current_byte_offset = byte_offset_ + bit_util::BytesForBits(bit_offset_); | ||||||||||
| const int bytes_left_in_buffer = max_bytes_ - current_byte_offset; | ||||||||||
|
|
||||||||||
| // Always read from buffer directly to avoid endianness issues | ||||||||||
| data = buffer_ + current_byte_offset; | ||||||||||
| max_size = bytes_left_in_buffer; | ||||||||||
|
Comment on lines
+391
to
+396
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this the same logic as arrow/cpp/src/arrow/util/bit_stream_utils_internal.h Lines 380 to 383 in 2fb2f79
If so, should we reuse it something like the following? diff --git a/cpp/src/arrow/util/bit_stream_utils_internal.h b/cpp/src/arrow/util/bit_stream_utils_internal.h
index d8c7317fe8..7352312782 100644
--- a/cpp/src/arrow/util/bit_stream_utils_internal.h
+++ b/cpp/src/arrow/util/bit_stream_utils_internal.h
@@ -366,6 +366,9 @@ inline bool BitReader::GetVlqInt(Int* v) {
const uint8_t* data = NULLPTR;
int max_size = 0;
+#if ARROW_LITTLE_ENDIAN
+ // TODO: Describe why we need this only for little-endian.
+
// Number of bytes left in the buffered values, not including the current
// byte (i.e., there may be an additional fraction of a byte).
const int bytes_left_in_cache =
@@ -377,7 +380,9 @@ inline bool BitReader::GetVlqInt(Int* v) {
data = reinterpret_cast<const uint8_t*>(&buffered_values_) +
bit_util::BytesForBits(bit_offset_);
// Otherwise, we try straight from buffer (ignoring few bytes that may be cached)
- } else {
+ } else
+#endif
+ {
max_size = bytes_left();
data = buffer_ + (max_bytes_ - max_size);
} |
||||||||||
| #endif | ||||||||||
|
|
||||||||||
| const auto bytes_read = bit_util::ParseLeadingLEB128(data, max_size, v); | ||||||||||
| if (ARROW_PREDICT_FALSE(bytes_read == 0)) { | ||||||||||
|
|
||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.