From bfa98a025a1c5834a84a77538616215ee9cf6060 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Thu, 30 Oct 2025 11:42:47 +0530 Subject: [PATCH 1/3] HIVE-27647: LLAP LowLevelCache encounters NullPointerException intermittently --- .../hadoop/hive/llap/LlapCacheAwareFs.java | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapCacheAwareFs.java b/ql/src/java/org/apache/hadoop/hive/llap/LlapCacheAwareFs.java index 28fa415b43eb..0b94c775dc2a 100644 --- a/ql/src/java/org/apache/hadoop/hive/llap/LlapCacheAwareFs.java +++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapCacheAwareFs.java @@ -283,7 +283,10 @@ public DiskRangeList createCacheChunk( int chunkPartCount = largeBufCount + ((smallSize > 0) ? 1 : 0); DiskRange[] cacheRanges = new DiskRange[chunkPartCount]; int extraOffsetInChunk = 0; - if (maxAlloc < chunkLength) { + newCacheData = new MemoryBuffer[chunkPartCount]; + int index = 0; + + if (largeBufCount > 0) { largeBuffers = new MemoryBuffer[largeBufCount]; // Note: we don't use StoppableAllocator here - this is not on an IO thread. allocator.allocateMultiple(largeBuffers, maxAlloc, cache.getDataBufferFactory()); @@ -298,8 +301,10 @@ public DiskRangeList createCacheChunk( extraDiskDataOffset += remaining; extraOffsetInChunk += remaining; } + for (MemoryBuffer buf : largeBuffers) { + newCacheData[index++] = buf; + } } - newCacheData = largeBuffers; largeBuffers = null; if (smallSize > 0) { smallBuffer = new MemoryBuffer[1]; @@ -311,15 +316,7 @@ public DiskRangeList createCacheChunk( smallSize, bb, cacheRanges, largeBufCount, chunkFrom + extraOffsetInChunk); extraDiskDataOffset += smallSize; extraOffsetInChunk += smallSize; // Not strictly necessary, no one will look at it. - if (newCacheData == null) { - newCacheData = smallBuffer; - } else { - // TODO: add allocate overload with an offset and length - MemoryBuffer[] combinedCacheData = new MemoryBuffer[largeBufCount + 1]; - System.arraycopy(newCacheData, 0, combinedCacheData, 0, largeBufCount); - newCacheData = combinedCacheData; - newCacheData[largeBufCount] = smallBuffer[0]; - } + newCacheData[index] = smallBuffer[0]; smallBuffer = null; } cache.putFileData(fileKey, cacheRanges, newCacheData, 0, tag); From a89c5451adf027117f7fb95fc39a5f77d00e7e2d Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Thu, 30 Oct 2025 15:54:35 +0530 Subject: [PATCH 2/3] Add QTest for test coverage of the identified scenario --- .../resources/testconfiguration.properties | 1 + .../queries/clientpositive/llap_io_cache.q | 24 ++++++++ .../clientpositive/llap/llap_io_cache.q.out | 60 +++++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 ql/src/test/queries/clientpositive/llap_io_cache.q create mode 100644 ql/src/test/results/clientpositive/llap/llap_io_cache.q.out diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 336a190c3580..a6b04b23fa25 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -111,6 +111,7 @@ minillap.query.files=\ intersect_distinct.q,\ intersect_merge.q,\ limit_bailout.q,\ + llap_io_cache.q,\ llap_nullscan.q,\ llap_stats.q,\ llap_udf.q,\ diff --git a/ql/src/test/queries/clientpositive/llap_io_cache.q b/ql/src/test/queries/clientpositive/llap_io_cache.q new file mode 100644 index 000000000000..01e57b301c4a --- /dev/null +++ b/ql/src/test/queries/clientpositive/llap_io_cache.q @@ -0,0 +1,24 @@ +set hive.llap.io.enabled=true; +set hive.llap.io.memory.mode=cache; +set hive.llap.io.allocator.alloc.max=16Mb; +set hive.vectorized.execution.enabled=true; + +CREATE TABLE tbl_parq ( + id INT, + payload STRING +) +STORED AS PARQUET +TBLPROPERTIES ( + 'parquet.block.size'='16777216', + 'parquet.page.size'='16777216', + 'parquet.compression'='UNCOMPRESSED' +); + +INSERT OVERWRITE TABLE tbl_parq +SELECT + 1 AS id, + RPAD('x', 16777177, 'x') AS payload; + +SELECT LENGTH(payload) FROM tbl_parq; + +SELECT SUM(LENGTH(payload)) FROM tbl_parq; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out b/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out new file mode 100644 index 000000000000..e678ae90ed44 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out @@ -0,0 +1,60 @@ +PREHOOK: query: CREATE TABLE tbl_parq ( + id INT, + payload STRING +) +STORED AS PARQUET +TBLPROPERTIES ( + 'parquet.block.size'='16777216', + 'parquet.page.size'='16777216', + 'parquet.compression'='UNCOMPRESSED' +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl_parq +POSTHOOK: query: CREATE TABLE tbl_parq ( + id INT, + payload STRING +) +STORED AS PARQUET +TBLPROPERTIES ( + 'parquet.block.size'='16777216', + 'parquet.page.size'='16777216', + 'parquet.compression'='UNCOMPRESSED' +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_parq +PREHOOK: query: INSERT OVERWRITE TABLE tbl_parq +SELECT + 1 AS id, + RPAD('x', 16777177, 'x') AS payload +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl_parq +POSTHOOK: query: INSERT OVERWRITE TABLE tbl_parq +SELECT + 1 AS id, + RPAD('x', 16777177, 'x') AS payload +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl_parq +POSTHOOK: Lineage: tbl_parq.id SIMPLE [] +POSTHOOK: Lineage: tbl_parq.payload SIMPLE [] +PREHOOK: query: SELECT LENGTH(payload) FROM tbl_parq +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_parq +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT LENGTH(payload) FROM tbl_parq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_parq +POSTHOOK: Output: hdfs://### HDFS PATH ### +16777177 +PREHOOK: query: SELECT SUM(LENGTH(payload)) FROM tbl_parq +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_parq +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT SUM(LENGTH(payload)) FROM tbl_parq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_parq +POSTHOOK: Output: hdfs://### HDFS PATH ### +16777177 From a27005d63edbdaf52d6cd12e46254e523c054daa Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Thu, 30 Oct 2025 19:21:58 +0530 Subject: [PATCH 3/3] Address Review comments - 1 --- ql/src/test/queries/clientpositive/llap_io_cache.q | 4 +++- .../results/clientpositive/llap/llap_io_cache.q.out | 10 ++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ql/src/test/queries/clientpositive/llap_io_cache.q b/ql/src/test/queries/clientpositive/llap_io_cache.q index 01e57b301c4a..b5ab5b25bae9 100644 --- a/ql/src/test/queries/clientpositive/llap_io_cache.q +++ b/ql/src/test/queries/clientpositive/llap_io_cache.q @@ -3,6 +3,8 @@ set hive.llap.io.memory.mode=cache; set hive.llap.io.allocator.alloc.max=16Mb; set hive.vectorized.execution.enabled=true; +DROP TABLE IF EXISTS tbl_parq; + CREATE TABLE tbl_parq ( id INT, payload STRING @@ -14,7 +16,7 @@ TBLPROPERTIES ( 'parquet.compression'='UNCOMPRESSED' ); -INSERT OVERWRITE TABLE tbl_parq +INSERT INTO TABLE tbl_parq SELECT 1 AS id, RPAD('x', 16777177, 'x') AS payload; diff --git a/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out b/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out index e678ae90ed44..765ae2216310 100644 --- a/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_io_cache.q.out @@ -1,3 +1,9 @@ +PREHOOK: query: DROP TABLE IF EXISTS tbl_parq +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: DROP TABLE IF EXISTS tbl_parq +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default PREHOOK: query: CREATE TABLE tbl_parq ( id INT, payload STRING @@ -24,14 +30,14 @@ TBLPROPERTIES ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_parq -PREHOOK: query: INSERT OVERWRITE TABLE tbl_parq +PREHOOK: query: INSERT INTO TABLE tbl_parq SELECT 1 AS id, RPAD('x', 16777177, 'x') AS payload PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@tbl_parq -POSTHOOK: query: INSERT OVERWRITE TABLE tbl_parq +POSTHOOK: query: INSERT INTO TABLE tbl_parq SELECT 1 AS id, RPAD('x', 16777177, 'x') AS payload