diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 9209bd3135099..87d004040c3a0 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -642,9 +642,13 @@ public UTF8String substring(final int start, final int until) { } int j = i; - while (i < numBytes && c < until) { - i += numBytesForFirstByte(getByte(i)); - c += 1; + if (until == Integer.MAX_VALUE) { + i = numBytes; + } else { + while (i < numBytes && c < until) { + i += numBytesForFirstByte(getByte(i)); + c += 1; + } } if (i > j) { @@ -663,9 +667,8 @@ public UTF8String substringSQL(int pos, int length) { // refers to element i-1 in the sequence. If a start index i is less than 0, it refers // to the -ith element before the end of the sequence. If a start index i is 0, it // refers to the first element. - int len = numChars(); // `len + pos` does not overflow as `len >= 0`. - int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0); + int start = (pos > 0) ? pos -1 : ((pos < 0) ? numChars() + pos : 0); int end; if ((long) start + length > Integer.MAX_VALUE) {