|
| 1 | +Description: Fix heap buffer overflow in UTF-8 processing |
| 2 | + Two out-of-bounds read issues were identified in OpenCC's UTF-8 |
| 3 | + processing logic when handling malformed or truncated UTF-8 sequences. |
| 4 | + . |
| 5 | + 1) MaxMatchSegmentation: |
| 6 | + NextCharLength() could return a value larger than the remaining input size. |
| 7 | + The previous logic subtracted this value from a size_t length counter, |
| 8 | + potentially causing underflow and subsequent out-of-bounds reads. |
| 9 | + . |
| 10 | + 2) Conversion: |
| 11 | + Convert() called MatchPrefix() without a length bound and advanced by an |
| 12 | + unclamped NextCharLength(), so dictionary matching and character copying could read past the end of the input buffer. |
| 13 | + . |
| 14 | + This patch fixes both issues by: |
| 15 | + - Explicitly tracking the end of the input buffer |
| 16 | + - Recomputing remaining length on each iteration |
| 17 | + - Clamping matched character and key lengths to the remaining buffer size |
| 18 | + - Preventing reads past the null terminator |
| 19 | +Origin: upstream, https://github.com/BYVoid/OpenCC/commit/345c9a50ab07018f1b4439776bad78a0d40778ec |
| 20 | +Bug: https://github.com/BYVoid/OpenCC/issues/997 |
| 21 | +Bug-Debian: https://bugs.debian.org/1126286 |
| 22 | +Forwarded: not-needed |
| 23 | +Last-Update: 2026-04-29 |
| 24 | + |
| 25 | +--- opencc-1.1.9+ds1.orig/src/Conversion.cpp |
| 26 | ++++ opencc-1.1.9+ds1/src/Conversion.cpp |
| 27 | +@@ -25,14 +25,30 @@ using namespace opencc; |
| 28 | + |
| 29 | + std::string Conversion::Convert(const char* phrase) const { |
| 30 | + std::ostringstream buffer; |
| 31 | ++ // Calculate string end to prevent reading beyond null terminator |
| 32 | ++ const char* phraseEnd = phrase; |
| 33 | ++ while (*phraseEnd != '\0') { |
| 34 | ++ phraseEnd++; |
| 35 | ++ } |
| 36 | ++ |
| 37 | + for (const char* pstr = phrase; *pstr != '\0';) { |
| 38 | +- Optional<const DictEntry*> matched = dict->MatchPrefix(pstr); |
| 39 | ++ size_t remainingLength = phraseEnd - pstr; |
| 40 | ++ Optional<const DictEntry*> matched = dict->MatchPrefix(pstr, remainingLength); |
| 41 | + size_t matchedLength; |
| 42 | + if (matched.IsNull()) { |
| 43 | + matchedLength = UTF8Util::NextCharLength(pstr); |
| 44 | ++ // Ensure we don't read beyond the null terminator |
| 45 | ++ if (matchedLength > remainingLength) { |
| 46 | ++ matchedLength = remainingLength; |
| 47 | ++ } |
| 48 | + buffer << UTF8Util::FromSubstr(pstr, matchedLength); |
| 49 | + } else { |
| 50 | + matchedLength = matched.Get()->KeyLength(); |
| 51 | ++ // Defensive: ensure dictionary key length does not exceed remaining input |
| 52 | ++ // (MatchPrefix should already guarantee this, but defense in depth) |
| 53 | ++ if (matchedLength > remainingLength) { |
| 54 | ++ matchedLength = remainingLength; |
| 55 | ++ } |
| 56 | + buffer << matched.Get()->GetDefault(); |
| 57 | + } |
| 58 | + pstr += matchedLength; |
| 59 | +--- opencc-1.1.9+ds1.orig/src/MaxMatchSegmentation.cpp |
| 60 | ++++ opencc-1.1.9+ds1/src/MaxMatchSegmentation.cpp |
| 61 | +@@ -26,12 +26,16 @@ SegmentsPtr MaxMatchSegmentation::Segmen |
| 62 | + }; |
| 63 | + size_t length = text.length(); |
| 64 | + for (const char* pstr = text.c_str(); *pstr != '\0';) { |
| 65 | ++ // Recompute remaining length each iteration to avoid underflow |
| 66 | ++ size_t remainingLength = text.c_str() + text.length() - pstr; |
| 67 | + const Optional<const DictEntry*>& matched = dict->MatchPrefix(pstr, length); |
| 68 | + size_t matchedLength; |
| 69 | + if (matched.IsNull()) { |
| 70 | + matchedLength = UTF8Util::NextCharLength(pstr); |
| 71 | ++ // Clamp matchedLength to remaining buffer size |
| 72 | ++ if (matchedLength > remainingLength) matchedLength = remainingLength; |
| 73 | + segLength += matchedLength; |
| 74 | + } else { |
| 75 | + clearBuffer(); |
| 76 | + matchedLength = matched.Get()->KeyLength(); |
| 77 | + segments->AddSegment(matched.Get()->Key()); |
| 78 | + segStart = pstr + matchedLength; |
| 79 | + } |
| 80 | + pstr += matchedLength; |
| 81 | +- length -= matchedLength; |
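| 82 | + // length is no longer decremented; remainingLength is recomputed each iteration. NOTE(review): MatchPrefix() above still receives length, not remainingLength - confirm this is intended |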
| 83 | + } |
| 84 | + clearBuffer(); |
| 85 | + return segments; |
0 commit comments