diff --git a/src/libime/pinyin/pinyinencoder.cpp b/src/libime/pinyin/pinyinencoder.cpp index 671b47b9..c0595baf 100644 --- a/src/libime/pinyin/pinyinencoder.cpp +++ b/src/libime/pinyin/pinyinencoder.cpp @@ -233,20 +233,20 @@ PinyinEncoder::parseUserPinyin(std::string userPinyin, fuzzyFlags, pinyinMap); auto nextMatchAlt = longestMatch(iter + str.size() - 1, end, fuzzyFlags, pinyinMap); - auto matchSize = str.size() + nextMatch.match.size(); auto matchSizeAlt = str.size() - 1 + nextMatchAlt.match.size(); - // comparator is (validPinyin, wholeMatchSize, + // comparator is (validPinyin, whole size >= lhs pinyin, // isCompletePinyin) validPinyin means it's at least some // pinyin, instead of things startsWith i,u,v. Since // longestMatch will now treat string startsWith iuv a whole // segment, we need to compare validity before the length. - // Always prefer longer match and complete pinyin match. - std::tuple compare( - nextMatch.valid, matchSize, nextMatch.isCompletePinyin); - std::tuple compareAlt( - nextMatchAlt.valid, matchSizeAlt, + // If whole size is equal to lhs pinyin, then it should be + // handled by inner segment flag. + std::tuple compare( + nextMatch.valid, true, nextMatch.isCompletePinyin); + std::tuple compareAlt( + nextMatchAlt.valid, matchSizeAlt > str.size(), nextMatchAlt.isCompletePinyin); if (compare >= compareAlt) { diff --git a/test/testpinyinencoder.cpp b/test/testpinyinencoder.cpp index 8033167e..0c306389 100644 --- a/test/testpinyinencoder.cpp +++ b/test/testpinyinencoder.cpp @@ -233,6 +233,8 @@ int main() { check("zhuna", PinyinFuzzyFlag::Inner, {"zhu", "na"}); check("zhuna", PinyinFuzzyFlag::Inner, {"zhun", "a"}); + check("sangeren", PinyinFuzzyFlag::Inner, {"san", "ge", "ren"}); + { PinyinCorrectionProfile profile(BuiltinPinyinCorrectionProfile::Qwerty); auto graph = PinyinEncoder::parseUserPinyin(