@@ -2309,7 +2309,23 @@ final class HtmlEntities {
23092309 * @return The offset after the end of the decoded sequence in {@code html}.
23102310 */
23112311 public static int appendDecodedEntity (
2312- String html , int offset , int limit , StringBuilder sb ) {
2312+ String html , int offset , int limit , StringBuilder sb ) {
2313+ return appendDecodedEntity (html , offset , limit , false , sb );
2314+ }
2315+
/**
 * Decodes any HTML entity at the given location and appends it to a string
 * builder. This handles both named and numeric entities.
 *
 * @param html HTML text.
 * @param offset the position of the sequence to decode in {@code html}.
 * @param limit the last position that could be part of the sequence to decode
 *     in {@code html}.
 * @param inAttribute if true, a named entity match that does not end in
 *     {@code ';'} is not accepted when the character right after it is an
 *     ASCII letter, an ASCII digit, or {@code '='}; if false, every match is
 *     accepted.
 * @param sb string builder to append to.
 * @return The offset after the end of the decoded sequence in {@code html}.
 */
2327+ public static int appendDecodedEntity (
2328+ String html , int offset , int limit , boolean inAttribute , StringBuilder sb ) {
23132329 char ch = html .charAt (offset );
23142330 if ('&' != ch ) {
23152331 sb .append (ch );
@@ -2422,7 +2438,7 @@ public static int appendDecodedEntity(
24222438 char nameChar = html .charAt (i );
24232439 t = t .lookup (nameChar );
24242440 if (t == null ) { break ; }
2425- if (t .isTerminal ()) {
2441+ if (t .isTerminal () && mayComplete ( inAttribute , html , i , limit ) ) {
24262442 longestDecode = t ;
24272443 tail = i + 1 ;
24282444 }
@@ -2434,7 +2450,7 @@ public static int appendDecodedEntity(
24342450 if ('Z' >= nameChar && nameChar >= 'A' ) { nameChar |= 32 ; }
24352451 t = t .lookup (nameChar );
24362452 if (t == null ) { break ; }
2437- if (t .isTerminal ()) {
2453+ if (t .isTerminal () && mayComplete ( inAttribute , html , i , limit ) ) {
24382454 longestDecode = t ;
24392455 tail = i + 1 ;
24402456 }
@@ -2456,11 +2472,37 @@ public static int appendDecodedEntity(
24562472
24572473 private static boolean isHtmlIdContinueChar (char ch ) {
24582474 int chLower = ch | 32 ;
2459- return ('0' <= chLower && chLower <= '9' )
2475+ return ('0' <= ch && ch <= '9' )
24602476 || ('a' <= chLower && chLower <= 'z' )
24612477 || ('-' == ch );
24622478 }
24632479
2480+
2481+ /** True if the character at i in html may complete a named character reference */
2482+ private static boolean mayComplete (boolean inAttribute , String html , int i , int limit ) {
2483+ if (inAttribute && html .charAt (i ) != ';' && i + 1 < limit ) {
2484+ // See if the next character blocks treating this as a full match.
2485+ // This avoids problems like "¶" being treated as a decoding in
2486+ // <a href="?foo¶m=1">
2487+ if (continuesCharacterReferenceName (html .charAt (i + 1 ))) {
2488+ return false ;
2489+ }
2490+ }
2491+ return true ;
2492+ }
2493+
2494+ /**
2495+ * @see <a href="https://github.com/OWASP/java-html-sanitizer/issues/254#issuecomment-1080864368"
2496+ * >comments in issue 254</a>
2497+ */
2498+ private static boolean continuesCharacterReferenceName (char ch ) {
2499+ int chLower = ch | 32 ;
2500+ return ('0' <= ch && ch <= '9' )
2501+ || ('a' <= chLower && chLower <= 'z' )
2502+ || (ch == '=' );
2503+ }
2504+
2505+
24642506// /** A possible entity name like "amp" or "gt". */
24652507// public static boolean isEntityName(String name) {
24662508// Trie t = ENTITY_TRIE;
0 commit comments