Skip to content

Commit d34df4b

Browse files
Hotfix/zbug 3867 (#14)
* ZBUG-3867 Wrong sanitised output for link with &param is fixed and version is updated to 6.
1 parent 526eca7 commit d34df4b

File tree

6 files changed

+166
-166
lines changed

6 files changed

+166
-166
lines changed

build.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<!-- PROPERTIES -->
55
<property file='build-custom.properties' />
66

7-
<property name='version' value='20190610.5'/>
7+
<property name='version' value='20190610.6'/>
88
<property name='name' value='owasp-java-html-sanitizer'/>
99
<property name='fullname' value='${name}-${version}'/>
1010
<property name='Title' value='OWASP Java HTML Sanitizer'/>

src/main/java/org/owasp/html/Encoding.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,18 @@ public final class Encoding {
4343
* @return text/plain
4444
*/
4545
public static String decodeHtml(String s) {
46+
return decodeHtml(s, false);
47+
}
48+
49+
/**
50+
* Decodes HTML entities to produce a string containing only valid
51+
* Unicode scalar values.
52+
*
53+
* @param s text/html
54+
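* @param inAttribute true if {@code s} is the content of an attribute value.
*     When true, a named reference that lacks its trailing semicolon is not
*     treated as complete if the next character is a letter, digit, or
*     {@code =} (so {@code &para} is not matched in {@code ?foo&param=1}).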
55+
* @return text/plain
56+
*/
57+
public static String decodeHtml(String s, boolean inAttribute) {
4658
int firstAmp = s.indexOf('&');
4759
int safeLimit = longestPrefixOfGoodCodeunits(s);
4860
if ((firstAmp & safeLimit) < 0) { return s; }
@@ -55,7 +67,7 @@ public static String decodeHtml(String s) {
5567
int amp = firstAmp;
5668
while (amp >= 0) {
5769
sb.append(s, pos, amp);
58-
int end = HtmlEntities.appendDecodedEntity(s, amp, n, sb);
70+
int end = HtmlEntities.appendDecodedEntity(s, amp, n, inAttribute, sb);
5971
pos = end;
6072
amp = s.indexOf('&', end);
6173
}

src/main/java/org/owasp/html/HtmlEntities.java

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2309,7 +2309,23 @@ final class HtmlEntities {
23092309
* @return The offset after the end of the decoded sequence in {@code html}.
23102310
*/
23112311
public static int appendDecodedEntity(
2312-
String html, int offset, int limit, StringBuilder sb) {
2312+
String html, int offset, int limit, StringBuilder sb) {
2313+
return appendDecodedEntity(html, offset, limit, false, sb);
2314+
}
2315+
2316+
/**
2317+
* Decodes any HTML entity at the given location and appends it to a string
2318+
* builder. This handles both named and numeric entities.
2319+
*
2320+
* @param html HTML text.
2321+
* @param offset the position of the sequence to decode in {@code html}.
2322+
* @param limit the last position that could be part of the sequence to decode
2323+
* in {@code html}.
2324+
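* @param inAttribute true when {@code html} is an attribute value, in which
*     case a named reference without a trailing semicolon is not matched if
*     the next character is a letter, digit, or {@code =}.
* @param sb string builder to append to.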
2325+
* @return The offset after the end of the decoded sequence in {@code html}.
2326+
*/
2327+
public static int appendDecodedEntity(
2328+
String html, int offset, int limit, boolean inAttribute, StringBuilder sb) {
23132329
char ch = html.charAt(offset);
23142330
if ('&' != ch) {
23152331
sb.append(ch);
@@ -2422,7 +2438,7 @@ public static int appendDecodedEntity(
24222438
char nameChar = html.charAt(i);
24232439
t = t.lookup(nameChar);
24242440
if (t == null) { break; }
2425-
if (t.isTerminal()) {
2441+
if (t.isTerminal() && mayComplete(inAttribute, html, i, limit)) {
24262442
longestDecode = t;
24272443
tail = i + 1;
24282444
}
@@ -2434,7 +2450,7 @@ public static int appendDecodedEntity(
24342450
if ('Z' >= nameChar && nameChar >= 'A') { nameChar |= 32; }
24352451
t = t.lookup(nameChar);
24362452
if (t == null) { break; }
2437-
if (t.isTerminal()) {
2453+
if (t.isTerminal() && mayComplete(inAttribute, html, i, limit)) {
24382454
longestDecode = t;
24392455
tail = i + 1;
24402456
}
@@ -2456,11 +2472,37 @@ public static int appendDecodedEntity(
24562472

24572473
private static boolean isHtmlIdContinueChar(char ch) {
24582474
int chLower = ch | 32;
2459-
return ('0' <= chLower && chLower <= '9')
2475+
return ('0' <= ch && ch <= '9')
24602476
|| ('a' <= chLower && chLower <= 'z')
24612477
|| ('-' == ch);
24622478
}
24632479

2480+
2481+
/** True if the character at {@code i} in {@code html} may complete a named character reference. */
2482+
private static boolean mayComplete(boolean inAttribute, String html, int i, int limit) {
2483+
if (inAttribute && html.charAt(i) != ';' && i + 1 < limit) {
2484+
// See if the next character blocks treating this as a full match.
2485+
// This avoids problems like "&para" being treated as a decoding in
2486+
// <a href="?foo&param=1">
2487+
if (continuesCharacterReferenceName(html.charAt(i + 1))) {
2488+
return false;
2489+
}
2490+
}
2491+
return true;
2492+
}
2493+
2494+
/**
2495+
* True if {@code ch} is an ASCII letter, an ASCII digit, or {@code '='}.
*
* @see <a href="https://github.com/OWASP/java-html-sanitizer/issues/254#issuecomment-1080864368"
2496+
* >comments in issue 254</a>
2497+
*/
2498+
private static boolean continuesCharacterReferenceName(char ch) {
2499+
int chLower = ch | 32;
2500+
return ('0' <= ch && ch <= '9')
2501+
|| ('a' <= chLower && chLower <= 'z')
2502+
|| (ch == '=');
2503+
}
2504+
2505+
24642506
// /** A possible entity name like "amp" or "gt". */
24652507
// public static boolean isEntityName(String name) {
24662508
// Trie t = ENTITY_TRIE;

src/main/java/org/owasp/html/HtmlSanitizer.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public static void sanitize(
144144
switch (token.type) {
145145
case TEXT:
146146
receiver.text(
147-
Encoding.decodeHtml(htmlContent.substring(token.start, token.end)));
147+
Encoding.decodeHtml(htmlContent.substring(token.start, token.end), false));
148148
break;
149149
case UNESCAPED:
150150
receiver.text(Encoding.stripBannedCodeunits(
@@ -177,8 +177,9 @@ public static void sanitize(
177177
htmlContent.substring(tagBodyToken.start, tagBodyToken.end)));
178178
break;
179179
case ATTRVALUE:
180-
attrs.add(Encoding.decodeHtml(stripQuotes(
181-
htmlContent.substring(tagBodyToken.start, tagBodyToken.end))));
180+
String attributeContentRaw =
181+
stripQuotes(htmlContent.substring(tagBodyToken.start, tagBodyToken.end));
182+
attrs.add(Encoding.decodeHtml(attributeContentRaw, true));
182183
attrsReadyForName = true;
183184
break;
184185
case TAGEND:

0 commit comments

Comments
 (0)