@@ -110,23 +110,30 @@ where
110
110
let mut escaped = None ;
111
111
let mut last_pos = 0 ;
112
112
for i in escapes {
113
- let escaped = escaped. get_or_insert_with ( || Vec :: with_capacity ( raw. len ( ) ) ) ;
113
+ // If we have an escape, the escaped string will be at least somewhat larger than the raw string,
114
+ // reserve a little more space, so we might not resize at all if only a few escapes are found.
115
+ let escaped = escaped. get_or_insert_with ( || String :: with_capacity ( raw. len ( ) + 64 ) ) ;
114
116
let byte = bytes[ i] ;
115
- escaped. extend_from_slice ( & bytes[ last_pos..i] ) ;
117
+ // SAFETY: the escapes iterator should only return indexes of bytes we know how to escape.
118
+ // if one of those bytes is found, it _must_ be a complete character, so `i` must be a
119
+ // character boundary.
120
+ // last_pos will only be either 0 or i+1, and all supported chars are one byte long,
121
+ // last_pos will also always be at a char boundary
122
+ escaped. push_str ( & raw [ last_pos..i] ) ;
116
123
match byte {
117
- b'<' => escaped. extend_from_slice ( b "<") ,
118
- b'>' => escaped. extend_from_slice ( b ">") ,
119
- b'\'' => escaped. extend_from_slice ( b "'") ,
120
- b'&' => escaped. extend_from_slice ( b "&") ,
121
- b'"' => escaped. extend_from_slice ( b """) ,
124
+ b'<' => escaped. push_str ( "<" ) ,
125
+ b'>' => escaped. push_str ( ">" ) ,
126
+ b'\'' => escaped. push_str ( "'" ) ,
127
+ b'&' => escaped. push_str ( "&" ) ,
128
+ b'"' => escaped. push_str ( """ ) ,
122
129
123
130
// This set of escapes handles characters that should be escaped
124
131
// in elements of xs:lists, because those characters work as
125
132
// delimiters of list elements
126
- b'\t' => escaped. extend_from_slice ( b "	") ,
127
- b'\n' => escaped. extend_from_slice ( b " ") ,
128
- b'\r' => escaped. extend_from_slice ( b " ") ,
129
- b' ' => escaped. extend_from_slice ( b " ") ,
133
+ b'\t' => escaped. push_str ( "	" ) ,
134
+ b'\n' => escaped. push_str ( " " ) ,
135
+ b'\r' => escaped. push_str ( " " ) ,
136
+ b' ' => escaped. push_str ( " " ) ,
130
137
_ => unreachable ! (
131
138
"Only '<', '>','\' , '&', '\" ', '\\ t', '\\ r', '\\ n', and ' ' are escaped"
132
139
) ,
@@ -135,14 +142,8 @@ where
135
142
}
136
143
137
144
if let Some ( mut escaped) = escaped {
138
- if let Some ( raw) = bytes. get ( last_pos..) {
139
- escaped. extend_from_slice ( raw) ;
140
- }
141
- // SAFETY: we operate on UTF-8 input and search for an one byte chars only,
142
- // so all slices that was put to the `escaped` is a valid UTF-8 encoded strings
143
- // TODO: Can be replaced with `unsafe { String::from_utf8_unchecked() }`
144
- // if unsafe code will be allowed
145
- Cow :: Owned ( String :: from_utf8 ( escaped) . unwrap ( ) )
145
+ escaped. push_str ( & raw [ last_pos..] ) ;
146
+ Cow :: Owned ( escaped)
146
147
} else {
147
148
Cow :: Borrowed ( raw)
148
149
}
@@ -182,17 +183,14 @@ where
182
183
match iter. next ( ) {
183
184
Some ( end) if bytes[ end] == b';' => {
184
185
// append valid data
185
- if unescaped. is_none ( ) {
186
- unescaped = Some ( String :: with_capacity ( raw. len ( ) ) ) ;
187
- }
188
- let unescaped = unescaped. as_mut ( ) . expect ( "initialized" ) ;
186
+ let unescaped = unescaped. get_or_insert_with ( || String :: with_capacity ( raw. len ( ) ) ) ;
189
187
unescaped. push_str ( & raw [ last_end..start] ) ;
190
188
191
189
// search for character correctness
192
190
let pat = & raw [ start + 1 ..end] ;
193
191
if let Some ( entity) = pat. strip_prefix ( '#' ) {
194
192
let codepoint = parse_number ( entity, start..end) ?;
195
- unescaped. push_str ( codepoint. encode_utf8 ( & mut [ 0u8 ; 4 ] ) ) ;
193
+ unescaped. push ( codepoint) ;
196
194
} else if let Some ( value) = named_entity ( pat) {
197
195
unescaped. push_str ( value) ;
198
196
} else if let Some ( value) = resolve_entity ( pat) {
0 commit comments