Skip to content

Commit def940d

Browse files
committed
Add functions for attribute value normalization
closes tafia#371
1 parent 25cf231 commit def940d

File tree

7 files changed

+537
-13
lines changed

7 files changed

+537
-13
lines changed

Changelog.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,18 @@
1212

1313
### New Features
1414

15+
- [#379]: Improved compliance with the XML attribute value normalization process by
16+
adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`,
17+
which ought to be used in place of `Attribute::unescape_value()` and
18+
`Attribute::unescape_value_with()`
19+
1520
### Bug Fixes
1621

1722
### Misc Changes
1823

24+
- [#379]: Added tests for attribute value normalization
25+
26+
[#379]: https://github.com/tafia/quick-xml/pull/379
1927

2028
## 0.31.0 -- 2023-10-22
2129

@@ -187,6 +195,10 @@ serde >= 1.0.181
187195
- [#565]: Fix compilation error when build with serde <1.0.139
188196

189197

198+
### New Tests
199+
200+
- [#379]: Added tests for attribute value normalization
201+
190202
[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
191203
[#490]: https://github.com/tafia/quick-xml/pull/490
192204
[#510]: https://github.com/tafia/quick-xml/issues/510

benches/macrobenches.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,13 @@ static INPUTS: &[(&str, &str)] = &[
4343
("players.xml", PLAYERS),
4444
];
4545

46-
// TODO: use fully normalized attribute values
4746
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
4847
let mut r = Reader::from_str(doc);
4948
loop {
5049
match criterion::black_box(r.read_event()?) {
5150
Event::Start(e) | Event::Empty(e) => {
5251
for attr in e.attributes() {
53-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
52+
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
5453
}
5554
}
5655
Event::Text(e) => {
@@ -67,15 +66,14 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
6766
Ok(())
6867
}
6968

70-
// TODO: use fully normalized attribute values
7169
fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
7270
let mut r = Reader::from_reader(doc);
7371
let mut buf = Vec::new();
7472
loop {
7573
match criterion::black_box(r.read_event_into(&mut buf)?) {
7674
Event::Start(e) | Event::Empty(e) => {
7775
for attr in e.attributes() {
78-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
76+
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
7977
}
8078
}
8179
Event::Text(e) => {
@@ -93,15 +91,14 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
9391
Ok(())
9492
}
9593

96-
// TODO: use fully normalized attribute values
9794
fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
9895
let mut r = NsReader::from_str(doc);
9996
loop {
10097
match criterion::black_box(r.read_resolved_event()?) {
10198
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
10299
criterion::black_box(resolved_ns);
103100
for attr in e.attributes() {
104-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
101+
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
105102
}
106103
}
107104
(resolved_ns, Event::Text(e)) => {
@@ -120,7 +117,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
120117
Ok(())
121118
}
122119

123-
// TODO: use fully normalized attribute values
124120
fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
125121
let mut r = NsReader::from_reader(doc);
126122
let mut buf = Vec::new();
@@ -129,7 +125,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
129125
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
130126
criterion::black_box(resolved_ns);
131127
for attr in e.attributes() {
132-
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
128+
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
133129
}
134130
}
135131
(resolved_ns, Event::Text(e)) => {

benches/microbenches.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
use std::borrow::Cow;
2+
13
use criterion::{self, criterion_group, criterion_main, Criterion};
24
use pretty_assertions::assert_eq;
35
use quick_xml::escape::{escape, unescape};
6+
use quick_xml::events::attributes::Attribute;
47
use quick_xml::events::Event;
58
use quick_xml::name::QName;
69
use quick_xml::reader::{NsReader, Reader};
@@ -232,6 +235,74 @@ fn attributes(c: &mut Criterion) {
232235
assert_eq!(count, 150);
233236
})
234237
});
238+
239+
group.finish();
240+
}
241+
242+
/// Benchmarks normalizing attribute values
243+
fn attribute_value_normalization(c: &mut Criterion) {
244+
let mut group = c.benchmark_group("attribute_value_normalization");
245+
246+
group.bench_function("noop_short", |b| {
247+
let attr = Attribute {
248+
key: QName(b"foo"),
249+
value: Cow::Borrowed(b"foobar"),
250+
};
251+
b.iter(|| {
252+
criterion::black_box(attr.normalized_value()).unwrap();
253+
})
254+
});
255+
256+
group.bench_function("noop_long", |b| {
257+
let attr = Attribute {
258+
key: QName(b"foo"),
259+
value: Cow::Borrowed(LOREM_IPSUM_TEXT.as_bytes()),
260+
};
261+
b.iter(|| {
262+
criterion::black_box(attr.normalized_value()).unwrap();
263+
})
264+
});
265+
266+
group.bench_function("replacement_chars", |b| {
267+
let attr = Attribute {
268+
key: QName(b"foo"),
269+
value: Cow::Borrowed(b"just a bit\n of text without\tany entities"),
270+
};
271+
b.iter(|| {
272+
criterion::black_box(attr.normalized_value()).unwrap();
273+
})
274+
});
275+
276+
group.bench_function("char_reference", |b| {
277+
let attr1 = Attribute {
278+
key: QName(b"foo"),
279+
value: Cow::Borrowed(b"prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;"),
280+
};
281+
let attr2 = Attribute {
282+
key: QName(b"foo"),
283+
value: Cow::Borrowed(b"&#38;&#60;"),
284+
};
285+
b.iter(|| {
286+
criterion::black_box(attr1.normalized_value()).unwrap();
287+
criterion::black_box(attr2.normalized_value()).unwrap();
288+
})
289+
});
290+
291+
group.bench_function("entity_reference", |b| {
292+
let attr1 = Attribute {
293+
key: QName(b"foo"),
294+
value: Cow::Borrowed(b"age &gt; 72 &amp;&amp; age &lt; 21"),
295+
};
296+
let attr2 = Attribute {
297+
key: QName(b"foo"),
298+
value: Cow::Borrowed(b"&quot;what&apos;s that?&quot;"),
299+
};
300+
b.iter(|| {
301+
criterion::black_box(attr1.normalized_value()).unwrap();
302+
criterion::black_box(attr2.normalized_value()).unwrap();
303+
})
304+
});
305+
235306
group.finish();
236307
}
237308

@@ -344,6 +415,7 @@ criterion_group!(
344415
read_resolved_event_into,
345416
one_event,
346417
attributes,
418+
attribute_value_normalization,
347419
escaping,
348420
unescaping,
349421
);

src/errors.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ impl From<EscapeError> for Error {
100100
}
101101

102102
impl From<AttrError> for Error {
103+
/// Creates a new `Error::InvalidAttr` from the given error
103104
#[inline]
104105
fn from(error: AttrError) -> Self {
105106
Error::InvalidAttr(error)

0 commit comments

Comments
 (0)