Skip to content

Commit f9cdf27

Browse files
committed
Split reader into IoReader and SliceReader
1 parent a6588c2 commit f9cdf27

14 files changed

+1494
-1272
lines changed

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ let xml = r#"<tag1 att1 = "test">
3030
</tag2>
3131
</tag1>"#;
3232

33-
let mut reader = Reader::from_str(xml);
33+
let mut reader = Reader::from_reader(xml.as_bytes());
34+
// If you want to read from a string or byte slice without buffering, use:
35+
// let mut reader = Reader::from_str(xml);
36+
// In that case, no `Vec` is needed for buffering below and you can use `read_event` instead of
37+
// `read_event_into`.
3438
reader.trim_text(true);
3539

3640
let mut count = 0;
@@ -75,9 +79,8 @@ let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
7579
let mut reader = Reader::from_str(xml);
7680
reader.trim_text(true);
7781
let mut writer = Writer::new(Cursor::new(Vec::new()));
78-
let mut buf = Vec::new();
7982
loop {
80-
match reader.read_event_into(&mut buf) {
83+
match reader.read_event() {
8184
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {
8285

8386
// creates a new element ... alternatively we could reuse `e` by calling
@@ -101,7 +104,6 @@ loop {
101104
Ok(e) => assert!(writer.write_event(&e).is_ok()),
102105
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
103106
}
104-
buf.clear();
105107
}
106108

107109
let result = writer.into_inner().into_inner();

benches/macrobenches.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
1919
// TODO: read the namespaces too
2020
// TODO: use fully normalized attribute values
2121
fn parse_document(doc: &[u8]) -> XmlResult<()> {
22-
let mut r = Reader::from_reader(doc);
22+
let mut r = Reader::from_bytes(doc);
2323
loop {
2424
match r.read_event()? {
2525
Event::Start(e) | Event::Empty(e) => {

benches/microbenches.rs

Lines changed: 26 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,15 @@ fn read_event(c: &mut Criterion) {
2929
let mut group = c.benchmark_group("read_event");
3030
group.bench_function("trim_text = false", |b| {
3131
b.iter(|| {
32-
let mut r = Reader::from_reader(SAMPLE);
32+
let mut r = Reader::from_bytes(SAMPLE);
3333
r.check_end_names(false).check_comments(false);
3434
let mut count = criterion::black_box(0);
35-
let mut buf = Vec::new();
3635
loop {
37-
match r.read_event_into(&mut buf) {
36+
match r.read_event() {
3837
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
3938
Ok(Event::Eof) => break,
4039
_ => (),
4140
}
42-
buf.clear();
4341
}
4442
assert_eq!(
4543
count, 1550,
@@ -50,19 +48,17 @@ fn read_event(c: &mut Criterion) {
5048

5149
group.bench_function("trim_text = true", |b| {
5250
b.iter(|| {
53-
let mut r = Reader::from_reader(SAMPLE);
51+
let mut r = Reader::from_bytes(SAMPLE);
5452
r.check_end_names(false)
5553
.check_comments(false)
5654
.trim_text(true);
5755
let mut count = criterion::black_box(0);
58-
let mut buf = Vec::new();
5956
loop {
60-
match r.read_event_into(&mut buf) {
57+
match r.read_event() {
6158
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
6259
Ok(Event::Eof) => break,
6360
_ => (),
6461
}
65-
buf.clear();
6662
}
6763
assert_eq!(
6864
count, 1550,
@@ -79,18 +75,16 @@ fn read_namespaced_event(c: &mut Criterion) {
7975
let mut group = c.benchmark_group("read_namespaced_event");
8076
group.bench_function("trim_text = false", |b| {
8177
b.iter(|| {
82-
let mut r = Reader::from_reader(SAMPLE);
78+
let mut r = Reader::from_bytes(SAMPLE);
8379
r.check_end_names(false).check_comments(false);
8480
let mut count = criterion::black_box(0);
85-
let mut buf = Vec::new();
8681
let mut ns_buf = Vec::new();
8782
loop {
88-
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
83+
match r.read_namespaced_event(&mut ns_buf) {
8984
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
9085
Ok((_, Event::Eof)) => break,
9186
_ => (),
9287
}
93-
buf.clear();
9488
}
9589
assert_eq!(
9690
count, 1550,
@@ -101,20 +95,18 @@ fn read_namespaced_event(c: &mut Criterion) {
10195

10296
group.bench_function("trim_text = true", |b| {
10397
b.iter(|| {
104-
let mut r = Reader::from_reader(SAMPLE);
98+
let mut r = Reader::from_bytes(SAMPLE);
10599
r.check_end_names(false)
106100
.check_comments(false)
107101
.trim_text(true);
108102
let mut count = criterion::black_box(0);
109-
let mut buf = Vec::new();
110103
let mut ns_buf = Vec::new();
111104
loop {
112-
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
105+
match r.read_namespaced_event(&mut ns_buf) {
113106
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
114107
Ok((_, Event::Eof)) => break,
115108
_ => (),
116109
}
117-
buf.clear();
118110
}
119111
assert_eq!(
120112
count, 1550,
@@ -131,19 +123,17 @@ fn bytes_text_unescaped(c: &mut Criterion) {
131123
let mut group = c.benchmark_group("BytesText::unescaped");
132124
group.bench_function("trim_text = false", |b| {
133125
b.iter(|| {
134-
let mut buf = Vec::new();
135-
let mut r = Reader::from_reader(SAMPLE);
126+
let mut r = Reader::from_bytes(SAMPLE);
136127
r.check_end_names(false).check_comments(false);
137128
let mut count = criterion::black_box(0);
138129
let mut nbtxt = criterion::black_box(0);
139130
loop {
140-
match r.read_event_into(&mut buf) {
131+
match r.read_event() {
141132
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
142133
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
143134
Ok(Event::Eof) => break,
144135
_ => (),
145136
}
146-
buf.clear();
147137
}
148138
assert_eq!(
149139
count, 1550,
@@ -167,21 +157,19 @@ fn bytes_text_unescaped(c: &mut Criterion) {
167157

168158
group.bench_function("trim_text = true", |b| {
169159
b.iter(|| {
170-
let mut buf = Vec::new();
171-
let mut r = Reader::from_reader(SAMPLE);
160+
let mut r = Reader::from_bytes(SAMPLE);
172161
r.check_end_names(false)
173162
.check_comments(false)
174163
.trim_text(true);
175164
let mut count = criterion::black_box(0);
176165
let mut nbtxt = criterion::black_box(0);
177166
loop {
178-
match r.read_event_into(&mut buf) {
167+
match r.read_event() {
179168
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
180169
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
181170
Ok(Event::Eof) => break,
182171
_ => (),
183172
}
184-
buf.clear();
185173
}
186174
assert_eq!(
187175
count, 1550,
@@ -210,78 +198,66 @@ fn one_event(c: &mut Criterion) {
210198
let mut group = c.benchmark_group("One event");
211199
group.bench_function("StartText", |b| {
212200
let src = "Hello world!".repeat(512 / 12).into_bytes();
213-
let mut buf = Vec::with_capacity(1024);
214201
b.iter(|| {
215-
let mut r = Reader::from_reader(src.as_ref());
202+
let mut r = Reader::from_bytes(src.as_ref());
216203
let mut nbtxt = criterion::black_box(0);
217204
r.check_end_names(false).check_comments(false);
218-
match r.read_event_into(&mut buf) {
205+
match r.read_event() {
219206
Ok(Event::StartText(e)) => nbtxt += e.len(),
220207
something_else => panic!("Did not expect {:?}", something_else),
221208
};
222209

223-
buf.clear();
224-
225210
assert_eq!(nbtxt, 504);
226211
})
227212
});
228213

229214
group.bench_function("Start", |b| {
230215
let src = format!(r#"<hello target="{}">"#, "world".repeat(512 / 5)).into_bytes();
231-
let mut buf = Vec::with_capacity(1024);
232216
b.iter(|| {
233-
let mut r = Reader::from_reader(src.as_ref());
217+
let mut r = Reader::from_bytes(src.as_ref());
234218
let mut nbtxt = criterion::black_box(0);
235219
r.check_end_names(false)
236220
.check_comments(false)
237221
.trim_text(true);
238-
match r.read_event_into(&mut buf) {
222+
match r.read_event() {
239223
Ok(Event::Start(ref e)) => nbtxt += e.len(),
240224
something_else => panic!("Did not expect {:?}", something_else),
241225
};
242226

243-
buf.clear();
244-
245227
assert_eq!(nbtxt, 525);
246228
})
247229
});
248230

249231
group.bench_function("Comment", |b| {
250232
let src = format!(r#"<!-- hello "{}" -->"#, "world".repeat(512 / 5)).into_bytes();
251-
let mut buf = Vec::with_capacity(1024);
252233
b.iter(|| {
253-
let mut r = Reader::from_reader(src.as_ref());
234+
let mut r = Reader::from_bytes(src.as_ref());
254235
let mut nbtxt = criterion::black_box(0);
255236
r.check_end_names(false)
256237
.check_comments(false)
257238
.trim_text(true);
258-
match r.read_event_into(&mut buf) {
239+
match r.read_event() {
259240
Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(),
260241
something_else => panic!("Did not expect {:?}", something_else),
261242
};
262243

263-
buf.clear();
264-
265244
assert_eq!(nbtxt, 520);
266245
})
267246
});
268247

269248
group.bench_function("CData", |b| {
270249
let src = format!(r#"<![CDATA[hello "{}"]]>"#, "world".repeat(512 / 5)).into_bytes();
271-
let mut buf = Vec::with_capacity(1024);
272250
b.iter(|| {
273-
let mut r = Reader::from_reader(src.as_ref());
251+
let mut r = Reader::from_bytes(src.as_ref());
274252
let mut nbtxt = criterion::black_box(0);
275253
r.check_end_names(false)
276254
.check_comments(false)
277255
.trim_text(true);
278-
match r.read_event_into(&mut buf) {
256+
match r.read_event() {
279257
Ok(Event::CData(ref e)) => nbtxt += e.len(),
280258
something_else => panic!("Did not expect {:?}", something_else),
281259
};
282260

283-
buf.clear();
284-
285261
assert_eq!(nbtxt, 518);
286262
})
287263
});
@@ -293,12 +269,11 @@ fn attributes(c: &mut Criterion) {
293269
let mut group = c.benchmark_group("attributes");
294270
group.bench_function("with_checks = true", |b| {
295271
b.iter(|| {
296-
let mut r = Reader::from_reader(PLAYERS);
272+
let mut r = Reader::from_bytes(PLAYERS);
297273
r.check_end_names(false).check_comments(false);
298274
let mut count = criterion::black_box(0);
299-
let mut buf = Vec::new();
300275
loop {
301-
match r.read_event_into(&mut buf) {
276+
match r.read_event() {
302277
Ok(Event::Empty(e)) => {
303278
for attr in e.attributes() {
304279
let _attr = attr.unwrap();
@@ -308,20 +283,18 @@ fn attributes(c: &mut Criterion) {
308283
Ok(Event::Eof) => break,
309284
_ => (),
310285
}
311-
buf.clear();
312286
}
313287
assert_eq!(count, 1041);
314288
})
315289
});
316290

317291
group.bench_function("with_checks = false", |b| {
318292
b.iter(|| {
319-
let mut r = Reader::from_reader(PLAYERS);
293+
let mut r = Reader::from_bytes(PLAYERS);
320294
r.check_end_names(false).check_comments(false);
321295
let mut count = criterion::black_box(0);
322-
let mut buf = Vec::new();
323296
loop {
324-
match r.read_event_into(&mut buf) {
297+
match r.read_event() {
325298
Ok(Event::Empty(e)) => {
326299
for attr in e.attributes().with_checks(false) {
327300
let _attr = attr.unwrap();
@@ -331,20 +304,18 @@ fn attributes(c: &mut Criterion) {
331304
Ok(Event::Eof) => break,
332305
_ => (),
333306
}
334-
buf.clear();
335307
}
336308
assert_eq!(count, 1041);
337309
})
338310
});
339311

340312
group.bench_function("try_get_attribute", |b| {
341313
b.iter(|| {
342-
let mut r = Reader::from_reader(PLAYERS);
314+
let mut r = Reader::from_bytes(PLAYERS);
343315
r.check_end_names(false).check_comments(false);
344316
let mut count = criterion::black_box(0);
345-
let mut buf = Vec::new();
346317
loop {
347-
match r.read_event_into(&mut buf) {
318+
match r.read_event() {
348319
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
349320
for name in ["num", "status", "avg"] {
350321
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
@@ -359,7 +330,6 @@ fn attributes(c: &mut Criterion) {
359330
Ok(Event::Eof) => break,
360331
_ => (),
361332
}
362-
buf.clear();
363333
}
364334
assert_eq!(count, 150);
365335
})

examples/custom_entities.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
2626
let mut reader = Reader::from_str(DATA);
2727
reader.trim_text(true);
2828

29-
let mut buf = Vec::new();
3029
let mut custom_entities = HashMap::new();
3130
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
3231

3332
loop {
34-
match reader.read_event_into(&mut buf) {
33+
match reader.read_event() {
3534
Ok(Event::DocType(ref e)) => {
3635
for cap in entity_re.captures_iter(&e) {
3736
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());

examples/read_texts.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@ fn main() {
1010
reader.trim_text(true);
1111

1212
let mut txt = Vec::new();
13-
let mut buf = Vec::new();
1413

1514
loop {
16-
match reader.read_event_into(&mut buf) {
15+
match reader.read_event() {
1716
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
1817
txt.push(
1918
reader
20-
.read_text_into(QName(b"tag2"), &mut Vec::new())
19+
.read_text(QName(b"tag2"))
2120
.expect("Cannot decode text value"),
2221
);
2322
println!("{:?}", txt);
@@ -26,6 +25,5 @@ fn main() {
2625
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
2726
_ => (), // There are several other `Event`s we do not consider here
2827
}
29-
buf.clear();
3028
}
3129
}

src/de/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ impl<'de> Deserializer<'de, SliceReader<'de>> {
707707

708708
/// Create new deserializer that will borrow data from the specified borrowing reader
709709
#[inline]
710-
fn from_borrowing_reader(mut reader: Reader<&'de [u8]>) -> Self {
710+
fn from_borrowing_reader(mut reader: Reader<crate::SliceReader<'de>>) -> Self {
711711
reader
712712
.expand_empty_elements(true)
713713
.check_end_names(true)
@@ -942,7 +942,7 @@ pub trait XmlRead<'i> {
942942
/// You cannot create it, it is created automatically when you call
943943
/// [`Deserializer::from_reader`]
944944
pub struct IoReader<R: BufRead> {
945-
reader: Reader<R>,
945+
reader: Reader<crate::IoReader<R>>,
946946
buf: Vec<u8>,
947947
}
948948

@@ -987,7 +987,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
987987
/// You cannot create it, it is created automatically when you call
988988
/// [`Deserializer::from_str`] or [`Deserializer::from_slice`]
989989
pub struct SliceReader<'de> {
990-
reader: Reader<&'de [u8]>,
990+
reader: Reader<crate::SliceReader<'de>>,
991991
}
992992

993993
impl<'de> XmlRead<'de> for SliceReader<'de> {

0 commit comments

Comments
 (0)