Skip to content

Commit 711525e

Browse files
authored
Merge pull request tafia#393 from Mingun/qname
Introduce typified wrappers for names and fix couple of bugs
2 parents 0a42987 + bac96d1 commit 711525e

20 files changed

+1202
-507
lines changed

Changelog.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
- [#387]: Allow overlapping between elements of sequence and other elements
1616
(using new feature `overlapped-lists`)
17+
- [#393]: New module `name` with `QName`, `LocalName`, `Namespace`, `Prefix`
18+
and `PrefixDeclaration` wrappers around byte arrays and `ResolveResult` with
19+
the result of namespace resolution
1720

1821
### Bug Fixes
1922

@@ -23,6 +26,9 @@
2326
- [#387]: Internal deserializer state can be broken when deserializing a map with
2427
a sequence field (such as `Vec<T>`), where elements of this sequence contain
2528
another sequence. This error affects only users with the `serialize` feature enabled
29+
- [#393]: Now `event_namespace`, `attribute_namespace` and `read_namespaced_event`
30+
return `ResolveResult::Unknown` if the prefix was not registered in the namespace buffer
31+
- [#393]: Fix processing breaking after encountering an attribute with a reserved name (starting with "xmlns")
2632

2733
### Misc Changes
2834

@@ -42,15 +48,25 @@
4248

4349
- [#391]: Added code coverage
4450

51+
- [#393]: `event_namespace` and `attribute_namespace` now accept `QName`
52+
and return `ResolveResult` and `LocalName`; `read_namespaced_event` now
53+
returns `ResolveResult` instead of `Option<&[u8]>`
54+
- [#393]: Types of `Attribute::key` and `Attr::key()` changed to `QName`
55+
- [#393]: Now `BytesStart::name()` and `BytesEnd::name()` return `QName`, and
56+
`BytesStart::local_name()` and `BytesEnd::local_name()` return `LocalName`
57+
4558
### New Tests
4659

4760
- [#9]: Added tests for incorrect nested tags in input
4861
- [#387]: Added a bunch of tests for sequences deserialization
62+
- [#393]: Added more tests for namespace resolver
63+
- [#393]: Added tests for reserved names (starting with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>
4964

5065
[#8]: https://github.com/Mingun/fast-xml/pull/8
5166
[#9]: https://github.com/Mingun/fast-xml/pull/9
5267
[#387]: https://github.com/tafia/quick-xml/pull/387
5368
[#391]: https://github.com/tafia/quick-xml/pull/391
69+
[#393]: https://github.com/tafia/quick-xml/pull/393
5470

5571
## 0.23.0 -- 2022-05-08
5672

examples/custom_entities.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
3737
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());
3838
}
3939
}
40-
Ok(Event::Start(ref e)) => match e.name() {
40+
Ok(Event::Start(ref e)) => match e.name().as_ref() {
4141
b"test" => println!(
4242
"attributes values: {:?}",
4343
e.attributes()

examples/issue68.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#![allow(unused)]
22

33
use quick_xml::events::Event;
4+
use quick_xml::name::Namespace;
45
use quick_xml::Reader;
6+
use std::convert::TryFrom;
57
use std::io::Read;
68

79
struct Resource {
@@ -81,9 +83,11 @@ fn parse_report(xml_data: &str) -> Vec<Resource> {
8183

8284
loop {
8385
match reader.read_namespaced_event(&mut buf, &mut ns_buffer) {
84-
Ok((namespace_value, Event::Start(e))) => {
85-
let namespace_value = namespace_value.unwrap_or_default();
86-
match (depth, state, namespace_value, e.local_name()) {
86+
Ok((ns, Event::Start(e))) => {
87+
let ns = Option::<Namespace>::try_from(ns)
88+
.unwrap_or_default() // Treat unknown prefixes as not bound to any namespace
89+
.unwrap_or(Namespace(b""));
90+
match (depth, state, ns.as_ref(), e.local_name().as_ref()) {
8791
(0, State::Root, b"DAV:", b"multistatus") => state = State::MultiStatus,
8892
(1, State::MultiStatus, b"DAV:", b"response") => {
8993
state = State::Response;
@@ -96,10 +100,11 @@ fn parse_report(xml_data: &str) -> Vec<Resource> {
96100
}
97101
depth += 1;
98102
}
99-
Ok((namespace_value, Event::End(e))) => {
100-
let namespace_value = namespace_value.unwrap_or_default();
101-
let local_name = e.local_name();
102-
match (depth, state, &*namespace_value, local_name) {
103+
Ok((ns, Event::End(e))) => {
104+
let ns = Option::<Namespace>::try_from(ns)
105+
.unwrap_or_default() // Treat unknown prefixes as not bound to any namespace
106+
.unwrap_or(Namespace(b""));
107+
match (depth, state, ns.as_ref(), e.local_name().as_ref()) {
103108
(1, State::MultiStatus, b"DAV:", b"multistatus") => state = State::Root,
104109
(2, State::MultiStatus, b"DAV:", b"multistatus") => state = State::MultiStatus,
105110
_ => {}

examples/nested_readers.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ fn main() -> Result<(), quick_xml::Error> {
2121
let mut found_tables = Vec::new();
2222
loop {
2323
match reader.read_event(&mut buf)? {
24-
Event::Start(element) => match element.name() {
24+
Event::Start(element) => match element.name().as_ref() {
2525
b"w:tbl" => {
2626
count += 1;
2727
let mut stats = TableStat {
@@ -34,19 +34,21 @@ fn main() -> Result<(), quick_xml::Error> {
3434
loop {
3535
skip_buf.clear();
3636
match reader.read_event(&mut skip_buf)? {
37-
Event::Start(element) => match element.name() {
37+
Event::Start(element) => match element.name().as_ref() {
3838
b"w:tr" => {
3939
stats.rows.push(vec![]);
4040
row_index = stats.rows.len() - 1;
4141
}
4242
b"w:tc" => {
43-
stats.rows[row_index]
44-
.push(String::from_utf8(element.name().to_vec()).unwrap());
43+
stats.rows[row_index].push(
44+
String::from_utf8(element.name().as_ref().to_vec())
45+
.unwrap(),
46+
);
4547
}
4648
_ => {}
4749
},
4850
Event::End(element) => {
49-
if element.name() == b"w:tbl" {
51+
if element.name().as_ref() == b"w:tbl" {
5052
found_tables.push(stats);
5153
break;
5254
}

examples/read_texts.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ fn main() {
1313

1414
loop {
1515
match reader.read_event(&mut buf) {
16-
Ok(Event::Start(ref e)) if e.name() == b"tag2" => {
16+
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
1717
txt.push(
1818
reader
1919
.read_text(b"tag2", &mut Vec::new())

src/de/map.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ where
275275
let key = if let Some(p) = self
276276
.unflatten_fields
277277
.iter()
278-
.position(|f| e.name() == &f[UNFLATTEN_PREFIX.len()..])
278+
.position(|f| e.name().as_ref() == &f[UNFLATTEN_PREFIX.len()..])
279279
{
280280
// Used to deserialize elements, like:
281281
// <root>
@@ -290,7 +290,7 @@ where
290290
// }
291291
seed.deserialize(self.unflatten_fields.remove(p).into_deserializer())
292292
} else {
293-
let name = Cow::Borrowed(e.local_name());
293+
let name = Cow::Borrowed(e.local_name().into_inner());
294294
seed.deserialize(EscapedDeserializer::new(name, decoder, false))
295295
};
296296
key.map(Some)
@@ -606,7 +606,7 @@ where
606606
// Stop iteration after reaching a closing tag
607607
DeEvent::End(e) if e.name() == self.map.start.name() => Ok(None),
608608
// This is an unmatched closing tag, so the XML is invalid
609-
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
609+
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
610610
// We cannot get `Eof` legally, because we are always inside of the
611611
// opened tag `self.map.start`
612612
DeEvent::Eof => Err(DeError::UnexpectedEof),

src/de/mod.rs

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ pub use crate::errors::serialize::DeError;
221221
use crate::{
222222
errors::Error,
223223
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
224+
name::QName,
224225
reader::Decoder,
225226
Reader,
226227
};
@@ -514,16 +515,16 @@ where
514515
match self.write.back() {
515516
// Skip all subtree, if we skip a start event
516517
Some(DeEvent::Start(e)) => {
517-
let end = e.name().to_owned();
518+
let end = e.name().as_ref().to_owned();
518519
let mut depth = 0;
519520
loop {
520521
let event = self.next()?;
521522
match event {
522-
DeEvent::Start(ref e) if e.name() == end => {
523+
DeEvent::Start(ref e) if e.name().as_ref() == end => {
523524
self.skip_event(event)?;
524525
depth += 1;
525526
}
526-
DeEvent::End(ref e) if e.name() == end => {
527+
DeEvent::End(ref e) if e.name().as_ref() == end => {
527528
self.skip_event(event)?;
528529
if depth == 0 {
529530
return Ok(());
@@ -571,7 +572,9 @@ where
571572
let e = self.next()?;
572573
match e {
573574
DeEvent::Start(e) => return Ok(Some(e)),
574-
DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().to_owned())),
575+
DeEvent::End(e) => {
576+
return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned()))
577+
}
575578
DeEvent::Eof => return Ok(None),
576579
_ => (), // ignore texts
577580
}
@@ -631,20 +634,24 @@ where
631634
DeEvent::Text(t) if unescape => t.unescape()?,
632635
DeEvent::Text(t) => BytesCData::new(t.into_inner()),
633636
DeEvent::CData(t) => t,
634-
DeEvent::Start(s) => return Err(DeError::UnexpectedStart(s.name().to_owned())),
637+
DeEvent::Start(s) => {
638+
return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
639+
}
635640
// We can get End event in case of `<tag></tag>` or `<tag/>` input
636641
// Return empty text in that case
637642
DeEvent::End(end) if end.name() == e.name() => {
638643
return Ok(BytesCData::new(&[] as &[u8]));
639644
}
640-
DeEvent::End(end) => return Err(DeError::UnexpectedEnd(end.name().to_owned())),
645+
DeEvent::End(end) => {
646+
return Err(DeError::UnexpectedEnd(end.name().as_ref().to_owned()))
647+
}
641648
DeEvent::Eof => return Err(DeError::UnexpectedEof),
642649
};
643650
self.read_to_end(e.name())?;
644651
Ok(t)
645652
}
646-
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().to_owned())),
647-
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
653+
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
654+
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
648655
DeEvent::Eof => Err(DeError::UnexpectedEof),
649656
}
650657
}
@@ -658,7 +665,7 @@ where
658665
/// Drops all events until event with [name](BytesEnd::name()) `name` won't be
659666
/// dropped. This method should be called after [`Self::next()`]
660667
#[cfg(feature = "overlapped-lists")]
661-
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
668+
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
662669
let mut depth = 0;
663670
loop {
664671
match self.read.pop_front() {
@@ -682,7 +689,7 @@ where
682689
}
683690
}
684691
#[cfg(not(feature = "overlapped-lists"))]
685-
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
692+
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
686693
// First one might be in self.peek
687694
match self.next()? {
688695
DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
@@ -751,10 +758,10 @@ where
751758
{
752759
// Try to go to the next `<tag ...>...</tag>` or `<tag .../>`
753760
if let Some(e) = self.next_start()? {
754-
let name = e.name().to_vec();
761+
let name = e.name().as_ref().to_vec();
755762
let map = map::MapAccess::new(self, e, fields)?;
756763
let value = visitor.visit_map(map)?;
757-
self.read_to_end(&name)?;
764+
self.read_to_end(QName(&name))?;
758765
Ok(value)
759766
} else {
760767
Err(DeError::ExpectedStart)
@@ -789,7 +796,7 @@ where
789796
visitor.visit_unit()
790797
}
791798
DeEvent::Text(_) | DeEvent::CData(_) => visitor.visit_unit(),
792-
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
799+
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
793800
DeEvent::Eof => Err(DeError::UnexpectedEof),
794801
}
795802
}
@@ -893,7 +900,7 @@ where
893900
{
894901
match self.next()? {
895902
DeEvent::Start(e) => self.read_to_end(e.name())?,
896-
DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().to_owned())),
903+
DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
897904
DeEvent::Eof => return Err(DeError::UnexpectedEof),
898905
_ => (),
899906
}
@@ -925,7 +932,7 @@ pub trait XmlRead<'i> {
925932

926933
/// Skips until end element is found. Unlike `next()` it will not allocate
927934
/// when it cannot satisfy the lifetime.
928-
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError>;
935+
fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;
929936

930937
/// A copy of the reader's decoder used to decode strings.
931938
fn decoder(&self) -> Decoder;
@@ -960,7 +967,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
960967
event
961968
}
962969

963-
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
970+
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
964971
match self.reader.read_to_end(name, &mut self.buf) {
965972
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
966973
other => Ok(other?),
@@ -996,7 +1003,7 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
9961003
}
9971004
}
9981005

999-
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
1006+
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
10001007
match self.reader.read_to_end_unbuffered(name) {
10011008
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
10021009
other => Ok(other?),
@@ -1212,7 +1219,7 @@ mod tests {
12121219
de.next().unwrap(),
12131220
Start(BytesStart::borrowed_name(b"target"))
12141221
);
1215-
de.read_to_end(b"target").unwrap();
1222+
de.read_to_end(QName(b"target")).unwrap();
12161223
assert_eq!(de.read, vec![]);
12171224
assert_eq!(
12181225
de.write,
@@ -1252,7 +1259,7 @@ mod tests {
12521259
de.next().unwrap(),
12531260
Start(BytesStart::borrowed_name(b"skip"))
12541261
);
1255-
de.read_to_end(b"skip").unwrap();
1262+
de.read_to_end(QName(b"skip")).unwrap();
12561263

12571264
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
12581265
}
@@ -1313,7 +1320,7 @@ mod tests {
13131320
de.next().unwrap(),
13141321
Start(BytesStart::borrowed(br#"tag a="1""#, 3))
13151322
);
1316-
assert_eq!(de.read_to_end(b"tag").unwrap(), ());
1323+
assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
13171324

13181325
assert_eq!(
13191326
de.next().unwrap(),
@@ -1329,7 +1336,7 @@ mod tests {
13291336
de.next().unwrap(),
13301337
Start(BytesStart::borrowed(b"self-closed", 11))
13311338
);
1332-
assert_eq!(de.read_to_end(b"self-closed").unwrap(), ());
1339+
assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
13331340

13341341
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
13351342
assert_eq!(de.next().unwrap(), Eof);
@@ -1432,7 +1439,7 @@ mod tests {
14321439
reader.next().unwrap(),
14331440
DeEvent::Start(BytesStart::borrowed(b"item ", 4))
14341441
);
1435-
reader.read_to_end(b"item").unwrap();
1442+
reader.read_to_end(QName(b"item")).unwrap();
14361443
assert_eq!(reader.next().unwrap(), DeEvent::Eof);
14371444
}
14381445

src/de/seq.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ pub fn not_in(
1515
decoder: Decoder,
1616
) -> Result<bool, DeError> {
1717
#[cfg(not(feature = "encoding"))]
18-
let tag = Cow::Borrowed(decoder.decode(start.name())?);
18+
let tag = Cow::Borrowed(decoder.decode(start.name().into_inner())?);
1919

2020
#[cfg(feature = "encoding")]
21-
let tag = decoder.decode(start.name());
21+
let tag = decoder.decode(start.name().into_inner());
2222

2323
Ok(fields.iter().all(|&field| field != tag.as_ref()))
2424
}

src/de/var.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ where
3838
DeEvent::Text(t) => EscapedDeserializer::new(Cow::Borrowed(t), decoder, true),
3939
// Escape sequences are not processed inside a CDATA section
4040
DeEvent::CData(t) => EscapedDeserializer::new(Cow::Borrowed(t), decoder, false),
41-
DeEvent::Start(e) => EscapedDeserializer::new(Cow::Borrowed(e.name()), decoder, false),
41+
DeEvent::Start(e) => {
42+
EscapedDeserializer::new(Cow::Borrowed(e.name().into_inner()), decoder, false)
43+
}
4244
_ => {
4345
return Err(DeError::Unsupported(
4446
"Invalid event for Enum, expecting `Text` or `Start`",

0 commit comments

Comments
 (0)