Skip to content

Commit b0a3c78

Browse files
committed
Position tracking: make spans optional.
Better support manual creation of BytesStart by making position information optional. Also replaces PartialEq derives with manual implementations to ensure that position information is not taken into account when comparing BytesStart objects - prevents observable and possibly breaking changes to comparison behaviour.
1 parent 3f778c2 commit b0a3c78

File tree

3 files changed

+44
-19
lines changed

3 files changed

+44
-19
lines changed

src/events/mod.rs

+24-10
Original file line numberDiff line numberDiff line change
@@ -59,23 +59,28 @@ use attributes::{Attribute, Attributes};
5959
/// The name can be accessed using the [`name`] or [`local_name`] methods.
6060
/// An iterator over the attributes is returned by the [`attributes`] method.
6161
///
62+
/// If this was created from a reader, its location in the data can be accessed
63+
/// using the [`span`] method.
64+
///
6265
/// [`name`]: Self::name
6366
/// [`local_name`]: Self::local_name
6467
/// [`attributes`]: Self::attributes
65-
#[derive(Clone, Eq, PartialEq)]
68+
/// [`span`]: Self::span
69+
#[derive(Clone, Eq)]
6670
pub struct BytesStart<'a> {
6771
/// content of the element, before any utf8 conversion
6872
pub(crate) buf: Cow<'a, [u8]>,
6973
/// end of the element name, the name starts at the start of `buf`
7074
pub(crate) name_len: usize,
71-
/// the position of the element in the input, this does not reflect updates to the struct
72-
pub(crate) span: Span,
75+
/// the position of the element in the input, this does not reflect updates to the struct and
76+
/// is not included in equality checking
77+
pub(crate) span: Option<Span>,
7378
}
7479

7580
impl<'a> BytesStart<'a> {
7681
/// Internal constructor, used by `Reader`. Supplies data in reader's encoding
7782
#[inline]
78-
pub(crate) fn wrap(content: &'a [u8], name_len: usize, span: Span) -> Self {
83+
pub(crate) fn wrap(content: &'a [u8], name_len: usize, span: Option<Span>) -> Self {
7984
BytesStart {
8085
buf: Cow::Borrowed(content),
8186
name_len,
@@ -93,7 +98,7 @@ impl<'a> BytesStart<'a> {
9398
let buf = str_cow_to_bytes(name);
9499
BytesStart {
95100
name_len: buf.len(),
96-
span: 0..buf.len(),
101+
span: None,
97102
buf,
98103
}
99104
}
@@ -110,7 +115,7 @@ impl<'a> BytesStart<'a> {
110115
let buf = str_cow_to_bytes(content);
111116

112117
BytesStart {
113-
span: 0..buf.len(),
118+
span: None,
114119
buf,
115120
name_len,
116121
}
@@ -187,14 +192,15 @@ impl<'a> BytesStart<'a> {
187192
self.name().into()
188193
}
189194

190-
/// Gets the range of bytes this element spans in the input stream.
195+
/// Gets the range of bytes this element spans in the input stream, if it
196+
/// came from one.
191197
///
192-
/// This does not reflect updates to the struct. I.e. if [`set_name`] is called, this will not
193-
/// update the span, as the underlying source is not altered by this operation.
198+
/// This does not reflect updates to the struct. For example, if [`set_name`] is called, it
199+
/// will not update the span, as the underlying source is not altered by the operation.
194200
///
195201
/// [`set_name`]: Self::set_name
196202
#[inline]
197-
pub fn span(&self) -> Span {
203+
pub fn span(&self) -> Option<Span> {
198204
self.span.clone()
199205
}
200206

@@ -307,6 +313,14 @@ impl<'a> Deref for BytesStart<'a> {
307313
}
308314
}
309315

316+
// Manually implemented to prevent equality changes between items returned from reader versus
317+
// items created via public API.
318+
impl<'a> PartialEq for BytesStart<'a> {
319+
fn eq(&self, other: &Self) -> bool {
320+
self.buf == other.buf && self.name_len == other.name_len
321+
}
322+
}
323+
310324
////////////////////////////////////////////////////////////////////////////////////////////////////
311325

312326
/// An XML declaration (`Event::Decl`).

src/reader/mod.rs

+11
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,17 @@ pub(crate) fn is_whitespace(b: u8) -> bool {
858858
}
859859
}
860860

861+
/// A function to resolve the span of an XML element from its contents and its opening position.
862+
#[inline]
863+
pub(crate) fn resolve_span(buf: &[u8], open_pos: usize) -> Span {
864+
const L_BRACKET: usize = '<'.len_utf8();
865+
const R_BRACKET: usize = '>'.len_utf8();
866+
867+
let close_pos = open_pos + L_BRACKET + buf.len() + R_BRACKET;
868+
869+
open_pos..close_pos
870+
}
871+
861872
////////////////////////////////////////////////////////////////////////////////////////////////////
862873

863874
#[cfg(test)]

src/reader/parser.rs

+9-9
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::errors::{Error, Result};
66
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
77
#[cfg(feature = "encoding")]
88
use crate::reader::EncodingRef;
9-
use crate::reader::{is_whitespace, BangType, ParseState};
9+
use crate::reader::{is_whitespace, resolve_span, BangType, ParseState};
1010

1111
use memchr;
1212

@@ -180,13 +180,13 @@ impl Parser {
180180

181181
/// reads `BytesElement` starting with a `?`,
182182
/// return `Decl` or `PI` event
183-
pub fn read_question_mark<'b>(&mut self, buf: &'b [u8], position: usize) -> Result<Event<'b>> {
183+
pub fn read_question_mark<'b>(&mut self, buf: &'b [u8], open_pos: usize) -> Result<Event<'b>> {
184184
let len = buf.len();
185-
let end = position + len;
185+
let span = resolve_span(buf, open_pos);
186186
if len > 2 && buf[len - 1] == b'?' {
187187
if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) {
188188
let event =
189-
BytesDecl::from_start(BytesStart::wrap(&buf[1..len - 1], 3, position..end));
189+
BytesDecl::from_start(BytesStart::wrap(&buf[1..len - 1], 3, Some(span)));
190190

191191
// Try getting encoding from the declaration event
192192
#[cfg(feature = "encoding")]
@@ -208,10 +208,10 @@ impl Parser {
208208

209209
/// reads `BytesElement` starting with any character except `/`, `!` or `?`
210210
/// return `Start` or `Empty` event
211-
pub fn read_start<'b>(&mut self, buf: &'b [u8], position: usize) -> Result<Event<'b>> {
211+
pub fn read_start<'b>(&mut self, buf: &'b [u8], open_pos: usize) -> Result<Event<'b>> {
212212
// TODO: do this directly when reading bufreader ...
213213
let len = buf.len();
214-
let end = position + len;
214+
let span = resolve_span(buf, open_pos);
215215
let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len);
216216
if let Some(&b'/') = buf.last() {
217217
let end = if name_end < len { name_end } else { len - 1 };
@@ -222,13 +222,13 @@ impl Parser {
222222
Ok(Event::Start(BytesStart::wrap(
223223
&buf[..len - 1],
224224
end,
225-
position..end,
225+
Some(span),
226226
)))
227227
} else {
228228
Ok(Event::Empty(BytesStart::wrap(
229229
&buf[..len - 1],
230230
end,
231-
position..end,
231+
Some(span),
232232
)))
233233
}
234234
} else {
@@ -237,7 +237,7 @@ impl Parser {
237237
// enabled, we should have that information
238238
self.opened_starts.push(self.opened_buffer.len());
239239
self.opened_buffer.extend(&buf[..name_end]);
240-
Ok(Event::Start(BytesStart::wrap(buf, name_end, position..end)))
240+
Ok(Event::Start(BytesStart::wrap(buf, name_end, Some(span))))
241241
}
242242
}
243243

0 commit comments

Comments
 (0)