Skip to content

Commit 3f778c2

Browse files
committed
Position tracking: add span to BytesStart
1 parent add7406 commit 3f778c2

File tree

3 files changed

+47
-11
lines changed

3 files changed

+47
-11
lines changed

src/events/mod.rs

+25-3
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ use crate::encoding::Decoder;
4848
use crate::errors::{Error, Result};
4949
use crate::escape::{escape, partial_escape, unescape_with};
5050
use crate::name::{LocalName, QName};
51+
use crate::reader::Span;
5152
use crate::utils::write_cow_string;
5253
use attributes::{Attribute, Attributes};
5354

@@ -67,15 +68,18 @@ pub struct BytesStart<'a> {
6768
pub(crate) buf: Cow<'a, [u8]>,
6869
/// end of the element name; the name starts at the start of `buf`
6970
pub(crate) name_len: usize,
71+
/// the position of the element in the input; this does not reflect later updates to the struct
72+
pub(crate) span: Span,
7073
}
7174

7275
impl<'a> BytesStart<'a> {
7376
/// Internal constructor, used by `Reader`. Supplies data in reader's encoding
7477
#[inline]
75-
pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self {
78+
pub(crate) fn wrap(content: &'a [u8], name_len: usize, span: Span) -> Self {
7679
BytesStart {
7780
buf: Cow::Borrowed(content),
7881
name_len,
82+
span,
7983
}
8084
}
8185

@@ -89,6 +93,7 @@ impl<'a> BytesStart<'a> {
8993
let buf = str_cow_to_bytes(name);
9094
BytesStart {
9195
name_len: buf.len(),
96+
span: 0..buf.len(),
9297
buf,
9398
}
9499
}
@@ -102,8 +107,11 @@ impl<'a> BytesStart<'a> {
102107
/// to generate invalid XML if `content` or `name_len` are incorrect.
103108
#[inline]
104109
pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
110+
let buf = str_cow_to_bytes(content);
111+
105112
BytesStart {
106-
buf: str_cow_to_bytes(content),
113+
span: 0..buf.len(),
114+
buf,
107115
name_len,
108116
}
109117
}
@@ -113,6 +121,7 @@ impl<'a> BytesStart<'a> {
113121
BytesStart {
114122
buf: Cow::Owned(self.buf.into_owned()),
115123
name_len: self.name_len,
124+
span: self.span,
116125
}
117126
}
118127

@@ -121,6 +130,7 @@ impl<'a> BytesStart<'a> {
121130
BytesStart {
122131
buf: Cow::Owned(self.buf.to_owned().into()),
123132
name_len: self.name_len,
133+
span: self.span(),
124134
}
125135
}
126136

@@ -153,6 +163,7 @@ impl<'a> BytesStart<'a> {
153163
BytesStart {
154164
buf: Cow::Borrowed(&self.buf),
155165
name_len: self.name_len,
166+
span: self.span(),
156167
}
157168
}
158169

@@ -176,6 +187,17 @@ impl<'a> BytesStart<'a> {
176187
self.name().into()
177188
}
178189

190+
/// Gets the range of bytes this element spans in the input stream.
191+
///
192+
/// This does not reflect updates to the struct. I.e. if [`set_name`] is called, this will not
193+
/// update the span, as the underlying source is not altered by this operation.
194+
///
195+
/// [`set_name`]: Self::set_name
196+
#[inline]
197+
pub fn span(&self) -> Span {
198+
self.span.clone()
199+
}
200+
179201
/// Edit the name of the BytesStart in-place
180202
///
181203
/// # Warning
@@ -273,7 +295,7 @@ impl<'a> Debug for BytesStart<'a> {
273295
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
274296
write!(f, "BytesStart {{ buf: ")?;
275297
write_cow_string(f, &self.buf)?;
276-
write!(f, ", name_len: {} }}", self.name_len)
298+
write!(f, ", name_len: {}, span: {:?} }}", self.name_len, self.span)
277299
}
278300
}
279301

src/reader/mod.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,9 @@ macro_rules! read_until_close {
213213
) => {{
214214
$self.parser.state = ParseState::ClosedTag;
215215

216+
// save the current offset of the parser to use later
217+
let position = $self.parser.offset;
218+
216219
match $reader.peek_one() $(.$await)? {
217220
// `<!` - comment, CDATA or DOCTYPE declaration
218221
Ok(Some(b'!')) => match $reader
@@ -238,7 +241,7 @@ macro_rules! read_until_close {
238241
$(.$await)?
239242
{
240243
Ok(None) => Ok(Event::Eof),
241-
Ok(Some(bytes)) => $self.parser.read_question_mark(bytes),
244+
Ok(Some(bytes)) => $self.parser.read_question_mark(bytes, position),
242245
Err(e) => Err(e),
243246
},
244247
// `<...` - opening or self-closed tag
@@ -247,7 +250,7 @@ macro_rules! read_until_close {
247250
$(.$await)?
248251
{
249252
Ok(None) => Ok(Event::Eof),
250-
Ok(Some(bytes)) => $self.parser.read_start(bytes),
253+
Ok(Some(bytes)) => $self.parser.read_start(bytes, position),
251254
Err(e) => Err(e),
252255
},
253256
Ok(None) => Ok(Event::Eof),

src/reader/parser.rs

+17-6
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,13 @@ impl Parser {
180180

181181
/// reads `BytesElement` starting with a `?`,
182182
/// return `Decl` or `PI` event
183-
pub fn read_question_mark<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
183+
pub fn read_question_mark<'b>(&mut self, buf: &'b [u8], position: usize) -> Result<Event<'b>> {
184184
let len = buf.len();
185+
let end = position + len;
185186
if len > 2 && buf[len - 1] == b'?' {
186187
if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) {
187-
let event = BytesDecl::from_start(BytesStart::wrap(&buf[1..len - 1], 3));
188+
let event =
189+
BytesDecl::from_start(BytesStart::wrap(&buf[1..len - 1], 3, position..end));
188190

189191
// Try getting encoding from the declaration event
190192
#[cfg(feature = "encoding")]
@@ -206,27 +208,36 @@ impl Parser {
206208

207209
/// reads `BytesElement` starting with any character except `/`, `!` or `?`
208210
/// return `Start` or `Empty` event
209-
pub fn read_start<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
211+
pub fn read_start<'b>(&mut self, buf: &'b [u8], position: usize) -> Result<Event<'b>> {
210212
// TODO: do this directly when reading bufreader ...
211213
let len = buf.len();
214+
let end = position + len;
212215
let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len);
213216
if let Some(&b'/') = buf.last() {
214217
let end = if name_end < len { name_end } else { len - 1 };
215218
if self.expand_empty_elements {
216219
self.state = ParseState::Empty;
217220
self.opened_starts.push(self.opened_buffer.len());
218221
self.opened_buffer.extend(&buf[..end]);
219-
Ok(Event::Start(BytesStart::wrap(&buf[..len - 1], end)))
222+
Ok(Event::Start(BytesStart::wrap(
223+
&buf[..len - 1],
224+
end,
225+
position..position + len, // `end` is shadowed by the name length in this branch
226+
)))
220227
} else {
221-
Ok(Event::Empty(BytesStart::wrap(&buf[..len - 1], end)))
228+
Ok(Event::Empty(BytesStart::wrap(
229+
&buf[..len - 1],
230+
end,
231+
position..position + len, // `end` is shadowed by the name length in this branch
232+
)))
222233
}
223234
} else {
224235
// #514: Always store names event when .check_end_names == false,
225236
// because checks can be temporary disabled and when they would be
226237
// enabled, we should have that information
227238
self.opened_starts.push(self.opened_buffer.len());
228239
self.opened_buffer.extend(&buf[..name_end]);
229-
Ok(Event::Start(BytesStart::wrap(buf, name_end)))
240+
Ok(Event::Start(BytesStart::wrap(buf, name_end, position..end)))
230241
}
231242
}
232243

0 commit comments

Comments
 (0)