@@ -361,7 +361,7 @@ macro_rules! read_until_close {
361
361
} ,
362
362
// `<?` - processing instruction
363
363
Ok ( Some ( b'?' ) ) => match $reader
364
- . read_pi ( $buf, & mut $self. state. offset)
364
+ . read_with ( PiParser :: default ( ) , $buf, & mut $self. state. offset)
365
365
$( . $await) ?
366
366
{
367
367
Ok ( bytes) => $self. state. emit_question_mark( bytes) ,
@@ -374,7 +374,7 @@ macro_rules! read_until_close {
374
374
} ,
375
375
// `<...` - opening or self-closed tag
376
376
Ok ( Some ( _) ) => match $reader
377
- . read_element ( $buf, & mut $self. state. offset)
377
+ . read_with ( ElementParser :: default ( ) , $buf, & mut $self. state. offset)
378
378
$( . $await) ?
379
379
{
380
380
Ok ( bytes) => $self. state. emit_start( bytes) ,
@@ -763,6 +763,26 @@ impl<R> Reader<R> {
763
763
764
764
////////////////////////////////////////////////////////////////////////////////////////////////////
765
765
766
+ /// Decouples reading data from a data source from parsing the XML structure in that data.
767
+ /// It holds the state that is preserved between getting chunks of bytes from the reader.
768
+ ///
769
+ /// This trait is implemented for every parser that processes a piece of the XML grammar.
770
+ pub trait Parser {
771
+ /// Processes new data and tries to determine the end of the thing being parsed.
772
+ ///
773
+ /// Returns the position of the end of the thing in `bytes` if the search succeeds
774
+ /// and `None` otherwise.
775
+ ///
776
+ /// # Parameters
777
+ /// - `bytes`: a slice to find the end of a thing.
778
+ /// Should contain text in ASCII-compatible encoding
779
+ fn feed ( & mut self , bytes : & [ u8 ] ) -> Option < usize > ;
780
+
781
+ /// Returns the parse error produced by this parser when the end of input is
782
+ /// reached before the end of the parsed thing is found.
783
+ fn eof_error ( ) -> SyntaxError ;
784
+ }
785
+
766
786
/// Represents an input for a reader that can return borrowed data.
767
787
///
768
788
/// There are two implementors of this trait: generic one that read data from
@@ -821,20 +841,25 @@ trait XmlSource<'r, B> {
821
841
822
842
/// Read input until the thing recognized by the given parser is finished.
823
843
///
824
- /// This method expect that `<?` already was read.
844
+ /// This method expects that the start sequence of the thing being parsed has already been read.
825
845
///
826
- /// Returns a slice of data read up to end of processing instruction (`>`),
827
- /// which does not include into result (`?` at the end included) .
846
+ /// Returns a slice of data read up to the end of the thing being parsed.
847
+ /// The end of the thing and the returned content are determined by the parser used.
828
848
///
829
- /// If input (`Self`) is exhausted and nothing was read, returns `None`.
849
+ /// If input (`Self`) is exhausted and no bytes were read, or if the specified
850
+ /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
830
851
///
831
852
/// # Parameters
832
853
/// - `buf`: Buffer that could be filled from an input (`Self`) and
833
854
/// from which [events] could borrow their data
834
855
/// - `position`: Will be increased by amount of bytes consumed
835
856
///
857
+ /// A `P` type parameter is used to preserve state between calls to the underlying
858
+ /// reader which provides bytes fed into the parser.
+ ///
836
859
/// [events]: crate::events::Event
837
- fn read_pi ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
860
+ fn read_with < P > ( & mut self , parser : P , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] >
861
+ where
862
+ P : Parser ;
838
863
839
864
/// Read input until comment or CDATA is finished.
840
865
///
@@ -853,30 +878,6 @@ trait XmlSource<'r, B> {
853
878
/// [events]: crate::events::Event
854
879
fn read_bang_element ( & mut self , buf : B , position : & mut usize ) -> Result < ( BangType , & ' r [ u8 ] ) > ;
855
880
856
- /// Read input until XML element is closed by approaching a `>` symbol.
857
- /// Returns a buffer that contains a data between `<` and `>` or
858
- /// [`SyntaxError::UnclosedTag`] if end-of-input was reached before reading `>`.
859
- ///
860
- /// Derived from `read_until`, but modified to handle XML attributes
861
- /// using a minimal state machine.
862
- ///
863
- /// Attribute values are [defined] as follows:
864
- /// ```plain
865
- /// AttValue := '"' (([^<&"]) | Reference)* '"'
866
- /// | "'" (([^<&']) | Reference)* "'"
867
- /// ```
868
- /// (`Reference` is something like `"`, but we don't care about
869
- /// escaped characters at this level)
870
- ///
871
- /// # Parameters
872
- /// - `buf`: Buffer that could be filled from an input (`Self`) and
873
- /// from which [events] could borrow their data
874
- /// - `position`: Will be increased by amount of bytes consumed
875
- ///
876
- /// [defined]: https://www.w3.org/TR/xml11/#NT-AttValue
877
- /// [events]: crate::events::Event
878
- fn read_element ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
879
-
880
881
/// Consume and discard all the whitespace until the next non-whitespace
881
882
/// character or EOF.
882
883
///
@@ -1510,6 +1511,7 @@ mod test {
1510
1511
mod read_element {
1511
1512
use super :: * ;
1512
1513
use crate :: errors:: { Error , SyntaxError } ;
1514
+ use crate :: reader:: ElementParser ;
1513
1515
use crate :: utils:: Bytes ;
1514
1516
use pretty_assertions:: assert_eq;
1515
1517
@@ -1521,7 +1523,7 @@ mod test {
1521
1523
let mut input = b"" . as_ref( ) ;
1522
1524
// ^= 1
1523
1525
1524
- match $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? {
1526
+ match $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? {
1525
1527
Err ( Error :: Syntax ( SyntaxError :: UnclosedTag ) ) => { }
1526
1528
x => panic!(
1527
1529
"Expected `Err(Syntax(UnclosedTag))`, but got `{:?}`" ,
@@ -1543,7 +1545,7 @@ mod test {
1543
1545
// ^= 2
1544
1546
1545
1547
assert_eq!(
1546
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1548
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1547
1549
Bytes ( b"" )
1548
1550
) ;
1549
1551
assert_eq!( position, 2 ) ;
@@ -1557,7 +1559,7 @@ mod test {
1557
1559
// ^= 5
1558
1560
1559
1561
assert_eq!(
1560
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1562
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1561
1563
Bytes ( b"tag" )
1562
1564
) ;
1563
1565
assert_eq!( position, 5 ) ;
@@ -1571,7 +1573,7 @@ mod test {
1571
1573
// ^= 3
1572
1574
1573
1575
assert_eq!(
1574
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1576
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1575
1577
Bytes ( b":" )
1576
1578
) ;
1577
1579
assert_eq!( position, 3 ) ;
@@ -1585,7 +1587,7 @@ mod test {
1585
1587
// ^= 6
1586
1588
1587
1589
assert_eq!(
1588
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1590
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1589
1591
Bytes ( b":tag" )
1590
1592
) ;
1591
1593
assert_eq!( position, 6 ) ;
@@ -1599,7 +1601,7 @@ mod test {
1599
1601
// ^= 39
1600
1602
1601
1603
assert_eq!(
1602
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1604
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1603
1605
Bytes ( br#"tag attr-1=">" attr2 = '>' 3attr"# )
1604
1606
) ;
1605
1607
assert_eq!( position, 39 ) ;
@@ -1618,7 +1620,7 @@ mod test {
1618
1620
// ^= 3
1619
1621
1620
1622
assert_eq!(
1621
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1623
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1622
1624
Bytes ( b"/" )
1623
1625
) ;
1624
1626
assert_eq!( position, 3 ) ;
@@ -1632,7 +1634,7 @@ mod test {
1632
1634
// ^= 6
1633
1635
1634
1636
assert_eq!(
1635
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1637
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1636
1638
Bytes ( b"tag/" )
1637
1639
) ;
1638
1640
assert_eq!( position, 6 ) ;
@@ -1646,7 +1648,7 @@ mod test {
1646
1648
// ^= 4
1647
1649
1648
1650
assert_eq!(
1649
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1651
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1650
1652
Bytes ( b":/" )
1651
1653
) ;
1652
1654
assert_eq!( position, 4 ) ;
@@ -1660,7 +1662,7 @@ mod test {
1660
1662
// ^= 7
1661
1663
1662
1664
assert_eq!(
1663
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1665
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1664
1666
Bytes ( b":tag/" )
1665
1667
) ;
1666
1668
assert_eq!( position, 7 ) ;
@@ -1674,7 +1676,7 @@ mod test {
1674
1676
// ^= 42
1675
1677
1676
1678
assert_eq!(
1677
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1679
+ Bytes ( $source( & mut input) . read_with ( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1678
1680
Bytes ( br#"tag attr-1="/>" attr2 = '/>' 3attr/"# )
1679
1681
) ;
1680
1682
assert_eq!( position, 42 ) ;
0 commit comments