@@ -361,7 +361,7 @@ macro_rules! read_until_close {
361
361
} ,
362
362
// `<?` - processing instruction
363
363
Ok ( Some ( b'?' ) ) => match $reader
364
- . read_pi ( $buf, & mut $self. state. offset)
364
+ . read_with :: < PiParser > ( $buf, & mut $self. state. offset)
365
365
$( . $await) ?
366
366
{
367
367
Ok ( bytes) => $self. state. emit_question_mark( bytes) ,
@@ -374,7 +374,7 @@ macro_rules! read_until_close {
374
374
} ,
375
375
// `<...` - opening or self-closed tag
376
376
Ok ( Some ( _) ) => match $reader
377
- . read_element ( $buf, & mut $self. state. offset)
377
+ . read_with :: < ElementParser > ( $buf, & mut $self. state. offset)
378
378
$( . $await) ?
379
379
{
380
380
Ok ( bytes) => $self. state. emit_start( bytes) ,
@@ -763,6 +763,26 @@ impl<R> Reader<R> {
763
763
764
764
////////////////////////////////////////////////////////////////////////////////////////////////////
765
765
766
+ /// Decouples reading bytes from a data source from recognizing XML structure in them.
767
+ /// An implementor is the state preserved between successive chunks of bytes from the reader.
768
+ ///
769
+ /// This trait is implemented for every parser that processes a piece of the XML grammar.
770
+ pub trait Parser : Default {
771
+ /// Processes a new chunk of data and tries to find the end of the thing being parsed.
772
+ ///
773
+ /// Returns the position of the end of the thing in `bytes` if it was found
774
+ /// and `None` otherwise.
775
+ ///
776
+ /// # Parameters
777
+ /// - `bytes`: a slice in which to search for the end of the thing.
778
+ /// Should contain text in an ASCII-compatible encoding
779
+ fn feed ( & mut self , bytes : & [ u8 ] ) -> Option < usize > ;
780
+
781
+ /// Returns the parse error this parser reports when the end of input is
782
+ /// reached without finding the end of the thing being parsed.
783
+ fn eof_error ( ) -> SyntaxError ;
784
+ }
785
+
766
786
/// Represents an input for a reader that can return borrowed data.
767
787
///
768
788
/// There are two implementors of this trait: generic one that read data from
@@ -821,20 +841,23 @@ trait XmlSource<'r, B> {
821
841
822
842
/// Read input until the thing recognized by the parser `P` is finished.
823
843
///
824
- /// This method expect that `<?` already was read.
844
+ /// This method expects that the start sequence of the thing being parsed has already been read.
825
845
///
826
- /// Returns a slice of data read up to end of processing instruction (`>`),
827
- /// which does not include into result (`?` at the end included) .
846
+ /// Returns a slice of data read up to the end of the thing being parsed.
847
+ /// The end of the thing and the returned content are determined by the parser used.
828
848
///
829
- /// If input (`Self`) is exhausted and nothing was read, returns `None`.
849
+ /// If input (`Self`) is exhausted and no bytes were read, or if the specified
850
+ /// parser could not find the ending sequence of the thing, returns `SyntaxError`.
830
851
///
831
852
/// # Parameters
832
853
/// - `buf`: Buffer that could be filled from an input (`Self`) and
833
854
/// from which [events] could borrow their data
834
855
/// - `position`: Will be increased by amount of bytes consumed
835
856
///
857
+ /// A `P` type parameter is used to preserve state between calls to the underlying
858
+ /// reader which provides bytes fed into the parser.
+ ///
836
859
/// [events]: crate::events::Event
837
- fn read_pi ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
860
+ fn read_with < P : Parser > ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
838
861
839
862
/// Read input until comment or CDATA is finished.
840
863
///
@@ -853,30 +876,6 @@ trait XmlSource<'r, B> {
853
876
/// [events]: crate::events::Event
854
877
fn read_bang_element ( & mut self , buf : B , position : & mut usize ) -> Result < ( BangType , & ' r [ u8 ] ) > ;
855
878
856
- /// Read input until XML element is closed by approaching a `>` symbol.
857
- /// Returns a buffer that contains a data between `<` and `>` or
858
- /// [`SyntaxError::UnclosedTag`] if end-of-input was reached before reading `>`.
859
- ///
860
- /// Derived from `read_until`, but modified to handle XML attributes
861
- /// using a minimal state machine.
862
- ///
863
- /// Attribute values are [defined] as follows:
864
- /// ```plain
865
- /// AttValue := '"' (([^<&"]) | Reference)* '"'
866
- /// | "'" (([^<&']) | Reference)* "'"
867
- /// ```
868
- /// (`Reference` is something like `"`, but we don't care about
869
- /// escaped characters at this level)
870
- ///
871
- /// # Parameters
872
- /// - `buf`: Buffer that could be filled from an input (`Self`) and
873
- /// from which [events] could borrow their data
874
- /// - `position`: Will be increased by amount of bytes consumed
875
- ///
876
- /// [defined]: https://www.w3.org/TR/xml11/#NT-AttValue
877
- /// [events]: crate::events::Event
878
- fn read_element ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
879
-
880
879
/// Consume and discard all the whitespace until the next non-whitespace
881
880
/// character or EOF.
882
881
///
@@ -1510,6 +1509,7 @@ mod test {
1510
1509
mod read_element {
1511
1510
use super :: * ;
1512
1511
use crate :: errors:: { Error , SyntaxError } ;
1512
+ use crate :: reader:: ElementParser ;
1513
1513
use crate :: utils:: Bytes ;
1514
1514
use pretty_assertions:: assert_eq;
1515
1515
@@ -1521,7 +1521,7 @@ mod test {
1521
1521
let mut input = b"" . as_ref( ) ;
1522
1522
// ^= 1
1523
1523
1524
- match $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? {
1524
+ match $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? {
1525
1525
Err ( Error :: Syntax ( SyntaxError :: UnclosedTag ) ) => { }
1526
1526
x => panic!(
1527
1527
"Expected `Err(Syntax(UnclosedTag))`, but got `{:?}`" ,
@@ -1543,7 +1543,7 @@ mod test {
1543
1543
// ^= 2
1544
1544
1545
1545
assert_eq!(
1546
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1546
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1547
1547
Bytes ( b"" )
1548
1548
) ;
1549
1549
assert_eq!( position, 2 ) ;
@@ -1557,7 +1557,7 @@ mod test {
1557
1557
// ^= 5
1558
1558
1559
1559
assert_eq!(
1560
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1560
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1561
1561
Bytes ( b"tag" )
1562
1562
) ;
1563
1563
assert_eq!( position, 5 ) ;
@@ -1571,7 +1571,7 @@ mod test {
1571
1571
// ^= 3
1572
1572
1573
1573
assert_eq!(
1574
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1574
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1575
1575
Bytes ( b":" )
1576
1576
) ;
1577
1577
assert_eq!( position, 3 ) ;
@@ -1585,7 +1585,7 @@ mod test {
1585
1585
// ^= 6
1586
1586
1587
1587
assert_eq!(
1588
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1588
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1589
1589
Bytes ( b":tag" )
1590
1590
) ;
1591
1591
assert_eq!( position, 6 ) ;
@@ -1599,7 +1599,7 @@ mod test {
1599
1599
// ^= 39
1600
1600
1601
1601
assert_eq!(
1602
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1602
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1603
1603
Bytes ( br#"tag attr-1=">" attr2 = '>' 3attr"# )
1604
1604
) ;
1605
1605
assert_eq!( position, 39 ) ;
@@ -1618,7 +1618,7 @@ mod test {
1618
1618
// ^= 3
1619
1619
1620
1620
assert_eq!(
1621
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1621
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1622
1622
Bytes ( b"/" )
1623
1623
) ;
1624
1624
assert_eq!( position, 3 ) ;
@@ -1632,7 +1632,7 @@ mod test {
1632
1632
// ^= 6
1633
1633
1634
1634
assert_eq!(
1635
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1635
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1636
1636
Bytes ( b"tag/" )
1637
1637
) ;
1638
1638
assert_eq!( position, 6 ) ;
@@ -1646,7 +1646,7 @@ mod test {
1646
1646
// ^= 4
1647
1647
1648
1648
assert_eq!(
1649
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1649
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1650
1650
Bytes ( b":/" )
1651
1651
) ;
1652
1652
assert_eq!( position, 4 ) ;
@@ -1660,7 +1660,7 @@ mod test {
1660
1660
// ^= 7
1661
1661
1662
1662
assert_eq!(
1663
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1663
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1664
1664
Bytes ( b":tag/" )
1665
1665
) ;
1666
1666
assert_eq!( position, 7 ) ;
@@ -1674,7 +1674,7 @@ mod test {
1674
1674
// ^= 42
1675
1675
1676
1676
assert_eq!(
1677
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1677
+ Bytes ( $source( & mut input) . read_with :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1678
1678
Bytes ( br#"tag attr-1="/>" attr2 = '/>' 3attr/"# )
1679
1679
) ;
1680
1680
assert_eq!( position, 42 ) ;
0 commit comments