@@ -361,7 +361,7 @@ macro_rules! read_until_close {
361
361
} ,
362
362
// `<?` - processing instruction
363
363
Ok ( Some ( b'?' ) ) => match $reader
364
- . read_pi ( $buf, & mut $self. state. offset)
364
+ . read :: < PiParser > ( $buf, & mut $self. state. offset)
365
365
$( . $await) ?
366
366
{
367
367
Ok ( bytes) => $self. state. emit_question_mark( bytes) ,
@@ -374,7 +374,7 @@ macro_rules! read_until_close {
374
374
} ,
375
375
// `<...` - opening or self-closed tag
376
376
Ok ( Some ( _) ) => match $reader
377
- . read_element ( $buf, & mut $self. state. offset)
377
+ . read :: < ElementParser > ( $buf, & mut $self. state. offset)
378
378
$( . $await) ?
379
379
{
380
380
Ok ( bytes) => $self. state. emit_start( bytes) ,
@@ -763,6 +763,25 @@ impl<R> Reader<R> {
763
763
764
764
////////////////////////////////////////////////////////////////////////////////////////////////////
765
765
766
+ /// Used to decouple reading data from the data source from parsing the XML structure in that data.
767
+ ///
768
+ /// This trait is implemented for every parser that processes a piece of the XML grammar.
769
+ pub trait Parser : Default {
770
+ /// Process new data and try to determine the end of the parsed thing.
771
+ ///
772
+ /// Returns the position of the end of the thing in `bytes` in case of a successful search
773
+ /// and `None` otherwise.
774
+ ///
775
+ /// # Parameters
776
+ /// - `bytes`: a slice in which to search for the end of a thing.
777
+ /// Should contain text in ASCII-compatible encoding
778
+ fn feed ( & mut self , bytes : & [ u8 ] ) -> Option < usize > ;
779
+
780
+ /// Returns the parse error produced by this parser when the end of input is
781
+ /// reached without finding the end of the parsed thing.
782
+ fn eof_error ( ) -> SyntaxError ;
783
+ }
784
+
766
785
/// Represents an input for a reader that can return borrowed data.
767
786
///
768
787
/// There are two implementors of this trait: generic one that read data from
@@ -821,20 +840,20 @@ trait XmlSource<'r, B> {
821
840
822
841
/// Read input until the thing recognized by the parser `P` is finished.
823
842
///
824
- /// This method expect that `<?` already was read.
843
+ /// This method expects that the start sequence of the thing being parsed has already been read.
825
844
///
826
- /// Returns a slice of data read up to end of processing instruction (`>`),
827
- /// which does not include into result (`?` at the end included) .
845
+ /// Returns a slice of data read up to the end of a chunk; the end itself is
846
+ /// not included in the result.
828
847
///
829
- /// If input (`Self`) is exhausted and nothing was read, returns `None `.
848
+ /// If input (`Self`) is exhausted and nothing was read, returns a `SyntaxError`.
830
849
///
831
850
/// # Parameters
832
851
/// - `buf`: Buffer that could be filled from an input (`Self`) and
833
852
/// from which [events] could borrow their data
834
853
/// - `position`: Will be increased by amount of bytes consumed
835
854
///
836
855
/// [events]: crate::events::Event
837
- fn read_pi ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
856
+ fn read < P : Parser > ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
838
857
839
858
/// Read input until comment or CDATA is finished.
840
859
///
@@ -853,30 +872,6 @@ trait XmlSource<'r, B> {
853
872
/// [events]: crate::events::Event
854
873
fn read_bang_element ( & mut self , buf : B , position : & mut usize ) -> Result < ( BangType , & ' r [ u8 ] ) > ;
855
874
856
- /// Read input until XML element is closed by approaching a `>` symbol.
857
- /// Returns a buffer that contains a data between `<` and `>` or
858
- /// [`SyntaxError::UnclosedTag`] if end-of-input was reached before reading `>`.
859
- ///
860
- /// Derived from `read_until`, but modified to handle XML attributes
861
- /// using a minimal state machine.
862
- ///
863
- /// Attribute values are [defined] as follows:
864
- /// ```plain
865
- /// AttValue := '"' (([^<&"]) | Reference)* '"'
866
- /// | "'" (([^<&']) | Reference)* "'"
867
- /// ```
868
- /// (`Reference` is something like `"`, but we don't care about
869
- /// escaped characters at this level)
870
- ///
871
- /// # Parameters
872
- /// - `buf`: Buffer that could be filled from an input (`Self`) and
873
- /// from which [events] could borrow their data
874
- /// - `position`: Will be increased by amount of bytes consumed
875
- ///
876
- /// [defined]: https://www.w3.org/TR/xml11/#NT-AttValue
877
- /// [events]: crate::events::Event
878
- fn read_element ( & mut self , buf : B , position : & mut usize ) -> Result < & ' r [ u8 ] > ;
879
-
880
875
/// Consume and discard all the whitespace until the next non-whitespace
881
876
/// character or EOF.
882
877
///
@@ -1510,6 +1505,7 @@ mod test {
1510
1505
mod read_element {
1511
1506
use super :: * ;
1512
1507
use crate :: errors:: { Error , SyntaxError } ;
1508
+ use crate :: reader:: ElementParser ;
1513
1509
use crate :: utils:: Bytes ;
1514
1510
use pretty_assertions:: assert_eq;
1515
1511
@@ -1521,7 +1517,7 @@ mod test {
1521
1517
let mut input = b"" . as_ref( ) ;
1522
1518
// ^= 1
1523
1519
1524
- match $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? {
1520
+ match $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? {
1525
1521
Err ( Error :: Syntax ( SyntaxError :: UnclosedTag ) ) => { }
1526
1522
x => panic!(
1527
1523
"Expected `Err(Syntax(UnclosedTag))`, but got `{:?}`" ,
@@ -1543,7 +1539,7 @@ mod test {
1543
1539
// ^= 2
1544
1540
1545
1541
assert_eq!(
1546
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1542
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1547
1543
Bytes ( b"" )
1548
1544
) ;
1549
1545
assert_eq!( position, 2 ) ;
@@ -1557,7 +1553,7 @@ mod test {
1557
1553
// ^= 5
1558
1554
1559
1555
assert_eq!(
1560
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1556
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1561
1557
Bytes ( b"tag" )
1562
1558
) ;
1563
1559
assert_eq!( position, 5 ) ;
@@ -1571,7 +1567,7 @@ mod test {
1571
1567
// ^= 3
1572
1568
1573
1569
assert_eq!(
1574
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1570
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1575
1571
Bytes ( b":" )
1576
1572
) ;
1577
1573
assert_eq!( position, 3 ) ;
@@ -1585,7 +1581,7 @@ mod test {
1585
1581
// ^= 6
1586
1582
1587
1583
assert_eq!(
1588
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1584
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1589
1585
Bytes ( b":tag" )
1590
1586
) ;
1591
1587
assert_eq!( position, 6 ) ;
@@ -1599,7 +1595,7 @@ mod test {
1599
1595
// ^= 39
1600
1596
1601
1597
assert_eq!(
1602
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1598
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1603
1599
Bytes ( br#"tag attr-1=">" attr2 = '>' 3attr"# )
1604
1600
) ;
1605
1601
assert_eq!( position, 39 ) ;
@@ -1618,7 +1614,7 @@ mod test {
1618
1614
// ^= 3
1619
1615
1620
1616
assert_eq!(
1621
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1617
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1622
1618
Bytes ( b"/" )
1623
1619
) ;
1624
1620
assert_eq!( position, 3 ) ;
@@ -1632,7 +1628,7 @@ mod test {
1632
1628
// ^= 6
1633
1629
1634
1630
assert_eq!(
1635
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1631
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1636
1632
Bytes ( b"tag/" )
1637
1633
) ;
1638
1634
assert_eq!( position, 6 ) ;
@@ -1646,7 +1642,7 @@ mod test {
1646
1642
// ^= 4
1647
1643
1648
1644
assert_eq!(
1649
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1645
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1650
1646
Bytes ( b":/" )
1651
1647
) ;
1652
1648
assert_eq!( position, 4 ) ;
@@ -1660,7 +1656,7 @@ mod test {
1660
1656
// ^= 7
1661
1657
1662
1658
assert_eq!(
1663
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1659
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1664
1660
Bytes ( b":tag/" )
1665
1661
) ;
1666
1662
assert_eq!( position, 7 ) ;
@@ -1674,7 +1670,7 @@ mod test {
1674
1670
// ^= 42
1675
1671
1676
1672
assert_eq!(
1677
- Bytes ( $source( & mut input) . read_element ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1673
+ Bytes ( $source( & mut input) . read :: < ElementParser > ( buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1678
1674
Bytes ( br#"tag attr-1="/>" attr2 = '/>' 3attr/"# )
1679
1675
) ;
1680
1676
assert_eq!( position, 42 ) ;
0 commit comments