@@ -24,6 +24,32 @@ use crate::reader::state::ReaderState;
24
24
#[ cfg_attr( feature = "serde-types" , derive( serde:: Deserialize , serde:: Serialize ) ) ]
25
25
#[ non_exhaustive]
26
26
pub struct Config {
27
+ /// Whether a lone ampersand character (without a paired semicolon) should be
28
+ /// allowed in textual content. Unless enabled, in case of a dangling ampersand,
29
+ /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods.
30
+ ///
31
+ /// Default: `false`
32
+ ///
33
+ /// # Example
34
+ ///
35
+ /// ```
36
+ /// # use quick_xml::events::{BytesRef, BytesText, Event};
37
+ /// # use quick_xml::reader::Reader;
38
+ /// # use pretty_assertions::assert_eq;
39
+ /// let mut reader = Reader::from_str("text with & & & alone");
40
+ /// reader.config_mut().allow_dangling_amp = true;
41
+ ///
42
+ /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with ")));
43
+ /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& ")));
44
+ /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp")));
45
+ /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" ")));
46
+ /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone")));
47
+ /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
48
+ /// ```
49
+ ///
50
+ /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference
51
+ pub allow_dangling_amp : bool ,
52
+
27
53
/// Whether unmatched closing tag names should be allowed. Unless enabled,
28
54
/// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`]
29
55
/// is returned from read methods.
@@ -210,6 +236,7 @@ impl Config {
210
236
impl Default for Config {
211
237
fn default ( ) -> Self {
212
238
Self {
239
+ allow_dangling_amp : false ,
213
240
allow_unmatched_ends : false ,
214
241
check_comments : false ,
215
242
check_end_names : true ,
@@ -261,18 +288,29 @@ macro_rules! read_event_impl {
261
288
Ok ( Event :: GeneralRef ( BytesRef :: wrap( & bytes[ 1 ..] , $self. decoder( ) ) ) )
262
289
}
263
290
// Go to Done state
264
- ReadRefResult :: UpToEof => {
291
+ ReadRefResult :: UpToEof ( bytes) if $self. state. config. allow_dangling_amp => {
292
+ $self. state. state = ParseState :: Done ;
293
+ Ok ( Event :: Text ( $self. state. emit_text( bytes) ) )
294
+ }
295
+ ReadRefResult :: UpToEof ( _) => {
265
296
$self. state. state = ParseState :: Done ;
266
297
$self. state. last_error_offset = start;
267
298
Err ( Error :: IllFormed ( IllFormedError :: UnclosedReference ) )
268
299
}
269
300
// Do not change state, stay in InsideRef
270
- ReadRefResult :: UpToRef => {
301
+ ReadRefResult :: UpToRef ( bytes) if $self. state. config. allow_dangling_amp => {
302
+ Ok ( Event :: Text ( $self. state. emit_text( bytes) ) )
303
+ }
304
+ ReadRefResult :: UpToRef ( _) => {
271
305
$self. state. last_error_offset = start;
272
306
Err ( Error :: IllFormed ( IllFormedError :: UnclosedReference ) )
273
307
}
274
308
// Go to InsideMarkup state
275
- ReadRefResult :: UpToMarkup => {
309
+ ReadRefResult :: UpToMarkup ( bytes) if $self. state. config. allow_dangling_amp => {
310
+ $self. state. state = ParseState :: InsideMarkup ;
311
+ Ok ( Event :: Text ( $self. state. emit_text( bytes) ) )
312
+ }
313
+ ReadRefResult :: UpToMarkup ( _) => {
276
314
$self. state. state = ParseState :: InsideMarkup ;
277
315
$self. state. last_error_offset = start;
278
316
Err ( Error :: IllFormed ( IllFormedError :: UnclosedReference ) )
@@ -997,13 +1035,13 @@ enum ReadRefResult<'r> {
997
1035
/// Contains text block up to EOF. Neither end of reference (`;`), start of
998
1036
/// another reference (`&`) nor start of markup (`<`) character was found.
999
1037
/// Result includes start `&`.
1000
- UpToEof ,
1038
+ UpToEof ( & ' r [ u8 ] ) ,
1001
1039
/// Contains text block up to next possible reference (`&` character).
1002
1040
/// Result includes start `&`.
1003
- UpToRef ,
1041
+ UpToRef ( & ' r [ u8 ] ) ,
1004
1042
/// Contains text block up to start of markup (`<` character).
1005
1043
/// Result includes start `&`.
1006
- UpToMarkup ,
1044
+ UpToMarkup ( & ' r [ u8 ] ) ,
1007
1045
/// IO error occurred.
1008
1046
Err ( io:: Error ) ,
1009
1047
}
@@ -1722,8 +1760,8 @@ mod test {
1722
1760
// ^= 2
1723
1761
1724
1762
match $source( & mut input) . read_ref( buf, & mut position) $( . $await) ? {
1725
- ReadRefResult :: UpToEof => ( ) ,
1726
- x => panic!( "Expected `UpToEof`, but got `{:?}`" , x) ,
1763
+ ReadRefResult :: UpToEof ( bytes ) => assert_eq! ( Bytes ( bytes ) , Bytes ( b"&" ) ) ,
1764
+ x => panic!( "Expected `UpToEof(_) `, but got `{:?}`" , x) ,
1727
1765
}
1728
1766
assert_eq!( position, 2 ) ;
1729
1767
}
@@ -1736,8 +1774,8 @@ mod test {
1736
1774
// ^= 2
1737
1775
1738
1776
match $source( & mut input) . read_ref( buf, & mut position) $( . $await) ? {
1739
- ReadRefResult :: UpToRef => ( ) ,
1740
- x => panic!( "Expected `UpToRef`, but got `{:?}`" , x) ,
1777
+ ReadRefResult :: UpToRef ( bytes ) => assert_eq! ( Bytes ( bytes ) , Bytes ( b"&" ) ) ,
1778
+ x => panic!( "Expected `UpToRef(_) `, but got `{:?}`" , x) ,
1741
1779
}
1742
1780
assert_eq!( position, 2 ) ;
1743
1781
}
@@ -1750,8 +1788,8 @@ mod test {
1750
1788
// ^= 3
1751
1789
1752
1790
match $source( & mut input) . read_ref( buf, & mut position) $( . $await) ? {
1753
- ReadRefResult :: UpToMarkup => ( ) ,
1754
- x => panic!( "Expected `UpToMarkup`, but got `{:?}`" , x) ,
1791
+ ReadRefResult :: UpToMarkup ( bytes ) => assert_eq! ( Bytes ( bytes ) , Bytes ( b"&" ) ) ,
1792
+ x => panic!( "Expected `UpToMarkup(_) `, but got `{:?}`" , x) ,
1755
1793
}
1756
1794
assert_eq!( position, 3 ) ;
1757
1795
}
0 commit comments