From 42d71232f5456ddb160216ece8d9e15e5245adbf Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 26 Jun 2024 23:04:33 +0500 Subject: [PATCH] Do not allow (positive) sign in character references Negative sing will return ParseIntError(InvalidDigit) because we parse into unsigned number, but `+` sign is allowed by the parse method. To return the same error for both `+` and `-` we check for sign itself before parse --- Changelog.md | 4 ++- src/escape.rs | 74 +++++++++++++++++++++++++++----------- tests/escape.rs | 96 ++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 144 insertions(+), 30 deletions(-) diff --git a/Changelog.md b/Changelog.md index cf4a8486..abcd6625 100644 --- a/Changelog.md +++ b/Changelog.md @@ -32,9 +32,11 @@ - [#771]: `EscapeError::UnrecognizedSymbol` renamed to `EscapeError::UnrecognizedEntity`. - [#771]: Implemented `PartialEq` for `EscapeError`. - [#771]: Replace the following variants of `EscapeError` by `InvalidCharRef` variant - with a standard `ParseIntError` inside: + with a new `ParseCharRefError` inside: + - `EntityWithNull` - `InvalidDecimal` - `InvalidHexadecimal` + - `InvalidCodepoint` [#771]: https://github.com/tafia/quick-xml/pull/771 [#772]: https://github.com/tafia/quick-xml/pull/772 diff --git a/src/escape.rs b/src/escape.rs index d4481d74..76c7f7e6 100644 --- a/src/escape.rs +++ b/src/escape.rs @@ -5,30 +5,56 @@ use std::borrow::Cow; use std::num::ParseIntError; use std::ops::Range; +/// Error of parsing character reference (`&#;` or `&#x;`). +#[derive(Clone, Debug, PartialEq)] +pub enum ParseCharRefError { + /// Number contains sign character (`+` or `-`) which is not allowed. + UnexpectedSign, + /// Number cannot be parsed due to non-number characters or a numeric overflow. + InvalidNumber(ParseIntError), + /// Character reference represents not a valid unicode codepoint. + InvalidCodepoint(u32), + /// Character reference expanded to a not permitted character for an XML. + /// + /// Currently, only `0x0` character produces this error. + IllegalCharacter(u32), +} + +impl std::fmt::Display for ParseCharRefError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::UnexpectedSign => f.write_str("unexpected number sign"), + Self::InvalidNumber(e) => e.fmt(f), + Self::InvalidCodepoint(n) => write!(f, "`{}` is not a valid codepoint", n), + Self::IllegalCharacter(n) => write!(f, "0x{:x} character is not permitted in XML", n), + } + } +} + +impl std::error::Error for ParseCharRefError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::InvalidNumber(e) => Some(e), + _ => None, + } + } +} + /// Error for XML escape / unescape. #[derive(Clone, Debug, PartialEq)] pub enum EscapeError { - /// Entity with Null character - EntityWithNull(Range), /// Referenced entity in unknown to the parser. UnrecognizedEntity(Range, String), /// Cannot find `;` after `&` UnterminatedEntity(Range), /// Attempt to parse character reference (`&#;` or `&#x;`) /// was unsuccessful, not all characters are decimal or hexadecimal numbers. - InvalidCharRef(ParseIntError), - /// Not a valid unicode codepoint - InvalidCodepoint(u32), + InvalidCharRef(ParseCharRefError), } impl std::fmt::Display for EscapeError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { - EscapeError::EntityWithNull(e) => write!( - f, - "Error while escaping character at range {:?}: Null character entity not allowed", - e - ), EscapeError::UnrecognizedEntity(rge, res) => { write!(f, "at {:?}: unrecognized entity `{}`", rge, res) } @@ -40,7 +66,6 @@ impl std::fmt::Display for EscapeError { EscapeError::InvalidCharRef(e) => { write!(f, "invalid character reference: {}", e) } - EscapeError::InvalidCodepoint(n) => write!(f, "'{}' is not a valid codepoint", n), } } } @@ -246,7 +271,7 @@ where // search for character correctness let pat = &raw[start + 1..end]; if let Some(entity) = pat.strip_prefix('#') { - let codepoint = parse_number(entity, start..end)?; + let codepoint = parse_number(entity).map_err(EscapeError::InvalidCharRef)?; unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4])); } else if let Some(value) = resolve_entity(pat) { unescaped.push_str(value); @@ -1791,18 +1816,27 @@ pub const fn resolve_html5_entity(entity: &str) -> Option<&'static str> { Some(s) } -fn parse_number(bytes: &str, range: Range) -> Result { - let code = if let Some(hex_digits) = bytes.strip_prefix('x') { - u32::from_str_radix(hex_digits, 16) +fn parse_number(num: &str) -> Result { + let code = if let Some(hex) = num.strip_prefix('x') { + from_str_radix(hex, 16)? } else { - u32::from_str_radix(bytes, 10) - } - .map_err(EscapeError::InvalidCharRef)?; + from_str_radix(num, 10)? + }; if code == 0 { - return Err(EscapeError::EntityWithNull(range)); + return Err(ParseCharRefError::IllegalCharacter(code)); } match std::char::from_u32(code) { Some(c) => Ok(c), - None => Err(EscapeError::InvalidCodepoint(code)), + None => Err(ParseCharRefError::InvalidCodepoint(code)), + } +} + +#[inline] +fn from_str_radix(src: &str, radix: u32) -> Result { + match src.as_bytes().first().copied() { + // We should not allow sign numbers, but u32::from_str_radix will accept `+`. + // We also handle `-` to be consistent in returned errors + Some(b'+') | Some(b'-') => Err(ParseCharRefError::UnexpectedSign), + _ => u32::from_str_radix(src, radix).map_err(ParseCharRefError::InvalidNumber), } } diff --git a/tests/escape.rs b/tests/escape.rs index 0aecc440..894c97c2 100644 --- a/tests/escape.rs +++ b/tests/escape.rs @@ -1,5 +1,5 @@ use pretty_assertions::assert_eq; -use quick_xml::escape::{self, EscapeError}; +use quick_xml::escape::{self, EscapeError, ParseCharRefError}; use std::borrow::Cow; use std::num::IntErrorKind; @@ -93,15 +93,54 @@ fn unescape_long() { // Too big numbers for u32 should produce errors match escape::unescape(&format!("&#{};", u32::MAX as u64 + 1)) { - Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow), - x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x), + Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => { + assert_eq!(err.kind(), &IntErrorKind::PosOverflow) + } + x => panic!( + "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}", + x + ), } match escape::unescape(&format!("&#x{:x};", u32::MAX as u64 + 1)) { - Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow), - x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x), + Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => { + assert_eq!(err.kind(), &IntErrorKind::PosOverflow) + } + x => panic!( + "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}", + x + ), } } +#[test] +fn unescape_sign() { + assert_eq!( + escape::unescape("&#+48;"), + Err(EscapeError::InvalidCharRef( + ParseCharRefError::UnexpectedSign + )), + ); + assert_eq!( + escape::unescape("&#x+30;"), + Err(EscapeError::InvalidCharRef( + ParseCharRefError::UnexpectedSign + )), + ); + + assert_eq!( + escape::unescape("&#-48;"), + Err(EscapeError::InvalidCharRef( + ParseCharRefError::UnexpectedSign + )), + ); + assert_eq!( + escape::unescape("&#x-30;"), + Err(EscapeError::InvalidCharRef( + ParseCharRefError::UnexpectedSign + )), + ); +} + #[test] fn unescape_with() { let custom_entities = |ent: &str| match ent { @@ -156,11 +195,50 @@ fn unescape_with_long() { // Too big numbers for u32 should produce errors match escape::unescape_with(&format!("&#{};", u32::MAX as u64 + 1), |_| None) { - Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow), - x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x), + Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => { + assert_eq!(err.kind(), &IntErrorKind::PosOverflow) + } + x => panic!( + "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}", + x + ), } match escape::unescape_with(&format!("&#x{:x};", u32::MAX as u64 + 1), |_| None) { - Err(EscapeError::InvalidCharRef(err)) => assert_eq!(err.kind(), &IntErrorKind::PosOverflow), - x => panic!("expected Err(InvalidCharRef(PosOverflow)), bug got {:?}", x), + Err(EscapeError::InvalidCharRef(ParseCharRefError::InvalidNumber(err))) => { + assert_eq!(err.kind(), &IntErrorKind::PosOverflow) + } + x => panic!( + "expected Err(InvalidCharRef(InvalidNumber(PosOverflow))), bug got {:?}", + x + ), } } + +#[test] +fn unescape_with_sign() { + assert_eq!( + escape::unescape_with("&#+48;", |_| None), + Err(EscapeError::InvalidCharRef( + ParseCharRefError::UnexpectedSign + )), + ); + assert_eq!( + escape::unescape_with("&#x+30;", |_| None), + Err(EscapeError::InvalidCharRef( + ParseCharRefError::UnexpectedSign + )), + ); + + assert_eq!( + escape::unescape_with("&#-48;", |_| None), + Err(EscapeError::InvalidCharRef( + ParseCharRefError::UnexpectedSign + )), + ); + assert_eq!( + escape::unescape_with("&#x-30;", |_| None), + Err(EscapeError::InvalidCharRef( + ParseCharRefError::UnexpectedSign + )), + ); +}