From 8b303155e692780909e7a44a559a0db6e55424f5 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Sun, 24 Sep 2023 19:35:33 +0300 Subject: [PATCH] Improve panic messages for decoding errors --- src/decoder.rs | 121 +++++++++++++++++++++++++++++++++++-------------- src/tests.rs | 16 ++++++- 2 files changed, 101 insertions(+), 36 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 36242cb..b380686 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -1,9 +1,57 @@ //! `Decoder` and closely related types. -use compile_fmt::{clip, compile_assert, compile_panic, fmt}; +use compile_fmt::{clip_ascii, compile_assert, compile_panic, fmt, Ascii}; use crate::wrappers::{SkipWhitespace, Skipper}; +// Since `?` is not allowed in `const fn`s, we use its simplified version. +macro_rules! const_try { + ($result:expr) => { + match $result { + Ok(value) => value, + Err(err) => return Err(err), + } + }; +} + +#[derive(Debug)] +struct DecodeError { + invalid_char: u8, + // `None` for hex encoding + alphabet: Option>, +} + +impl DecodeError { + const fn invalid_char(invalid_char: u8, alphabet: Option>) -> Self { + Self { + invalid_char, + alphabet, + } + } + + const fn panic(self, input_pos: usize) -> ! { + if self.invalid_char.is_ascii() { + if let Some(alphabet) = self.alphabet { + compile_panic!( + "Character '", self.invalid_char as char => fmt::(), "' at position ", + input_pos => fmt::(), " is not a part of \ + the decoder alphabet '", alphabet => clip_ascii(64, ""), "'" + ); + } else { + compile_panic!( + "Character '", self.invalid_char as char => fmt::(), "' at position ", + input_pos => fmt::(), " is not a hex digit" + ); + } + } else { + compile_panic!( + "Non-ASCII character with decimal code ", self.invalid_char => fmt::(), + " encountered at position ", input_pos => fmt::() + ); + } + } +} + /// Custom encoding scheme based on a certain alphabet (mapping between a subset of ASCII chars /// and digits in `0..P`, where `P` is a power of 2). /// @@ -22,6 +70,7 @@ use crate::wrappers::{SkipWhitespace, Skipper}; /// ``` #[derive(Debug, Clone, Copy)] pub struct Encoding { + alphabet: Ascii<'static>, table: [u8; 128], bits_per_char: u8, } @@ -42,7 +91,7 @@ impl Encoding { /// - Panics if `alphabet` does not consist of distinct ASCII chars. /// - Panics if `alphabet` length is not 2, 4, 8, 16, 32 or 64. #[allow(clippy::cast_possible_truncation)] - pub const fn new(alphabet: &str) -> Self { + pub const fn new(alphabet: &'static str) -> Self { let bits_per_char = match alphabet.len() { 2 => 1, 4 => 2, @@ -58,14 +107,10 @@ impl Encoding { let mut table = [Self::NO_MAPPING; 128]; let alphabet_bytes = alphabet.as_bytes(); + let alphabet = Ascii::new(alphabet); // will panic if `alphabet` contains non-ASCII chars let mut index = 0; while index < alphabet_bytes.len() { let byte = alphabet_bytes[index]; - compile_assert!( - byte < 0x80, - "Alphabet '", alphabet => clip(64, ""), "' contains non-ASCII character at position ", - index => fmt::() - ); let byte_idx = byte as usize; compile_assert!( table[byte_idx] == Self::NO_MAPPING, @@ -76,18 +121,22 @@ impl Encoding { } Self { + alphabet, table, bits_per_char, } } - const fn lookup(&self, ascii_char: u8) -> u8 { + const fn lookup(&self, ascii_char: u8) -> Result { + if !ascii_char.is_ascii() { + return Err(DecodeError::invalid_char(ascii_char, Some(self.alphabet))); + } let mapping = self.table[ascii_char as usize]; - compile_assert!( - mapping != Self::NO_MAPPING, - "Character '", ascii_char as char => fmt::(), "' is not present in the alphabet" - ); - mapping + if mapping == Self::NO_MAPPING { + Err(DecodeError::invalid_char(ascii_char, Some(self.alphabet))) + } else { + Ok(mapping) + } } } @@ -96,15 +145,13 @@ impl Encoding { struct HexDecoderState(Option); impl HexDecoderState { - const fn byte_value(val: u8) -> u8 { - match val { + const fn byte_value(val: u8) -> Result { + Ok(match val { b'0'..=b'9' => val - b'0', b'A'..=b'F' => val - b'A' + 10, b'a'..=b'f' => val - b'a' + 10, - _ => compile_panic!( - "Invalid character '", val as char => fmt::(), "' in input; expected a hex digit" - ), - } + _ => return Err(DecodeError::invalid_char(val, None)), + }) } const fn new() -> Self { @@ -112,8 +159,8 @@ impl HexDecoderState { } #[allow(clippy::option_if_let_else)] // `Option::map_or_else` cannot be used in const fns - const fn update(mut self, byte: u8) -> (Self, Option) { - let byte = Self::byte_value(byte); + const fn update(mut self, byte: u8) -> Result<(Self, Option), DecodeError> { + let byte = const_try!(Self::byte_value(byte)); let output = if let Some(b) = self.0 { self.0 = None; Some((b << 4) + byte) @@ -121,7 +168,7 @@ impl HexDecoderState { self.0 = Some(byte); None }; - (self, output) + Ok((self, output)) } const fn is_final(self) -> bool { @@ -147,8 +194,8 @@ impl CustomDecoderState { } #[allow(clippy::comparison_chain)] // not feasible in const context - const fn update(mut self, byte: u8) -> (Self, Option) { - let byte = self.table.lookup(byte); + const fn update(mut self, byte: u8) -> Result<(Self, Option), DecodeError> { + let byte = const_try!(self.table.lookup(byte)); let output = if self.filled_bits < 8 - self.table.bits_per_char { self.partial_byte = (self.partial_byte << self.table.bits_per_char) + byte; self.filled_bits += self.table.bits_per_char; @@ -166,7 +213,7 @@ impl CustomDecoderState { self.filled_bits = new_filled_bits; Some(output) }; - (self, output) + Ok((self, output)) } const fn is_final(&self) -> bool { @@ -184,25 +231,25 @@ enum DecoderState { } impl DecoderState { - const fn update(self, byte: u8) -> (Self, Option) { - match self { + const fn update(self, byte: u8) -> Result<(Self, Option), DecodeError> { + Ok(match self { Self::Hex(state) => { - let (updated_state, output) = state.update(byte); + let (updated_state, output) = const_try!(state.update(byte)); (Self::Hex(updated_state), output) } Self::Base64(state) => { if byte == b'=' { (self, None) } else { - let (updated_state, output) = state.update(byte); + let (updated_state, output) = const_try!(state.update(byte)); (Self::Base64(updated_state), output) } } Self::Custom(state) => { - let (updated_state, output) = state.update(byte); + let (updated_state, output) = const_try!(state.update(byte)); (Self::Custom(updated_state), output) } - } + }) } const fn is_final(&self) -> bool { @@ -243,7 +290,7 @@ impl Decoder { /// # Panics /// /// Panics in the same situations as [`Encoding::new()`]. - pub const fn custom(alphabet: &str) -> Self { + pub const fn custom(alphabet: &'static str) -> Self { Self::Custom(Encoding::new(alphabet)) } @@ -290,7 +337,10 @@ impl Decoder { } } - let update = state.update(input[in_index]); + let update = match state.update(input[in_index]) { + Ok(update) => update, + Err(err) => err.panic(in_index), + }; state = update.0; if let Some(byte) = update.1 { if out_index < N { @@ -336,7 +386,10 @@ impl Decoder { } } - let update = state.update(input[in_index]); + let update = match state.update(input[in_index]) { + Ok(update) => update, + Err(err) => err.panic(in_index), + }; state = update.0; if update.1.is_some() { out_index += 1; diff --git a/src/tests.rs b/src/tests.rs index 9be2205..5bd000c 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -97,7 +97,7 @@ fn base64_codec_in_runtime() { } #[test] -#[should_panic(expected = "Character '-' is not present in the alphabet")] +#[should_panic(expected = "Character '-' at position 3 is not a part of the decoder alphabet")] fn mixed_base64_alphabet_leads_to_panic() { Decoder::Base64.decode::<6>(b"Pj4-Pz8/"); } @@ -109,7 +109,7 @@ fn invalid_alphabet_length() { } #[test] -#[should_panic(expected = "Alphabet 'teß' contains non-ASCII character at position 2")] +#[should_panic(expected = "String 'teß' contains non-ASCII chars; first at position 2")] fn non_ascii_char_in_alphabet() { Decoder::custom("teß"); } @@ -120,6 +120,18 @@ fn duplicate_char_in_alphabet() { Decoder::custom("alphabet"); } +#[test] +#[should_panic(expected = "Non-ASCII character with decimal code 223")] +fn non_ascii_input() { + Decoder::Base64.decode::<6>(b"P\xDF+Pz8/"); +} + +#[test] +#[should_panic(expected = "Character 'u' at position 7 is not a hex digit")] +fn invalid_char_in_hex_input() { + Decoder::Hex.decode::<4>(b"c0ffeecu"); +} + #[test] #[should_panic(expected = "input decodes to 6 bytes, while type inference implies 3.")] fn input_length_overflow() {