Skip to content

Commit

Permalink
Improve panic messages for decoding errors
Browse files Browse the repository at this point in the history
  • Loading branch information
slowli committed Sep 24, 2023
1 parent ade3e8a commit 8b30315
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 36 deletions.
121 changes: 87 additions & 34 deletions src/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,57 @@
//! `Decoder` and closely related types.
use compile_fmt::{clip, compile_assert, compile_panic, fmt};
use compile_fmt::{clip_ascii, compile_assert, compile_panic, fmt, Ascii};

use crate::wrappers::{SkipWhitespace, Skipper};

// Since `?` is not allowed in `const fn`s, we use its simplified version.
macro_rules! const_try {
($result:expr) => {
match $result {
Ok(value) => value,
Err(err) => return Err(err),
}
};
}

#[derive(Debug)]
struct DecodeError {
invalid_char: u8,
// `None` for hex encoding
alphabet: Option<Ascii<'static>>,
}

impl DecodeError {
const fn invalid_char(invalid_char: u8, alphabet: Option<Ascii<'static>>) -> Self {
Self {
invalid_char,
alphabet,
}
}

const fn panic(self, input_pos: usize) -> ! {
if self.invalid_char.is_ascii() {
if let Some(alphabet) = self.alphabet {
compile_panic!(
"Character '", self.invalid_char as char => fmt::<char>(), "' at position ",
input_pos => fmt::<usize>(), " is not a part of \
the decoder alphabet '", alphabet => clip_ascii(64, ""), "'"
);
} else {
compile_panic!(
"Character '", self.invalid_char as char => fmt::<char>(), "' at position ",
input_pos => fmt::<usize>(), " is not a hex digit"
);
}
} else {
compile_panic!(
"Non-ASCII character with decimal code ", self.invalid_char => fmt::<u8>(),
" encountered at position ", input_pos => fmt::<usize>()
);
}
}
}

/// Custom encoding scheme based on a certain alphabet (mapping between a subset of ASCII chars
/// and digits in `0..P`, where `P` is a power of 2).
///
Expand All @@ -22,6 +70,7 @@ use crate::wrappers::{SkipWhitespace, Skipper};
/// ```
#[derive(Debug, Clone, Copy)]
pub struct Encoding {
alphabet: Ascii<'static>,
table: [u8; 128],
bits_per_char: u8,
}
Expand All @@ -42,7 +91,7 @@ impl Encoding {
/// - Panics if `alphabet` does not consist of distinct ASCII chars.
/// - Panics if `alphabet` length is not 2, 4, 8, 16, 32 or 64.
#[allow(clippy::cast_possible_truncation)]
pub const fn new(alphabet: &str) -> Self {
pub const fn new(alphabet: &'static str) -> Self {
let bits_per_char = match alphabet.len() {
2 => 1,
4 => 2,
Expand All @@ -58,14 +107,10 @@ impl Encoding {

let mut table = [Self::NO_MAPPING; 128];
let alphabet_bytes = alphabet.as_bytes();
let alphabet = Ascii::new(alphabet); // will panic if `alphabet` contains non-ASCII chars
let mut index = 0;
while index < alphabet_bytes.len() {
let byte = alphabet_bytes[index];
compile_assert!(
byte < 0x80,
"Alphabet '", alphabet => clip(64, ""), "' contains non-ASCII character at position ",
index => fmt::<usize>()
);
let byte_idx = byte as usize;
compile_assert!(
table[byte_idx] == Self::NO_MAPPING,
Expand All @@ -76,18 +121,22 @@ impl Encoding {
}

Self {
alphabet,
table,
bits_per_char,
}
}

const fn lookup(&self, ascii_char: u8) -> u8 {
const fn lookup(&self, ascii_char: u8) -> Result<u8, DecodeError> {
if !ascii_char.is_ascii() {
return Err(DecodeError::invalid_char(ascii_char, Some(self.alphabet)));
}
let mapping = self.table[ascii_char as usize];
compile_assert!(
mapping != Self::NO_MAPPING,
"Character '", ascii_char as char => fmt::<char>(), "' is not present in the alphabet"
);
mapping
if mapping == Self::NO_MAPPING {
Err(DecodeError::invalid_char(ascii_char, Some(self.alphabet)))
} else {
Ok(mapping)
}
}
}

Expand All @@ -96,32 +145,30 @@ impl Encoding {
struct HexDecoderState(Option<u8>);

impl HexDecoderState {
const fn byte_value(val: u8) -> u8 {
match val {
const fn byte_value(val: u8) -> Result<u8, DecodeError> {
Ok(match val {
b'0'..=b'9' => val - b'0',
b'A'..=b'F' => val - b'A' + 10,
b'a'..=b'f' => val - b'a' + 10,
_ => compile_panic!(
"Invalid character '", val as char => fmt::<char>(), "' in input; expected a hex digit"
),
}
_ => return Err(DecodeError::invalid_char(val, None)),
})
}

const fn new() -> Self {
Self(None)
}

#[allow(clippy::option_if_let_else)] // `Option::map_or_else` cannot be used in const fns
const fn update(mut self, byte: u8) -> (Self, Option<u8>) {
let byte = Self::byte_value(byte);
const fn update(mut self, byte: u8) -> Result<(Self, Option<u8>), DecodeError> {
let byte = const_try!(Self::byte_value(byte));
let output = if let Some(b) = self.0 {
self.0 = None;
Some((b << 4) + byte)
} else {
self.0 = Some(byte);
None
};
(self, output)
Ok((self, output))
}

const fn is_final(self) -> bool {
Expand All @@ -147,8 +194,8 @@ impl CustomDecoderState {
}

#[allow(clippy::comparison_chain)] // not feasible in const context
const fn update(mut self, byte: u8) -> (Self, Option<u8>) {
let byte = self.table.lookup(byte);
const fn update(mut self, byte: u8) -> Result<(Self, Option<u8>), DecodeError> {
let byte = const_try!(self.table.lookup(byte));
let output = if self.filled_bits < 8 - self.table.bits_per_char {
self.partial_byte = (self.partial_byte << self.table.bits_per_char) + byte;
self.filled_bits += self.table.bits_per_char;
Expand All @@ -166,7 +213,7 @@ impl CustomDecoderState {
self.filled_bits = new_filled_bits;
Some(output)
};
(self, output)
Ok((self, output))
}

const fn is_final(&self) -> bool {
Expand All @@ -184,25 +231,25 @@ enum DecoderState {
}

impl DecoderState {
const fn update(self, byte: u8) -> (Self, Option<u8>) {
match self {
const fn update(self, byte: u8) -> Result<(Self, Option<u8>), DecodeError> {
Ok(match self {
Self::Hex(state) => {
let (updated_state, output) = state.update(byte);
let (updated_state, output) = const_try!(state.update(byte));
(Self::Hex(updated_state), output)
}
Self::Base64(state) => {
if byte == b'=' {
(self, None)
} else {
let (updated_state, output) = state.update(byte);
let (updated_state, output) = const_try!(state.update(byte));
(Self::Base64(updated_state), output)
}
}
Self::Custom(state) => {
let (updated_state, output) = state.update(byte);
let (updated_state, output) = const_try!(state.update(byte));
(Self::Custom(updated_state), output)
}
}
})
}

const fn is_final(&self) -> bool {
Expand Down Expand Up @@ -243,7 +290,7 @@ impl Decoder {
/// # Panics
///
/// Panics in the same situations as [`Encoding::new()`].
pub const fn custom(alphabet: &str) -> Self {
pub const fn custom(alphabet: &'static str) -> Self {
Self::Custom(Encoding::new(alphabet))
}

Expand Down Expand Up @@ -290,7 +337,10 @@ impl Decoder {
}
}

let update = state.update(input[in_index]);
let update = match state.update(input[in_index]) {
Ok(update) => update,
Err(err) => err.panic(in_index),
};
state = update.0;
if let Some(byte) = update.1 {
if out_index < N {
Expand Down Expand Up @@ -336,7 +386,10 @@ impl Decoder {
}
}

let update = state.update(input[in_index]);
let update = match state.update(input[in_index]) {
Ok(update) => update,
Err(err) => err.panic(in_index),
};
state = update.0;
if update.1.is_some() {
out_index += 1;
Expand Down
16 changes: 14 additions & 2 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ fn base64_codec_in_runtime() {
}

#[test]
#[should_panic(expected = "Character '-' is not present in the alphabet")]
#[should_panic(expected = "Character '-' at position 3 is not a part of the decoder alphabet")]
fn mixed_base64_alphabet_leads_to_panic() {
Decoder::Base64.decode::<6>(b"Pj4-Pz8/");
}
Expand All @@ -109,7 +109,7 @@ fn invalid_alphabet_length() {
}

#[test]
#[should_panic(expected = "Alphabet 'teß' contains non-ASCII character at position 2")]
#[should_panic(expected = "String 'teß' contains non-ASCII chars; first at position 2")]
fn non_ascii_char_in_alphabet() {
Decoder::custom("teß");
}
Expand All @@ -120,6 +120,18 @@ fn duplicate_char_in_alphabet() {
Decoder::custom("alphabet");
}

#[test]
#[should_panic(expected = "Non-ASCII character with decimal code 223")]
fn non_ascii_input() {
Decoder::Base64.decode::<6>(b"P\xDF+Pz8/");
}

#[test]
#[should_panic(expected = "Character 'u' at position 7 is not a hex digit")]
fn invalid_char_in_hex_input() {
Decoder::Hex.decode::<4>(b"c0ffeecu");
}

#[test]
#[should_panic(expected = "input decodes to 6 bytes, while type inference implies 3.")]
fn input_length_overflow() {
Expand Down

0 comments on commit 8b30315

Please sign in to comment.