Skip to content

Commit

Permalink
Audit use of unsafe in uri/authority.rs (hyperium#414)
Browse files Browse the repository at this point in the history
* Add unit test for rejecting invalid UTF-8

* Add Authority::from_static() test

* Refactor uri::Authority

Extract the common code from three ways of creating an Authority into a
private create_authority() function.

* Add comments to explain the safety of Authority

The comments describe the preconditions and postconditions that together
ensure that the one use of 'unsafe' in uri/authority.rs is sound.

* Fix typo
  • Loading branch information
sbosnick authored and Benxiang Ge committed Jul 26, 2021
1 parent c6e4ebe commit 4cf44a7
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 31 deletions.
88 changes: 57 additions & 31 deletions src/uri/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,9 @@ impl Authority {

// Not public while `bytes` is unstable.
pub(super) fn from_shared(s: Bytes) -> Result<Self, InvalidUri> {
let authority_end = Authority::parse_non_empty(&s[..])?;

if authority_end != s.len() {
return Err(ErrorKind::InvalidUriChar.into());
}

Ok(Authority {
data: unsafe { ByteStr::from_utf8_unchecked(s) },
})
// Precondition on create_authority: trivially satisfied by the
// identity clousre
create_authority(s, |s| s)
}

/// Attempt to convert an `Authority` from a static string.
Expand All @@ -52,18 +46,8 @@ impl Authority {
/// assert_eq!(authority.host(), "example.com");
/// ```
pub fn from_static(src: &'static str) -> Self {
let s = src.as_bytes();
let b = Bytes::from_static(s);
let authority_end =
Authority::parse_non_empty(&b[..]).expect("static str is not valid authority");

if authority_end != b.len() {
panic!("static str is not valid authority");
}

Authority {
data: unsafe { ByteStr::from_utf8_unchecked(b) },
}
Authority::from_shared(Bytes::from_static(src.as_bytes()))
.expect("static str is not valid authority")
}


Expand All @@ -83,6 +67,8 @@ impl Authority {
}

// Note: this may return an *empty* Authority. You might want `parse_non_empty`.
// Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
// ret is the return value.
pub(super) fn parse(s: &[u8]) -> Result<usize, InvalidUri> {
let mut colon_cnt = 0;
let mut start_bracket = false;
Expand All @@ -91,6 +77,10 @@ impl Authority {
let mut end = s.len();
let mut at_sign_pos = None;

// Among other things, this loop checks that every byte in s up to the
// first '/', '?', or '#' is a valid URI character (or in some contexts,
// a '%'). This means that each such byte is a valid single-byte UTF-8
// code point.
for (i, &b) in s.iter().enumerate() {
match URI_CHARS[b as usize] {
b'/' | b'?' | b'#' => {
Expand Down Expand Up @@ -168,6 +158,9 @@ impl Authority {
//
// This should be used by functions that allow a user to parse
// an `Authority` by itself.
//
// Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
// ret is the return value.
fn parse_non_empty(s: &[u8]) -> Result<usize, InvalidUri> {
if s.is_empty() {
return Err(ErrorKind::Empty.into());
Expand Down Expand Up @@ -432,17 +425,10 @@ impl<'a> TryFrom<&'a [u8]> for Authority {
#[inline]
fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
// parse first, and only turn into Bytes if valid
let end = Authority::parse_non_empty(s)?;

if end != s.len() {
return Err(ErrorKind::InvalidAuthority.into());
}

Ok(Authority {
data: unsafe {
ByteStr::from_utf8_unchecked(Bytes::copy_from_slice(s))
},
})
// Preconditon on create_authority: copy_from_slice() copies all of
// bytes from the [u8] parameter into a new Bytes
create_authority(s, |s| Bytes::copy_from_slice(s))
}
}

Expand Down Expand Up @@ -494,6 +480,30 @@ fn host(auth: &str) -> &str {
}
}

// Precondition: f converts all of the bytes in the passed in B into the
// returned Bytes.
fn create_authority<B, F>(b: B, f: F) -> Result<Authority, InvalidUri>
where
B: AsRef<[u8]>,
F: FnOnce(B) -> Bytes,
{
let s = b.as_ref();
let authority_end = Authority::parse_non_empty(s)?;

if authority_end != s.len() {
return Err(ErrorKind::InvalidUriChar.into());
}

let bytes = f(b);

Ok(Authority {
// Safety: the postcondition on parse_non_empty() and the check against
// s.len() ensure that b is valid UTF-8. The precondition on f ensures
// that this is carried through to bytes.
data: unsafe { ByteStr::from_utf8_unchecked(bytes) },
})
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -529,6 +539,12 @@ mod tests {
assert_eq!("EXAMPLE.com", authority);
}

#[test]
fn from_static_equates_with_a_str() {
let authority = Authority::from_static("example.com");
assert_eq!(authority, "example.com");
}

#[test]
fn not_equal_with_a_str_of_a_different_authority() {
let authority: Authority = "example.com".parse().unwrap();
Expand Down Expand Up @@ -616,4 +632,14 @@ mod tests {
let err = Authority::parse_non_empty(b"[fe80::1:2:3:4]%20").unwrap_err();
assert_eq!(err.0, ErrorKind::InvalidAuthority);
}

#[test]
fn rejects_invalid_utf8() {
let err = Authority::try_from([0xc0u8].as_ref()).unwrap_err();
assert_eq!(err.0, ErrorKind::InvalidUriChar);

let err = Authority::from_shared(Bytes::from_static([0xc0u8].as_ref()))
.unwrap_err();
assert_eq!(err.0, ErrorKind::InvalidUriChar);
}
}
6 changes: 6 additions & 0 deletions src/uri/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ enum ErrorKind {
// u16::MAX is reserved for None
const MAX_LEN: usize = (u16::MAX - 1) as usize;

// URI_CHARS is a table of valid characters in a URI. An entry in the table is
// 0 for invalid characters. For valid characters the entry is itself (i.e.
// the entry for 33 is b'!' because b'!' == 33u8). An important characteristic
// of this table is that all entries above 127 are invalid. This makes all of the
// valid entries a valid single-byte UTF-8 code point. This means that a slice
// of such valid entries is valid UTF-8.
const URI_CHARS: [u8; 256] = [
// 0 1 2 3 4 5 6 7 8 9
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
Expand Down

0 comments on commit 4cf44a7

Please sign in to comment.