From fcf134bd240aed7de20b0bb3e307d13df6f3b9ad Mon Sep 17 00:00:00 2001 From: Jack Grigg Date: Thu, 21 Apr 2022 15:26:58 +0000 Subject: [PATCH] chacha20poly1305: Switch to one-pass encryption and decryption --- Cargo.lock | 16 ++ chacha20poly1305/Cargo.toml | 1 + chacha20poly1305/src/cipher.rs | 324 +++++++++++++++++++++++++++++++-- chacha20poly1305/src/lib.rs | 22 +-- 4 files changed, 332 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b9a8a653..be004c16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -178,6 +178,7 @@ dependencies = [ "aead", "chacha20", "cipher 0.4.3", + "gcd", "poly1305", "zeroize", ] @@ -319,6 +320,15 @@ dependencies = [ "void", ] +[[package]] +name = "gcd" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f37978dab2ca789938a83b2f8bc1ef32db6633af9051a6cd409eff72cbaaa79a" +dependencies = [ + "paste", +] + [[package]] name = "generic-array" version = "0.14.6" @@ -488,6 +498,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" +[[package]] +name = "paste" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" + [[package]] name = "pmac" version = "0.7.1" diff --git a/chacha20poly1305/Cargo.toml b/chacha20poly1305/Cargo.toml index f2adaea7..a7674775 100644 --- a/chacha20poly1305/Cargo.toml +++ b/chacha20poly1305/Cargo.toml @@ -21,6 +21,7 @@ categories = ["cryptography", "no-std"] aead = { version = "0.5", default-features = false } chacha20 = { version = "0.9", features = ["zeroize"] } cipher = "0.4" +gcd = "2" poly1305 = "0.8" zeroize = { version = "1.5", default-features = false } diff --git a/chacha20poly1305/src/cipher.rs b/chacha20poly1305/src/cipher.rs index 05244284..eeb38fb6 100644 --- a/chacha20poly1305/src/cipher.rs +++ b/chacha20poly1305/src/cipher.rs @@ -1,10 +1,16 @@ //! Core AEAD cipher implementation for (X)ChaCha20Poly1305. -use ::cipher::{StreamCipher, StreamCipherSeek}; +use core::marker::PhantomData; + +use ::cipher::{StreamBackend, StreamCipherCore, StreamCipherSeekCore, StreamClosure, Unsigned}; use aead::generic_array::GenericArray; use aead::Error; +use cipher::inout::InOutBuf; use poly1305::{ - universal_hash::{KeyInit, UniversalHash}, + universal_hash::{ + crypto_common::{BlockSizeUser as UhfBlockSizeUser, KeySizeUser}, + KeyInit, UhfBackend, UhfClosure, UniversalHash, + }, Poly1305, }; use zeroize::Zeroize; @@ -21,7 +27,7 @@ const MAX_BLOCKS: usize = core::u32::MAX as usize; /// ChaCha20Poly1305 instantiated with a particular nonce pub(crate) struct Cipher where - C: StreamCipher + StreamCipherSeek, + C: StreamCipherCore + StreamCipherSeekCore, { cipher: C, mac: Poly1305, @@ -29,19 +35,20 @@ where impl Cipher where - C: StreamCipher + StreamCipherSeek, + C: StreamCipherCore + StreamCipherSeekCore, { /// Instantiate the underlying cipher with a particular nonce pub(crate) fn new(mut cipher: C) -> Self { // Derive Poly1305 key from the first 32-bytes of the ChaCha20 keystream - let mut mac_key = poly1305::Key::default(); - cipher.apply_keystream(&mut *mac_key); + let mut mac_key = [GenericArray::default()]; + cipher.apply_keystream_blocks(&mut mac_key); - let mac = Poly1305::new(GenericArray::from_slice(&*mac_key)); - mac_key.zeroize(); + let mac = Poly1305::new(GenericArray::from_slice( + &mac_key[0][..::KeySize::USIZE], + )); + mac_key[0].zeroize(); - // Set ChaCha20 counter to 1 - cipher.seek(BLOCK_SIZE as u64); + // We've consumed an entire ChaCha20 block, so its counter is now 1. Self { cipher, mac } } @@ -58,10 +65,11 @@ where self.mac.update_padded(associated_data); - // TODO(tarcieri): interleave encryption with Poly1305 - // See: - self.cipher.apply_keystream(buffer); - self.mac.update_padded(buffer); + self.cipher.process_with_backend(PaddedEncryptor { + buffer, + cipher: PhantomData::::default(), + mac: &mut self.mac, + }); self.authenticate_lengths(associated_data, buffer)?; Ok(self.mac.finalize()) @@ -80,16 +88,28 @@ where } self.mac.update_padded(associated_data); - self.mac.update_padded(buffer); - self.authenticate_lengths(associated_data, buffer)?; + + self.cipher.process_with_backend(PaddedDecryptor { + buffer, + cipher: PhantomData::::default(), + mac: &mut self.mac, + }); // This performs a constant-time comparison using the `subtle` crate - if self.mac.verify(tag).is_ok() { - // TODO(tarcieri): interleave decryption with Poly1305 - // See: - self.cipher.apply_keystream(buffer); + if self.authenticate_lengths(associated_data, buffer).is_ok() + && self.mac.verify(tag).is_ok() + { Ok(()) } else { + // On MAC verify failure, re-encrypt the plaintext buffer to prevent + // accidental exposure. + let pos_1 = match 1.try_into() { + Ok(counter) => counter, + // Counter trait has no Debug bound, so we can't use Result::unwrap. + Err(_) => panic!(), + }; + self.cipher.set_block_pos(pos_1); + self.cipher.apply_keystream_partial(buffer.into()); Err(Error) } } @@ -107,3 +127,267 @@ where Ok(()) } } + +/// Returns the least common multiple of `a` and `b`. +const fn lcm(a: usize, b: usize) -> usize { + a * (b / gcd::binary_usize(a, b)) +} + +struct PaddedEncryptor<'a, C: StreamCipherCore> { + buffer: &'a mut [u8], + cipher: PhantomData, + mac: &'a mut Poly1305, +} + +impl<'a, C: StreamCipherCore> ::cipher::BlockSizeUser for PaddedEncryptor<'a, C> { + type BlockSize = C::BlockSize; +} + +impl<'a, C: StreamCipherCore> StreamClosure for PaddedEncryptor<'a, C> { + #[inline(always)] + #[allow(clippy::needless_range_loop)] + fn call>(self, cipher_backend: &mut B) { + // This simulates a nested closure. + self.mac.update_with_backend(PaddedEncryptorInner { + buffer: self.buffer, + cipher_backend, + }) + } +} + +struct PaddedEncryptorInner<'a, CB: StreamBackend> { + buffer: &'a mut [u8], + cipher_backend: &'a mut CB, +} + +impl<'a, CB: StreamBackend> UhfBlockSizeUser for PaddedEncryptorInner<'a, CB> { + type BlockSize = ::BlockSize; +} + +impl<'a, CB: StreamBackend> UhfClosure for PaddedEncryptorInner<'a, CB> { + #[inline(always)] + fn call>(self, mac_backend: &mut MB) { + // Calculate the least common multiple of the number of bytes which can be + // processed by each backend in parallel. + let lcm_block_size = lcm( + CB::BlockSize::USIZE * CB::ParBlocksSize::USIZE, + MB::BlockSize::USIZE * MB::ParBlocksSize::USIZE, + ); + + let mut iter = self.buffer.chunks_exact_mut(lcm_block_size); + for lcm_segment in &mut iter { + // Cast the segment into `&mut [[Block; CB::ParBlocksSize]]`. + let cipher_chunks = { + let (lcm_blocks, _) = InOutBuf::from(&mut *lcm_segment).into_chunks(); + lcm_blocks.into_chunks().0 + }; + + // Encrypt the blocks. + let mut tmp = Default::default(); + for mut chunk in cipher_chunks { + self.cipher_backend.gen_par_ks_blocks(&mut tmp); + chunk.xor_in2out(&tmp); + } + + // Cast the segment into `&mut [[Block; MB::ParBlocksSize]]`. + let mac_blocks = { + let (lcm_blocks, _) = InOutBuf::from(lcm_segment).into_chunks(); + lcm_blocks.into_chunks().0 + }; + + // Update the MAC with the encrypted blocks. + for par_blocks in mac_blocks { + mac_backend.proc_par_blocks(par_blocks.get_in()); + } + } + + // The remaining tail bytes can't be nicely interleaved, so we process them with + // the cipher and MAC separately. + let tail = iter.into_remainder(); + + let (cipher_blocks, mut cipher_tail) = InOutBuf::from(&mut *tail).into_chunks(); + let cipher_blocks = if CB::ParBlocksSize::USIZE > 1 { + let (chunks, tail) = cipher_blocks.into_chunks(); + + // Encrypt the remaining blocks that can be parallelized by the cipher. + for mut chunk in chunks { + let mut tmp = Default::default(); + self.cipher_backend.gen_par_ks_blocks(&mut tmp); + chunk.xor_in2out(&tmp); + } + + tail + } else { + cipher_blocks + }; + + // Encrypt any remaining complete blocks. + let n = cipher_blocks.len(); + let mut tmp = GenericArray::, CB::ParBlocksSize>::default(); + let ks = &mut tmp[..n]; + self.cipher_backend.gen_tail_blocks(ks); + for (mut block, data) in cipher_blocks.into_iter().zip(ks) { + block.xor_in2out(&data); + } + + // Encrypt any remaining bytes, which are smaller than a cipher block. + if !cipher_tail.is_empty() { + let mut t = Default::default(); + self.cipher_backend.gen_ks_block(&mut t); + cipher_tail.xor_in2out(&t[..cipher_tail.len()]); + } + + let (mac_blocks, mac_tail) = InOutBuf::from(tail).into_chunks(); + let mac_blocks = if MB::ParBlocksSize::USIZE > 1 { + let (par_blocks, tail) = mac_blocks.into_chunks(); + for par_block in par_blocks { + mac_backend.proc_par_blocks(par_block.get_in()); + } + tail + } else { + mac_blocks + }; + for block in mac_blocks { + mac_backend.proc_block(block.get_in()); + } + + // Pad any remaining bytes with zeroes to create the final MAC block. + if !mac_tail.is_empty() { + let mut padded_block = GenericArray::default(); + padded_block[..mac_tail.len()].copy_from_slice(mac_tail.get_in()); + mac_backend.proc_block(&padded_block); + } + } +} + +struct PaddedDecryptor<'a, C: StreamCipherCore> { + buffer: &'a mut [u8], + cipher: PhantomData, + mac: &'a mut Poly1305, +} + +impl<'a, C: StreamCipherCore> ::cipher::BlockSizeUser for PaddedDecryptor<'a, C> { + type BlockSize = C::BlockSize; +} + +impl<'a, C: StreamCipherCore> StreamClosure for PaddedDecryptor<'a, C> { + #[inline(always)] + #[allow(clippy::needless_range_loop)] + fn call>(self, cipher_backend: &mut B) { + // This simulates a nested closure. + self.mac.update_with_backend(PaddedDecryptorInner { + buffer: self.buffer, + cipher_backend, + }) + } +} + +struct PaddedDecryptorInner<'a, CB: StreamBackend> { + buffer: &'a mut [u8], + cipher_backend: &'a mut CB, +} + +impl<'a, CB: StreamBackend> UhfBlockSizeUser for PaddedDecryptorInner<'a, CB> { + type BlockSize = ::BlockSize; +} + +impl<'a, CB: StreamBackend> UhfClosure for PaddedDecryptorInner<'a, CB> { + #[inline(always)] + fn call>(self, mac_backend: &mut MB) { + // Calculate the least common multiple of the number of bytes which can be + // processed by each backend in parallel. + let lcm_block_size = lcm( + CB::BlockSize::USIZE * CB::ParBlocksSize::USIZE, + MB::BlockSize::USIZE * MB::ParBlocksSize::USIZE, + ); + + let mut iter = self.buffer.chunks_exact_mut(lcm_block_size); + for lcm_segment in &mut iter { + // Cast the segment into `&mut [[Block; MB::ParBlocksSize]]`. + let mac_blocks = { + let (lcm_blocks, _) = InOutBuf::from(&mut *lcm_segment).into_chunks(); + lcm_blocks.into_chunks().0 + }; + + // Update the MAC with the encrypted blocks. + for par_blocks in mac_blocks { + mac_backend.proc_par_blocks(par_blocks.get_in()); + } + + // Cast the segment into `&mut [[Block; CB::ParBlocksSize]]`. + let cipher_chunks = { + let (lcm_blocks, _) = InOutBuf::from(lcm_segment).into_chunks(); + lcm_blocks.into_chunks().0 + }; + + // Decrypt the blocks. + let mut tmp = Default::default(); + for mut chunk in cipher_chunks { + self.cipher_backend.gen_par_ks_blocks(&mut tmp); + chunk.xor_in2out(&tmp); + } + } + + // The remaining tail bytes can't be nicely interleaved, so we process them with + // the cipher and MAC separately. + let tail = iter.into_remainder(); + + let (mac_blocks, mac_tail) = InOutBuf::from(&mut *tail).into_chunks(); + if MB::ParBlocksSize::USIZE > 1 { + let (par_blocks, tail) = mac_blocks.into_chunks(); + for par_block in par_blocks { + mac_backend.proc_par_blocks(par_block.get_in()); + } + for block in tail { + mac_backend.proc_block(block.get_in()); + } + } else { + for block in mac_blocks { + mac_backend.proc_block(block.get_in()); + } + } + + // Pad any remaining bytes with zeroes to create the final MAC block. + if !mac_tail.is_empty() { + let mut padded_block = GenericArray::default(); + padded_block[..mac_tail.len()].copy_from_slice(mac_tail.get_in()); + mac_backend.proc_block(&padded_block); + } + + let (cipher_blocks, mut cipher_tail) = InOutBuf::from(tail).into_chunks(); + if CB::ParBlocksSize::USIZE > 1 { + let (chunks, mut tail) = cipher_blocks.into_chunks(); + + // Decrypt the remaining blocks that can be parallelized by the cipher. + for mut chunk in chunks { + let mut tmp = Default::default(); + self.cipher_backend.gen_par_ks_blocks(&mut tmp); + chunk.xor_in2out(&tmp); + } + + // Decrypt any remaining complete blocks. + let n = tail.len(); + let mut tmp = + GenericArray::, CB::ParBlocksSize>::default(); + let ks = &mut tmp[..n]; + self.cipher_backend.gen_tail_blocks(ks); + for i in 0..n { + tail.get(i).xor_in2out(&ks[i]); + } + } else { + // Decrypt any remaining complete blocks. + for mut block in cipher_blocks { + let mut t = Default::default(); + self.cipher_backend.gen_ks_block(&mut t); + block.xor_in2out(&t); + } + } + + // Decrypt any remaining bytes, which are smaller than a cipher block. + if !cipher_tail.is_empty() { + let mut t = Default::default(); + self.cipher_backend.gen_ks_block(&mut t); + cipher_tail.xor_in2out(&t[..cipher_tail.len()]); + } + } +} diff --git a/chacha20poly1305/src/lib.rs b/chacha20poly1305/src/lib.rs index 49e10f03..529463c8 100644 --- a/chacha20poly1305/src/lib.rs +++ b/chacha20poly1305/src/lib.rs @@ -142,18 +142,18 @@ mod cipher; pub use aead::{self, consts, AeadCore, AeadInPlace, Error, KeyInit, KeySizeUser}; use self::cipher::Cipher; -use ::cipher::{KeyIvInit, StreamCipher, StreamCipherSeek}; +use ::cipher::{KeyIvInit, StreamCipherCore, StreamCipherSeekCore}; use aead::{ - consts::{U0, U12, U16, U24, U32}, + consts::{U0, U10, U12, U16, U24, U32}, generic_array::{ArrayLength, GenericArray}, }; use core::marker::PhantomData; use zeroize::{Zeroize, ZeroizeOnDrop}; -use chacha20::{ChaCha20, XChaCha20}; +use chacha20::{ChaChaCore, XChaChaCore}; #[cfg(feature = "reduced-round")] -use chacha20::{ChaCha12, ChaCha8, XChaCha12, XChaCha8}; +use aead::consts::{U4, U6}; /// Key type (256-bits/32-bytes). /// @@ -179,30 +179,30 @@ pub type XNonce = GenericArray; pub type Tag = GenericArray; /// ChaCha20Poly1305 Authenticated Encryption with Additional Data (AEAD). -pub type ChaCha20Poly1305 = ChaChaPoly1305; +pub type ChaCha20Poly1305 = ChaChaPoly1305, U12>; /// XChaCha20Poly1305 Authenticated Encryption with Additional Data (AEAD). -pub type XChaCha20Poly1305 = ChaChaPoly1305; +pub type XChaCha20Poly1305 = ChaChaPoly1305, U24>; /// ChaCha8Poly1305 (reduced round variant) Authenticated Encryption with Additional Data (AEAD). #[cfg(feature = "reduced-round")] #[cfg_attr(docsrs, doc(cfg(feature = "reduced-round")))] -pub type ChaCha8Poly1305 = ChaChaPoly1305; +pub type ChaCha8Poly1305 = ChaChaPoly1305, U12>; /// ChaCha12Poly1305 (reduced round variant) Authenticated Encryption with Additional Data (AEAD). #[cfg(feature = "reduced-round")] #[cfg_attr(docsrs, doc(cfg(feature = "reduced-round")))] -pub type ChaCha12Poly1305 = ChaChaPoly1305; +pub type ChaCha12Poly1305 = ChaChaPoly1305, U12>; /// XChaCha8Poly1305 (reduced round variant) Authenticated Encryption with Additional Data (AEAD). #[cfg(feature = "reduced-round")] #[cfg_attr(docsrs, doc(cfg(feature = "reduced-round")))] -pub type XChaCha8Poly1305 = ChaChaPoly1305; +pub type XChaCha8Poly1305 = ChaChaPoly1305, U24>; /// XChaCha12Poly1305 (reduced round variant) Authenticated Encryption with Additional Data (AEAD). #[cfg(feature = "reduced-round")] #[cfg_attr(docsrs, doc(cfg(feature = "reduced-round")))] -pub type XChaCha12Poly1305 = ChaChaPoly1305; +pub type XChaCha12Poly1305 = ChaChaPoly1305, U24>; /// Generic ChaCha+Poly1305 Authenticated Encryption with Additional Data (AEAD) construction. /// @@ -250,7 +250,7 @@ where impl AeadInPlace for ChaChaPoly1305 where - C: KeyIvInit + StreamCipher + StreamCipherSeek, + C: KeyIvInit + StreamCipherCore + StreamCipherSeekCore, N: ArrayLength, { fn encrypt_in_place_detached(