From 86341e0a90046b952f1167bbb7a67f402ed60e8a Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Thu, 14 Aug 2025 18:09:29 -0500 Subject: [PATCH 01/64] added newtype that should be able to simulate a 64-bit counter; needs fixing --- chacha20/src/rng.rs | 498 +++++++++++++++++++++++++++++--------------- 1 file changed, 333 insertions(+), 165 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index bcb94c0c..71822ff3 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -28,6 +28,9 @@ use cfg_if::cfg_if; /// Number of 32-bit words per ChaCha block (fixed by algorithm definition). const BLOCK_WORDS: u8 = 16; +/// Number of blocks generated at once. Changing this does not change the +/// current functionality. +const MAX_PAR_BLOCKS: u32 = 4; /// The seed for ChaCha20. Implements ZeroizeOnDrop when the /// zeroize feature is enabled. @@ -177,6 +180,27 @@ impl From<[u8; 4]> for BlockPos { } } +/// A wrapper for the 64-bit `block_pos`. +/// +/// Can be constructed from any of the following: +/// * `u64` +/// * `[u32; 2]` +pub struct LegacyBlockPos(u64); + +impl From for LegacyBlockPos { + #[inline] + fn from(value: u64) -> Self { + Self(value.to_le()) + } +} + +impl From<[u32; 2]> for LegacyBlockPos { + #[inline] + fn from(value: [u32; 2]) -> Self { + Self((value[1].to_le() as u64) << 32 | value[0] as u64) + } +} + /// The results buffer that zeroizes on drop when the `zeroize` feature is enabled. #[derive(Clone)] pub struct BlockRngResults([u32; BUFFER_SIZE]); @@ -211,191 +235,85 @@ const BUFFER_SIZE: usize = 64; // NB. this must remain consistent with some currently hard-coded numbers in this module const BUF_BLOCKS: u8 = BUFFER_SIZE as u8 >> 4; -impl ChaChaCore { - /// Generates 4 blocks in parallel with avx2 & neon, but merely fills - /// 4 blocks with sse2 & soft - #[cfg(feature = "rand_core")] - fn generate(&mut self, buffer: &mut [u32; 64]) { +macro_rules! generate_core { + ($self:expr, $buffer:expr) => { cfg_if! { if #[cfg(chacha20_force_soft)] { - backends::soft::Backend(self).gen_ks_blocks(buffer); + backends::soft::Backend($self).gen_ks_blocks($buffer); } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { cfg_if! { if #[cfg(chacha20_force_avx2)] { unsafe { - backends::avx2::rng_inner::(self, buffer); + backends::avx2::rng_inner::($self, $buffer); } } else if #[cfg(chacha20_force_sse2)] { unsafe { - backends::sse2::rng_inner::(self, buffer); + backends::sse2::rng_inner::($self, $buffer); } } else { - let (avx2_token, sse2_token) = self.tokens; + let (avx2_token, sse2_token) = $self.tokens; if avx2_token.get() { unsafe { - backends::avx2::rng_inner::(self, buffer); + backends::avx2::rng_inner::($self, $buffer); } } else if sse2_token.get() { unsafe { - backends::sse2::rng_inner::(self, buffer); + backends::sse2::rng_inner::($self, $buffer); } } else { - backends::soft::Backend(self).gen_ks_blocks(buffer); + backends::soft::Backend($self).gen_ks_blocks($buffer); } } } } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { unsafe { - backends::neon::rng_inner::(self, buffer); + backends::neon::rng_inner::($self, $buffer); } } else { - backends::soft::Backend(self).gen_ks_blocks(buffer); + backends::soft::Backend($self).gen_ks_blocks($buffer); } } - } + }; } -macro_rules! impl_chacha_rng { - ($ChaChaXRng:ident, $ChaChaXCore:ident, $rounds:ident, $abst: ident) => { - /// A cryptographically secure random number generator that uses the ChaCha algorithm. - /// - /// ChaCha is a stream cipher designed by Daniel J. Bernstein[^1], that we use as an RNG. It is - /// an improved variant of the Salsa20 cipher family, which was selected as one of the "stream - /// ciphers suitable for widespread adoption" by eSTREAM[^2]. - /// - /// ChaCha uses add-rotate-xor (ARX) operations as its basis. These are safe against timing - /// attacks, although that is mostly a concern for ciphers and not for RNGs. We provide a SIMD - /// implementation to support high throughput on a variety of common hardware platforms. - /// - /// With the ChaCha algorithm it is possible to choose the number of rounds the core algorithm - /// should run. The number of rounds is a tradeoff between performance and security, where 8 - /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a - /// conservative choice. - /// - /// We use a 32-bit counter and 96-bit stream identifier as in the IETF implementation[^3] - /// except that we use a stream identifier in place of a nonce. A 32-bit counter over 64-byte - /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows - /// 296 unique streams of output per seed. Both counter and stream are initialized - /// to zero but may be set via the `set_word_pos` and `set_stream` methods. - /// - /// The word layout is: - /// - /// ```text - /// constant constant constant constant - /// seed seed seed seed - /// seed seed seed seed - /// counter stream_id stream_id stream_id - /// ``` - /// This implementation uses an output buffer of sixteen `u32` words, and uses - /// [`BlockRng`] to implement the [`RngCore`] methods. - /// - /// # Example for `ChaCha20Rng` - /// - /// ```rust - /// use chacha20::ChaCha20Rng; - /// // use rand_core traits - /// use rand_core::{SeedableRng, RngCore}; - /// - /// // the following inputs are examples and are neither - /// // recommended nor suggested values - /// - /// let seed = [42u8; 32]; - /// let mut rng = ChaCha20Rng::from_seed(seed); - /// rng.set_stream(100); - /// - /// // you can also use a [u8; 12] in `.set_stream()` - /// rng.set_stream([3u8; 12]); - /// // or a [u32; 3] - /// rng.set_stream([4u32; 3]); - /// - /// - /// rng.set_word_pos(5); - /// - /// // you can also use a [u8; 5] in `.set_word_pos()` - /// rng.set_word_pos([2u8; 5]); - /// - /// let x = rng.next_u32(); - /// let mut array = [0u8; 32]; - /// rng.fill_bytes(&mut array); - /// - /// // If you need to zeroize the RNG's buffer, ensure that "zeroize" - /// // feature is enabled in Cargo.toml, and then it will zeroize on - /// // drop automatically - /// # #[cfg(feature = "zeroize")] - /// use zeroize::Zeroize; - /// ``` - /// - /// The other Rngs from this crate are initialized similarly. - /// - /// [^1]: D. J. Bernstein, [*ChaCha, a variant of Salsa20*]( - /// https://cr.yp.to/chacha.html) - /// - /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( - /// http://www.ecrypt.eu.org/stream/) - /// - /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( - /// https://www.rfc-editor.org/rfc/rfc8439) - #[derive(Clone)] - pub struct $ChaChaXRng { - /// The ChaChaCore struct - pub core: BlockRng<$ChaChaXCore>, +impl ChaChaCore { + /// Generates 4 blocks in parallel with avx2 & neon, but merely fills + /// 4 blocks with sse2 & soft + #[cfg(feature = "rand_core")] + fn generate_64_bit_counter(&mut self, buffer: &mut [u32; 64]) { + let should_increment_large_counter = self.state[12].eq(&0u32.wrapping_sub(MAX_PAR_BLOCKS)); + generate_core!(self, buffer); + if should_increment_large_counter { + self.state[13] = self.state[13].wrapping_add(1); } + } + #[cfg(feature = "rand_core")] + fn generate(&mut self, buffer: &mut [u32; 64]) { + generate_core!(self, buffer); + } +} - /// The ChaCha core random number generator - #[derive(Clone)] - pub struct $ChaChaXCore(ChaChaCore<$rounds, Ietf>); - - impl SeedableRng for $ChaChaXRng { - type Seed = [u8; 32]; +macro_rules! impl_shared_traits { + ($Rng:ident, $Core:ident, $rounds:ident, $abst:ident) => { + impl SeedableRng for $Core { + type Seed = Seed; #[inline] fn from_seed(seed: Self::Seed) -> Self { - Self { - core: BlockRng::new($ChaChaXCore::from_seed(seed.into())), - } - } - } - - impl BlockRngCore for $ChaChaXCore { - type Item = u32; - type Results = BlockRngResults; - - #[inline] - fn generate(&mut self, r: &mut Self::Results) { - self.0.generate(&mut r.0); - #[cfg(target_endian = "big")] - for word in r.0.iter_mut() { - *word = word.to_le(); - } - } - } - - impl CryptoBlockRng for $ChaChaXCore {} - impl CryptoRng for $ChaChaXRng {} - - #[cfg(feature = "zeroize")] - impl ZeroizeOnDrop for $ChaChaXCore {} - - #[cfg(feature = "zeroize")] - impl ZeroizeOnDrop for $ChaChaXRng {} - - // Custom Debug implementation that does not expose the internal state - impl Debug for $ChaChaXRng { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "ChaChaXCore {{}}") + Self(ChaChaCore::<$rounds, Ietf>::new(seed.as_ref(), &[0u8; 12])) } } - - impl SeedableRng for $ChaChaXCore { - type Seed = Seed; + impl SeedableRng for $Rng { + type Seed = [u8; 32]; #[inline] fn from_seed(seed: Self::Seed) -> Self { - Self(ChaChaCore::<$rounds, Ietf>::new(seed.as_ref(), &[0u8; 12])) + Self { + core: BlockRng::new($Core::from_seed(seed.into())), + } } } - - impl RngCore for $ChaChaXRng { + impl RngCore for $Rng { #[inline] fn next_u32(&mut self) -> u32 { self.core.next_u32() @@ -409,8 +327,23 @@ macro_rules! impl_chacha_rng { self.core.fill_bytes(dest) } } + impl CryptoBlockRng for $Core {} + impl CryptoRng for $Rng {} - impl $ChaChaXRng { + #[cfg(feature = "zeroize")] + impl ZeroizeOnDrop for $Core {} + + #[cfg(feature = "zeroize")] + impl ZeroizeOnDrop for $Rng {} + + // Custom Debug implementation that does not expose the internal state + impl Debug for $Rng { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "ChaChaXCore {{}}") + } + } + + impl $Rng { // The buffer is a 4-block window, i.e. it is always at a block-aligned position in the // stream but if the stream has been sought it may not be self-aligned. @@ -522,38 +455,38 @@ macro_rules! impl_chacha_rng { } } - impl PartialEq<$ChaChaXRng> for $ChaChaXRng { - fn eq(&self, rhs: &$ChaChaXRng) -> bool { - let a: $abst::$ChaChaXRng = self.into(); - let b: $abst::$ChaChaXRng = rhs.into(); + impl PartialEq<$Rng> for $Rng { + fn eq(&self, rhs: &$Rng) -> bool { + let a: $abst::$Rng = self.into(); + let b: $abst::$Rng = rhs.into(); a == b } } - impl Eq for $ChaChaXRng {} + impl Eq for $Rng {} #[cfg(feature = "serde1")] - impl Serialize for $ChaChaXRng { + impl Serialize for $Rng { fn serialize(&self, s: S) -> Result where S: Serializer, { - $abst::$ChaChaXRng::from(self).serialize(s) + $abst::$Rng::from(self).serialize(s) } } #[cfg(feature = "serde1")] - impl<'de> Deserialize<'de> for $ChaChaXRng { + impl<'de> Deserialize<'de> for $Rng { fn deserialize(d: D) -> Result where D: Deserializer<'de>, { - $abst::$ChaChaXRng::deserialize(d).map(|x| Self::from(&x)) + $abst::$Rng::deserialize(d).map(|x| Self::from(&x)) } } - impl From<$ChaChaXCore> for $ChaChaXRng { - fn from(core: $ChaChaXCore) -> Self { - $ChaChaXRng { + impl From<$Core> for $Rng { + fn from(core: $Core) -> Self { + $Rng { core: BlockRng::new(core), } } @@ -568,16 +501,16 @@ macro_rules! impl_chacha_rng { // the API. #[derive(Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] - pub(crate) struct $ChaChaXRng { + pub(crate) struct $Rng { seed: crate::rng::Seed, stream: u128, word_pos: u64, } - impl From<&super::$ChaChaXRng> for $ChaChaXRng { + impl From<&super::$Rng> for $Rng { // Forget all information about the input except what is necessary to determine the // outputs of any sequence of pub API calls. - fn from(r: &super::$ChaChaXRng) -> Self { + fn from(r: &super::$Rng) -> Self { Self { seed: r.get_seed().into(), stream: r.get_stream(), @@ -586,9 +519,9 @@ macro_rules! impl_chacha_rng { } } - impl From<&$ChaChaXRng> for super::$ChaChaXRng { + impl From<&$Rng> for super::$Rng { // Construct one of the possible concrete RNGs realizing an abstract state. - fn from(a: &$ChaChaXRng) -> Self { + fn from(a: &$Rng) -> Self { use rand_core::SeedableRng; let mut r = Self::from_seed(a.seed.0.into()); r.set_stream(a.stream); @@ -600,11 +533,246 @@ macro_rules! impl_chacha_rng { }; } -impl_chacha_rng!(ChaCha8Rng, ChaCha8Core, R8, abst8); +macro_rules! impl_chacha_rng { + ($ChaChaXRng:ident, $ChaChaXCore:ident, $ChaChaXLegacyRng:ident, $ChaChaXLegacyCore:ident, $rounds:ident, $abst:ident, $abst_legacy:ident) => { + /// A cryptographically secure random number generator that uses the ChaCha algorithm. + /// + /// ChaCha is a stream cipher designed by Daniel J. Bernstein[^1], that we use as an RNG. It is + /// an improved variant of the Salsa20 cipher family, which was selected as one of the "stream + /// ciphers suitable for widespread adoption" by eSTREAM[^2]. + /// + /// ChaCha uses add-rotate-xor (ARX) operations as its basis. These are safe against timing + /// attacks, although that is mostly a concern for ciphers and not for RNGs. We provide a SIMD + /// implementation to support high throughput on a variety of common hardware platforms. + /// + /// With the ChaCha algorithm it is possible to choose the number of rounds the core algorithm + /// should run. The number of rounds is a tradeoff between performance and security, where 8 + /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a + /// conservative choice. + /// + /// We use a 32-bit counter and 96-bit stream identifier as in the IETF implementation[^3] + /// except that we use a stream identifier in place of a nonce. A 32-bit counter over 64-byte + /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows + /// 296 unique streams of output per seed. Both counter and stream are initialized + /// to zero but may be set via the `set_word_pos` and `set_stream` methods. + /// + /// The word layout is: + /// + /// ```text + /// constant constant constant constant + /// seed seed seed seed + /// seed seed seed seed + /// counter stream_id stream_id stream_id + /// ``` + /// This implementation uses an output buffer of sixteen `u32` words, and uses + /// [`BlockRng`] to implement the [`RngCore`] methods. + /// + /// # Example for `ChaCha20Rng` + /// + /// ```rust + /// use chacha20::ChaCha20Rng; + /// // use rand_core traits + /// use rand_core::{SeedableRng, RngCore}; + /// + /// // the following inputs are examples and are neither + /// // recommended nor suggested values + /// + /// let seed = [42u8; 32]; + /// let mut rng = ChaCha20Rng::from_seed(seed); + /// rng.set_stream(100); + /// + /// // you can also use a [u8; 12] in `.set_stream()` + /// rng.set_stream([3u8; 12]); + /// // or a [u32; 3] + /// rng.set_stream([4u32; 3]); + /// + /// + /// rng.set_word_pos(5); + /// + /// // you can also use a [u8; 5] in `.set_word_pos()` + /// rng.set_word_pos([2u8; 5]); + /// + /// let x = rng.next_u32(); + /// let mut array = [0u8; 32]; + /// rng.fill_bytes(&mut array); + /// + /// // If you need to zeroize the RNG's buffer, ensure that "zeroize" + /// // feature is enabled in Cargo.toml, and then it will zeroize on + /// // drop automatically + /// # #[cfg(feature = "zeroize")] + /// use zeroize::Zeroize; + /// ``` + /// + /// The other Rngs from this crate are initialized similarly. + /// + /// [^1]: D. J. Bernstein, [*ChaCha, a variant of Salsa20*]( + /// https://cr.yp.to/chacha.html) + /// + /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( + /// http://www.ecrypt.eu.org/stream/) + /// + /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( + /// https://www.rfc-editor.org/rfc/rfc8439) + #[derive(Clone)] + pub struct $ChaChaXRng { + /// The ChaChaCore struct + pub core: BlockRng<$ChaChaXCore>, + } + + /// A cryptographically secure random number generator that uses the ChaCha algorithm. + /// + /// ChaCha is a stream cipher designed by Daniel J. Bernstein[^1], that we use as an RNG. It is + /// an improved variant of the Salsa20 cipher family, which was selected as one of the "stream + /// ciphers suitable for widespread adoption" by eSTREAM[^2]. + /// + /// ChaCha uses add-rotate-xor (ARX) operations as its basis. These are safe against timing + /// attacks, although that is mostly a concern for ciphers and not for RNGs. We provide a SIMD + /// implementation to support high throughput on a variety of common hardware platforms. + /// + /// With the ChaCha algorithm it is possible to choose the number of rounds the core algorithm + /// should run. The number of rounds is a tradeoff between performance and security, where 8 + /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a + /// conservative choice. + /// + /// TODO: Fix this paragraph, some adjustments have been made already + /// We use a 64-bit counter and 64-bit stream identifier as in the IETF implementation[^3] + /// except that we use a stream identifier in place of a nonce. A 64-bit counter over 64-byte + /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows + /// 296 unique streams of output per seed. Both counter and stream are initialized + /// to zero but may be set via the `set_word_pos` and `set_stream` methods. + /// + /// The word layout is: + /// + /// ```text + /// constant constant constant constant + /// seed seed seed seed + /// seed seed seed seed + /// counter counter stream_id stream_id + /// ``` + /// This implementation uses an output buffer of sixteen `u32` words, and uses + /// [`BlockRng`] to implement the [`RngCore`] methods. + /// + /// # Example for `ChaCha20Rng` + /// + /// ```rust + /// use chacha20::ChaCha20Rng; + /// // use rand_core traits + /// use rand_core::{SeedableRng, RngCore}; + /// + /// // the following inputs are examples and are neither + /// // recommended nor suggested values + /// + /// let seed = [42u8; 32]; + /// let mut rng = ChaCha20Rng::from_seed(seed); + /// rng.set_stream(100); + /// + /// // you can also use a [u8; 12] in `.set_stream()` + /// rng.set_stream([3u8; 12]); + /// // or a [u32; 3] + /// rng.set_stream([4u32; 3]); + /// + /// + /// rng.set_word_pos(5); + /// + /// // you can also use a [u8; 5] in `.set_word_pos()` + /// rng.set_word_pos([2u8; 5]); + /// + /// let x = rng.next_u32(); + /// let mut array = [0u8; 32]; + /// rng.fill_bytes(&mut array); + /// + /// // If you need to zeroize the RNG's buffer, ensure that "zeroize" + /// // feature is enabled in Cargo.toml, and then it will zeroize on + /// // drop automatically + /// # #[cfg(feature = "zeroize")] + /// use zeroize::Zeroize; + /// ``` + /// + /// The other Rngs from this crate are initialized similarly. + /// + /// [^1]: D. J. Bernstein, [*ChaCha, a variant of Salsa20*]( + /// https://cr.yp.to/chacha.html) + /// + /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( + /// http://www.ecrypt.eu.org/stream/) + /// + /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( + /// https://www.rfc-editor.org/rfc/rfc8439) + #[derive(Clone)] + pub struct $ChaChaXLegacyRng { + pub core: BlockRng<$ChaChaXLegacyCore>, + } -impl_chacha_rng!(ChaCha12Rng, ChaCha12Core, R12, abst12); + /// The ChaCha core random number generator + #[derive(Clone)] + pub struct $ChaChaXCore(ChaChaCore<$rounds, Ietf>); + + /// The ChaCha core random number generator + #[derive(Clone)] + pub struct $ChaChaXLegacyCore(ChaChaCore<$rounds, Ietf>); + + impl_shared_traits!($ChaChaXRng, $ChaChaXCore, $rounds, $abst); + impl_shared_traits!($ChaChaXLegacyRng, $ChaChaXLegacyCore, $rounds, $abst_legacy); + + impl BlockRngCore for $ChaChaXLegacyCore { + type Item = u32; + type Results = BlockRngResults; + + #[inline] + fn generate(&mut self, r: &mut Self::Results) { + self.0.generate_64_bit_counter(&mut r.0); + #[cfg(target_endian = "big")] + for word in r.0.iter_mut() { + *word = word.to_le(); + } + } + } + + impl BlockRngCore for $ChaChaXCore { + type Item = u32; + type Results = BlockRngResults; + + #[inline] + fn generate(&mut self, r: &mut Self::Results) { + self.0.generate(&mut r.0); + #[cfg(target_endian = "big")] + for word in r.0.iter_mut() { + *word = word.to_le(); + } + } + } + }; +} -impl_chacha_rng!(ChaCha20Rng, ChaCha20Core, R20, abst20); +impl_chacha_rng!( + ChaCha8Rng, + ChaCha8Core, + ChaCha8LegacyRng, + ChaCha8LegacyCore, + R8, + abst8, + abst8legacy +); + +impl_chacha_rng!( + ChaCha12Rng, + ChaCha12Core, + ChaCha12LegacyRng, + ChaCha12LegacyCore, + R12, + abst12, + abst12legacy +); + +impl_chacha_rng!( + ChaCha20Rng, + ChaCha20Core, + ChaCha20LegacyRng, + ChaCha20LegacyCore, + R20, + abst20, + abst20legacy +); #[cfg(test)] pub(crate) mod tests { From 22d06d503e6db0774155aad24304a76d0613e64a Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Thu, 14 Aug 2025 18:19:36 -0500 Subject: [PATCH 02/64] fixed some unused warnings the only way I know that I can --- chacha20/src/rng.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 71822ff3..1a405b20 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -185,6 +185,7 @@ impl From<[u8; 4]> for BlockPos { /// Can be constructed from any of the following: /// * `u64` /// * `[u32; 2]` +#[allow(unused)] pub struct LegacyBlockPos(u64); impl From for LegacyBlockPos { @@ -389,6 +390,7 @@ macro_rules! impl_shared_traits { /// * `[u8; 4]` /// * `u32` #[inline] + #[allow(unused)] pub fn set_block_pos>(&mut self, block_pos: B) { self.core.reset(); self.core.core.0.state[12] = block_pos.into().0.to_le() @@ -396,6 +398,7 @@ macro_rules! impl_shared_traits { /// Get the block pos. #[inline] + #[allow(unused)] pub fn get_block_pos(&self) -> u32 { self.core.core.0.state[12] } From 3bcd93b61f6c2ec9eba1fa5c5f368913ca3641d2 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Thu, 14 Aug 2025 19:31:38 -0500 Subject: [PATCH 03/64] attempted to restrict block pos to multiple of 4, all tests pass --- chacha20/src/rng.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 1a405b20..46713d43 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -96,10 +96,14 @@ pub struct WordPosInput { impl From<[u8; 5]> for WordPosInput { fn from(value: [u8; 5]) -> Self { - Self { + let mut result = Self { block_pos: u32::from_le_bytes(value[0..4].try_into().unwrap()), index: (value[4] & 0b1111) as usize, - } + }; + let overshot = result.block_pos & 0b11; + result.block_pos &= !0b11; + result.index += overshot as usize * BLOCK_WORDS as usize; + result } } @@ -393,14 +397,20 @@ macro_rules! impl_shared_traits { #[allow(unused)] pub fn set_block_pos>(&mut self, block_pos: B) { self.core.reset(); - self.core.core.0.state[12] = block_pos.into().0.to_le() + // TODO: Change to multiple of 4 + let bp = block_pos.into().0; + let mult_4 = bp & !0b11; + let overshot_words = (bp & 0b11) * BLOCK_WORDS as u32; + self.core.core.0.state[12] = mult_4.to_le(); + self.core.generate_and_set(overshot_words as usize); } /// Get the block pos. #[inline] #[allow(unused)] pub fn get_block_pos(&self) -> u32 { - self.core.core.0.state[12] + self.core.core.0.state[12].wrapping_sub(4) + + self.core.index() as u32 / BLOCK_WORDS as u32 } /// Set the stream number. The lower 96 bits are used and the rest are From 768b0c8f32c5e0b81d3bc6fe450f7c5714480bba Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Thu, 14 Aug 2025 19:59:13 -0500 Subject: [PATCH 04/64] added to_le() --- chacha20/src/rng.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 46713d43..2ebc1df0 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -202,7 +202,7 @@ impl From for LegacyBlockPos { impl From<[u32; 2]> for LegacyBlockPos { #[inline] fn from(value: [u32; 2]) -> Self { - Self((value[1].to_le() as u64) << 32 | value[0] as u64) + Self((value[1].to_le() as u64) << 32 | value[0].to_le() as u64) } } From 385ebeab35daf0cd57b432188cc912bfb907241b Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 11:41:30 -0500 Subject: [PATCH 05/64] updated backends to use a 64-bit counter --- chacha20/src/backends/avx2.rs | 12 +++++---- chacha20/src/backends/neon.rs | 49 +++++++++++++++++++++++++---------- chacha20/src/backends/soft.rs | 16 ++++++++++-- chacha20/src/backends/sse2.rs | 8 +++--- 4 files changed, 62 insertions(+), 23 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index 2c28b345..4b74aba9 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -39,11 +39,11 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); } let mut backend = Backend:: { v, @@ -54,6 +54,7 @@ where f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; + state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; } #[inline] @@ -86,6 +87,7 @@ where backend.rng_gen_par_ks_blocks(buffer); core.state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; + core.state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; } struct Backend { @@ -111,7 +113,7 @@ impl StreamCipherBackend for Backend { unsafe { let res = rounds::(&self.v, &self.ctr); for c in self.ctr.iter_mut() { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)); + *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)); } let res0: [__m128i; 8] = core::mem::transmute(res[0]); @@ -130,7 +132,7 @@ impl StreamCipherBackend for Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); } let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i; @@ -155,7 +157,7 @@ impl Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); } let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i; diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 777a01fb..aee86ce7 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -60,7 +60,20 @@ where f.call(&mut backend); - vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]); + vst1q_u64( + state.as_mut_ptr().offset(12) as *mut u64, + vreinterpretq_u64_u32(backend.state[3]), + ); +} + +/// Adds a counter row with 64-bit addition +macro_rules! add_counter { + ($a:expr, $b:expr) => { + vreinterpretq_u32_u64(vaddq_u64( + vreinterpretq_u64_u32($a), + vreinterpretq_u64_u32($b), + )) + }; } #[inline] @@ -77,7 +90,10 @@ where backend.write_par_ks_blocks(buffer); - vst1q_u32(core.state.as_mut_ptr().offset(12), backend.state[3]); + vst1q_u64( + core.state.as_mut_ptr().offset(12) as *mut u64, + vreinterpretq_u64_u32(backend.state[3]), + ); } #[cfg(feature = "cipher")] @@ -105,7 +121,7 @@ impl StreamCipherBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - self.state[3] = vaddq_u32(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); + self.state[3] = add_counter!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); } } @@ -118,19 +134,19 @@ impl StreamCipherBackend for Backend { self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[0]), + add_counter!(self.state[3], self.ctrs[0]), ], [ self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[1]), + add_counter!(vaddq_u32(self.state[3], self.ctrs[1])), ], [ self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[2]), + add_counter!(vaddq_u32(self.state[3], self.ctrs[2])), ], ]; @@ -144,7 +160,10 @@ impl StreamCipherBackend for Backend { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - blocks[block][3] = vaddq_u32(blocks[block][3], self.ctrs[block - 1]); + add_assign_vec!( + blocks[block][3], + add_counter!(self.state[3], self.ctrs[block - 1]); + ); } // write blocks to dest for state_row in 0..4 { @@ -154,7 +173,7 @@ impl StreamCipherBackend for Backend { ); } } - self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]); + self.state[3] = add_counter!(self.state[3], self.ctrs[3]); } } } @@ -197,19 +216,19 @@ impl Backend { self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[0]), + add_counter!(self.state[3], self.ctrs[0]), ], [ self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[1]), + add_counter!(vaddq_u32(self.state[3], self.ctrs[1])), ], [ self.state[0], self.state[1], self.state[2], - vaddq_u32(self.state[3], self.ctrs[2]), + add_counter!(vaddq_u32(self.state[3], self.ctrs[2])), ], ]; @@ -224,7 +243,10 @@ impl Backend { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - blocks[block][3] = vaddq_u32(blocks[block][3], self.ctrs[block - 1]); + add_assign_vec!( + blocks[block][3], + add_counter!(self.state[3], self.ctrs[block - 1]); + ); } // write blocks to buffer for state_row in 0..4 { @@ -235,10 +257,11 @@ impl Backend { } dest_ptr = dest_ptr.add(64); } - self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]); + self.state[3] = add_counter!(self.state[3], self.ctrs[3]); } } + #[inline] unsafe fn double_quarter_round(blocks: &mut [[uint32x4_t; 4]; 4]) { add_xor_rot(blocks); diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index e0614138..b19b7079 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -28,7 +28,13 @@ impl StreamCipherBackend for Backend<'_, R, V> { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let res = run_rounds::(&self.0.state); - self.0.state[12] = self.0.state[12].wrapping_add(1); + let no_carry = self.0.state[12].checked_add(1); + if let Some(v) = no_carry { + self.0.state[12] = v; + } else { + self.0.state[12] = 0; + self.0.state[13] = self.0.state[13].wrapping_add(1); + } for (chunk, val) in block.chunks_exact_mut(4).zip(res.iter()) { chunk.copy_from_slice(&val.to_le_bytes()); @@ -42,7 +48,13 @@ impl Backend<'_, R, V> { pub(crate) fn gen_ks_blocks(&mut self, buffer: &mut [u32; 64]) { for i in 0..4 { let res = run_rounds::(&self.0.state); - self.0.state[12] = self.0.state[12].wrapping_add(1); + let no_carry = self.0.state[12].checked_add(1); + if let Some(v) = no_carry { + self.0.state[12] = v; + } else { + self.0.state[12] = 0; + self.0.state[13] = self.0.state[13].wrapping_add(1); + } for (word, val) in buffer[i << 4..(i + 1) << 4].iter_mut().zip(res.iter()) { *word = val.to_le(); diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 66ddc00f..13c73692 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -42,6 +42,7 @@ where f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; + state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; } struct Backend { @@ -65,7 +66,7 @@ impl StreamCipherBackend for Backend { fn gen_ks_block(&mut self, block: &mut Block) { unsafe { let res = rounds::(&self.v); - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); + self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)); let block_ptr = block.as_mut_ptr() as *mut __m128i; for i in 0..4 { @@ -77,7 +78,7 @@ impl StreamCipherBackend for Backend { fn gen_par_ks_blocks(&mut self, blocks: &mut cipher::ParBlocks) { unsafe { let res = rounds::(&self.v); - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, PAR_BLOCKS as i32)); + self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)); let blocks_ptr = blocks.as_mut_ptr() as *mut __m128i; for block in 0..PAR_BLOCKS { @@ -111,6 +112,7 @@ where backend.gen_ks_blocks(buffer); core.state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; + core.state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; } #[cfg(feature = "rng")] @@ -119,7 +121,7 @@ impl Backend { fn gen_ks_blocks(&mut self, block: &mut [u32]) { unsafe { let res = rounds::(&self.v); - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, PAR_BLOCKS as i32)); + self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)); let blocks_ptr = block.as_mut_ptr() as *mut __m128i; for block in 0..PAR_BLOCKS { From dbcdb4124502b7c28d74908b9ef2f470e1758f6e Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 11:44:12 -0500 Subject: [PATCH 06/64] ignored counter_wrapping rng test --- chacha20/src/backends/neon.rs | 1 - chacha20/src/rng.rs | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index aee86ce7..3d3f7f13 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -261,7 +261,6 @@ impl Backend { } } - #[inline] unsafe fn double_quarter_round(blocks: &mut [[uint32x4_t; 4]; 4]) { add_xor_rot(blocks); diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 2ebc1df0..2981cd19 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1311,6 +1311,7 @@ pub(crate) mod tests { /// If this test fails, the backend may be /// performing 64-bit addition. #[test] + #[ignore = "Counter is now a 64 bit counter"] fn counter_wrapping() { let mut rng = ChaChaRng::from_seed([0u8; 32]); From 8a8f8fea35e46421c9293a6ec14e2fb20b3a9d91 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 11:51:23 -0500 Subject: [PATCH 07/64] undid block pos multiple of 4 --- chacha20/src/rng.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 2981cd19..edbceff5 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -96,14 +96,10 @@ pub struct WordPosInput { impl From<[u8; 5]> for WordPosInput { fn from(value: [u8; 5]) -> Self { - let mut result = Self { + Self { block_pos: u32::from_le_bytes(value[0..4].try_into().unwrap()), index: (value[4] & 0b1111) as usize, - }; - let overshot = result.block_pos & 0b11; - result.block_pos &= !0b11; - result.index += overshot as usize * BLOCK_WORDS as usize; - result + } } } @@ -397,20 +393,14 @@ macro_rules! impl_shared_traits { #[allow(unused)] pub fn set_block_pos>(&mut self, block_pos: B) { self.core.reset(); - // TODO: Change to multiple of 4 - let bp = block_pos.into().0; - let mult_4 = bp & !0b11; - let overshot_words = (bp & 0b11) * BLOCK_WORDS as u32; - self.core.core.0.state[12] = mult_4.to_le(); - self.core.generate_and_set(overshot_words as usize); + self.core.core.0.state[12] = block_pos.into().0.to_le() } /// Get the block pos. #[inline] #[allow(unused)] pub fn get_block_pos(&self) -> u32 { - self.core.core.0.state[12].wrapping_sub(4) - + self.core.index() as u32 / BLOCK_WORDS as u32 + self.core.core.0.state[12] } /// Set the stream number. The lower 96 bits are used and the rest are From 5ae5b2231d63ca7dd39dbc7fbefa7c6bd6697573 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 12:21:38 -0500 Subject: [PATCH 08/64] fixed sse2 tests --- chacha20/src/backends/sse2.rs | 4 ++-- chacha20/src/rng.rs | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 13c73692..321c802c 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -121,7 +121,7 @@ impl Backend { fn gen_ks_blocks(&mut self, block: &mut [u32]) { unsafe { let res = rounds::(&self.v); - self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)); + self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)); let blocks_ptr = block.as_mut_ptr() as *mut __m128i; for block in 0..PAR_BLOCKS { @@ -138,7 +138,7 @@ impl Backend { unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { let mut res = [*v; 4]; for block in 1..PAR_BLOCKS { - res[block][3] = _mm_add_epi32(res[block][3], _mm_set_epi32(0, 0, 0, block as i32)); + res[block][3] = _mm_add_epi64(res[block][3], _mm_set_epi64x(0, block as i64)); } for _ in 0..R::COUNT { diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index edbceff5..abfac93f 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1162,6 +1162,7 @@ pub(crate) mod tests { } #[test] + #[ignore = "64 bit counter"] fn test_chacha_word_pos_zero() { let mut rng = ChaChaRng::from_seed(Default::default()); assert_eq!(rng.core.core.0.state[12], 0); From 5a15d23e0c8aede4c6a310b73747129ecd684a73 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 12:25:10 -0500 Subject: [PATCH 09/64] fixed neon.rs syntax --- chacha20/src/backends/neon.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 3d3f7f13..82537ae4 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -140,13 +140,13 @@ impl StreamCipherBackend for Backend { self.state[0], self.state[1], self.state[2], - add_counter!(vaddq_u32(self.state[3], self.ctrs[1])), + add_counter!(self.state[3], self.ctrs[1]), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(vaddq_u32(self.state[3], self.ctrs[2])), + add_counter!(self.state[3], self.ctrs[2]), ], ]; @@ -222,13 +222,13 @@ impl Backend { self.state[0], self.state[1], self.state[2], - add_counter!(vaddq_u32(self.state[3], self.ctrs[1])), + add_counter!(self.state[3], self.ctrs[1]), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(vaddq_u32(self.state[3], self.ctrs[2])), + add_counter!(self.state[3], self.ctrs[2]), ], ]; From 87cb35db40d80886f5059b1f81025ef83c747b51 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 12:27:14 -0500 Subject: [PATCH 10/64] fixed neon.rs syntax again --- chacha20/src/backends/neon.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 82537ae4..3bddcb03 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -162,7 +162,7 @@ impl StreamCipherBackend for Backend { if block > 0 { add_assign_vec!( blocks[block][3], - add_counter!(self.state[3], self.ctrs[block - 1]); + add_counter!(self.state[3], self.ctrs[block - 1]) ); } // write blocks to dest From a671af7b4906f9747dd46c0669349ddf00879985 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 13:56:13 -0500 Subject: [PATCH 11/64] fixed neon code; tested with cross --- chacha20/src/backends/neon.rs | 47 +++++++++++++++-------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 3bddcb03..d15c8c9c 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -25,6 +25,15 @@ struct Backend { _pd: PhantomData, } +macro_rules! add64 { + ($a:expr, $b:expr) => { + vreinterpretq_u32_u64(vaddq_u64( + vreinterpretq_u64_u32($a), + vreinterpretq_u64_u32($b), + )) + }; +} + impl Backend { #[inline] unsafe fn new(state: &mut [u32; STATE_WORDS]) -> Self { @@ -66,16 +75,6 @@ where ); } -/// Adds a counter row with 64-bit addition -macro_rules! add_counter { - ($a:expr, $b:expr) => { - vreinterpretq_u32_u64(vaddq_u64( - vreinterpretq_u64_u32($a), - vreinterpretq_u64_u32($b), - )) - }; -} - #[inline] #[cfg(feature = "rand_core")] #[target_feature(enable = "neon")] @@ -121,7 +120,7 @@ impl StreamCipherBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - self.state[3] = add_counter!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); + self.state[3] = add64!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); } } @@ -134,19 +133,19 @@ impl StreamCipherBackend for Backend { self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[0]), + add64!(self.state[3], self.ctrs[0]), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[1]), + add64!(self.state[3], self.ctrs[1]), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[2]), + add64!(self.state[3], self.ctrs[2]), ], ]; @@ -160,10 +159,7 @@ impl StreamCipherBackend for Backend { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - add_assign_vec!( - blocks[block][3], - add_counter!(self.state[3], self.ctrs[block - 1]) - ); + blocks[block][3] = add64!(blocks[block][3], self.ctrs[block - 1]); } // write blocks to dest for state_row in 0..4 { @@ -173,7 +169,7 @@ impl StreamCipherBackend for Backend { ); } } - self.state[3] = add_counter!(self.state[3], self.ctrs[3]); + self.state[3] = add64!(self.state[3], self.ctrs[3]); } } } @@ -216,19 +212,19 @@ impl Backend { self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[0]), + add64!(self.state[3], self.ctrs[0]), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[1]), + add64!(self.state[3], self.ctrs[1]), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[2]), + add64!(self.state[3], self.ctrs[2]), ], ]; @@ -243,10 +239,7 @@ impl Backend { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - add_assign_vec!( - blocks[block][3], - add_counter!(self.state[3], self.ctrs[block - 1]); - ); + blocks[block][3] = add64!(blocks[block][3], self.ctrs[block - 1]); } // write blocks to buffer for state_row in 0..4 { @@ -257,7 +250,7 @@ impl Backend { } dest_ptr = dest_ptr.add(64); } - self.state[3] = add_counter!(self.state[3], self.ctrs[3]); + self.state[3] = add64!(self.state[3], self.ctrs[3]); } } From 9235c1e3533efff72a98f7f646300da1742c6d58 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 15:17:43 -0500 Subject: [PATCH 12/64] impled remaining_blocks() --- chacha20/src/lib.rs | 8 ++++++-- chacha20/src/variants.rs | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index f9765fcb..15abd606 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -282,8 +282,12 @@ impl StreamCipherSeekCore for ChaChaCore { impl StreamCipherCore for ChaChaCore { #[inline(always)] fn remaining_blocks(&self) -> Option { - let rem = u32::MAX - self.get_block_pos(); - rem.try_into().ok() + let rem = V::COUNTER_MAX - self.get_block_pos() as u64; + if rem > usize::MAX as u64 { + None + } else { + rem.try_into().ok() + } } fn process_with_backend( diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index 58043a75..c6f484a3 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -6,6 +6,7 @@ pub trait Variant: Clone { /// the size of the Nonce in u32s const NONCE_INDEX: usize; + const COUNTER_MAX: u64; } #[derive(Clone)] @@ -13,6 +14,7 @@ pub trait Variant: Clone { pub struct Ietf(); impl Variant for Ietf { const NONCE_INDEX: usize = 13; + const COUNTER_MAX: u64 = u32::MAX as u64; } #[derive(Clone)] @@ -22,4 +24,5 @@ pub struct Legacy(); #[cfg(feature = "legacy")] impl Variant for Legacy { const NONCE_INDEX: usize = 14; + const COUNTER_MAX: u64 = u64::MAX; } From 655d7fc9c7a30bc6b851a56a63f202fd561709e4 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 17:16:46 -0500 Subject: [PATCH 13/64] fixed test_wrapping_add; not quite perfect --- chacha20/src/rng.rs | 602 +++++++++++++++++--------------------------- 1 file changed, 227 insertions(+), 375 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index abfac93f..1541ef38 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -21,16 +21,13 @@ use zeroize::{Zeroize, ZeroizeOnDrop}; use crate::{ ChaChaCore, R8, R12, R20, Rounds, backends, - variants::{Ietf, Variant}, + variants::{Legacy, Variant}, }; use cfg_if::cfg_if; /// Number of 32-bit words per ChaCha block (fixed by algorithm definition). const BLOCK_WORDS: u8 = 16; -/// Number of blocks generated at once. Changing this does not change the -/// current functionality. -const MAX_PAR_BLOCKS: u32 = 4; /// The seed for ChaCha20. Implements ZeroizeOnDrop when the /// zeroize feature is enabled. @@ -87,27 +84,33 @@ impl Debug for Seed { /// A wrapper for set_word_pos() input. /// /// Can be constructed from any of the following: -/// * `[u8; 5]` -/// * `u64` +/// * `[u8; 9]` +/// * `u128` pub struct WordPosInput { - block_pos: u32, + block_pos: [u32; 2], index: usize, } -impl From<[u8; 5]> for WordPosInput { - fn from(value: [u8; 5]) -> Self { +impl From<[u8; 9]> for WordPosInput { + fn from(value: [u8; 9]) -> Self { + let s12 = u32::from_le_bytes(value[0..4].try_into().unwrap()); + let s13 = u32::from_le_bytes(value[4..8].try_into().unwrap()); Self { - block_pos: u32::from_le_bytes(value[0..4].try_into().unwrap()), - index: (value[4] & 0b1111) as usize, + block_pos: [s12, s13], + index: (value[8] & 0b1111) as usize, } } } -impl From for WordPosInput { - fn from(value: u64) -> Self { +impl From for WordPosInput { + fn from(value: u128) -> Self { + let index = (value.to_le_bytes()[0] & 0b1111) as usize; + let counter = value >> 4; + let s12 = counter as u32; + let s13 = (counter >> 32) as u32; Self { - block_pos: u32::from_le_bytes((value >> 4).to_le_bytes()[0..4].try_into().unwrap()), - index: (value.to_le_bytes()[0] & 0b1111) as usize, + block_pos: [s12, s13], + index, } } } @@ -125,7 +128,7 @@ impl StreamId { const BYTES: usize = size_of::(); /// The length of the array contained within `StreamId`. - const LEN: usize = 3; + const LEN: usize = 2; } impl From<[u32; Self::LEN]> for StreamId { @@ -151,9 +154,9 @@ impl From<[u8; Self::BYTES]> for StreamId { } } -impl From for StreamId { +impl From for StreamId { #[inline] - fn from(value: u128) -> Self { + fn from(value: u64) -> Self { let result: [u8; Self::BYTES] = value.to_le_bytes()[..Self::BYTES].try_into().unwrap(); result.into() } @@ -162,43 +165,29 @@ impl From for StreamId { /// A wrapper for `block_pos`. /// /// Can be constructed from any of the following: -/// * `[u8; 4]` -/// * `u32` -pub struct BlockPos(u32); - -impl From for BlockPos { - #[inline] - fn from(value: u32) -> Self { - Self(value.to_le()) - } -} +/// * `[u8; 8]` +/// * `u64` +/// * `[u32; 2]` +pub struct BlockPos([u32; 2]); -impl From<[u8; 4]> for BlockPos { +impl From for BlockPos { #[inline] - fn from(value: [u8; 4]) -> Self { - Self(u32::from_le_bytes(value).to_le()) + fn from(value: u64) -> Self { + Self([value as u32, (value >> 32) as u32]) } } -/// A wrapper for the 64-bit `block_pos`. -/// -/// Can be constructed from any of the following: -/// * `u64` -/// * `[u32; 2]` -#[allow(unused)] -pub struct LegacyBlockPos(u64); - -impl From for LegacyBlockPos { +impl From<[u8; 8]> for BlockPos { #[inline] - fn from(value: u64) -> Self { - Self(value.to_le()) + fn from(value: [u8; 8]) -> Self { + u64::from_le_bytes(value).into() } } -impl From<[u32; 2]> for LegacyBlockPos { +impl From<[u32; 2]> for BlockPos { #[inline] fn from(value: [u32; 2]) -> Self { - Self((value[1].to_le() as u64) << 32 | value[0].to_le() as u64) + Self(value) } } @@ -236,85 +225,162 @@ const BUFFER_SIZE: usize = 64; // NB. this must remain consistent with some currently hard-coded numbers in this module const BUF_BLOCKS: u8 = BUFFER_SIZE as u8 >> 4; -macro_rules! generate_core { - ($self:expr, $buffer:expr) => { +impl ChaChaCore { + /// Generates 4 blocks in parallel with avx2 & neon, but merely fills + /// 4 blocks with sse2 & soft + #[cfg(feature = "rand_core")] + fn generate(&mut self, buffer: &mut [u32; 64]) { cfg_if! { if #[cfg(chacha20_force_soft)] { - backends::soft::Backend($self).gen_ks_blocks($buffer); + backends::soft::Backend(self).gen_ks_blocks(buffer); } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { cfg_if! { if #[cfg(chacha20_force_avx2)] { unsafe { - backends::avx2::rng_inner::($self, $buffer); + backends::avx2::rng_inner::(self, buffer); } } else if #[cfg(chacha20_force_sse2)] { unsafe { - backends::sse2::rng_inner::($self, $buffer); + backends::sse2::rng_inner::(self, buffer); } } else { - let (avx2_token, sse2_token) = $self.tokens; + let (avx2_token, sse2_token) = self.tokens; if avx2_token.get() { unsafe { - backends::avx2::rng_inner::($self, $buffer); + backends::avx2::rng_inner::(self, buffer); } } else if sse2_token.get() { unsafe { - backends::sse2::rng_inner::($self, $buffer); + backends::sse2::rng_inner::(self, buffer); } } else { - backends::soft::Backend($self).gen_ks_blocks($buffer); + backends::soft::Backend(self).gen_ks_blocks(buffer); } } } } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { unsafe { - backends::neon::rng_inner::($self, $buffer); + backends::neon::rng_inner::(self, buffer); } } else { - backends::soft::Backend($self).gen_ks_blocks($buffer); + backends::soft::Backend(self).gen_ks_blocks(buffer); } } - }; + } } -impl ChaChaCore { - /// Generates 4 blocks in parallel with avx2 & neon, but merely fills - /// 4 blocks with sse2 & soft - #[cfg(feature = "rand_core")] - fn generate_64_bit_counter(&mut self, buffer: &mut [u32; 64]) { - let should_increment_large_counter = self.state[12].eq(&0u32.wrapping_sub(MAX_PAR_BLOCKS)); - generate_core!(self, buffer); - if should_increment_large_counter { - self.state[13] = self.state[13].wrapping_add(1); +macro_rules! impl_chacha_rng { + ($ChaChaXRng:ident, $ChaChaXCore:ident, $rounds:ident, $abst:ident) => { + /// A cryptographically secure random number generator that uses the ChaCha algorithm. + /// + /// ChaCha is a stream cipher designed by Daniel J. Bernstein[^1], that we use as an RNG. It is + /// an improved variant of the Salsa20 cipher family, which was selected as one of the "stream + /// ciphers suitable for widespread adoption" by eSTREAM[^2]. + /// + /// ChaCha uses add-rotate-xor (ARX) operations as its basis. These are safe against timing + /// attacks, although that is mostly a concern for ciphers and not for RNGs. We provide a SIMD + /// implementation to support high throughput on a variety of common hardware platforms. + /// + /// With the ChaCha algorithm it is possible to choose the number of rounds the core algorithm + /// should run. The number of rounds is a tradeoff between performance and security, where 8 + /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a + /// conservative choice. + /// + /// We use a 32-bit counter and 96-bit stream identifier as in the IETF implementation[^3] + /// except that we use a stream identifier in place of a nonce. A 32-bit counter over 64-byte + /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows + /// 296 unique streams of output per seed. Both counter and stream are initialized + /// to zero but may be set via the `set_word_pos` and `set_stream` methods. + /// + /// The word layout is: + /// + /// ```text + /// constant constant constant constant + /// seed seed seed seed + /// seed seed seed seed + /// counter stream_id stream_id stream_id + /// ``` + /// This implementation uses an output buffer of sixteen `u32` words, and uses + /// [`BlockRng`] to implement the [`RngCore`] methods. + /// + /// # Example for `ChaCha20Rng` + /// + /// ```rust + /// use chacha20::ChaCha20Rng; + /// // use rand_core traits + /// use rand_core::{SeedableRng, RngCore}; + /// + /// // the following inputs are examples and are neither + /// // recommended nor suggested values + /// + /// let seed = [42u8; 32]; + /// let mut rng = ChaCha20Rng::from_seed(seed); + /// rng.set_stream(100); + /// + /// // you can also use a [u8; 12] in `.set_stream()` + /// rng.set_stream([3u8; 12]); + /// // or a [u32; 3] + /// rng.set_stream([4u32; 3]); + /// + /// + /// rng.set_word_pos(5); + /// + /// // you can also use a [u8; 5] in `.set_word_pos()` + /// rng.set_word_pos([2u8; 5]); + /// + /// let x = rng.next_u32(); + /// let mut array = [0u8; 32]; + /// rng.fill_bytes(&mut array); + /// + /// // If you need to zeroize the RNG's buffer, ensure that "zeroize" + /// // feature is enabled in Cargo.toml, and then it will zeroize on + /// // drop automatically + /// # #[cfg(feature = "zeroize")] + /// use zeroize::Zeroize; + /// ``` + /// + /// The other Rngs from this crate are initialized similarly. + /// + /// [^1]: D. J. Bernstein, [*ChaCha, a variant of Salsa20*]( + /// https://cr.yp.to/chacha.html) + /// + /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( + /// http://www.ecrypt.eu.org/stream/) + /// + /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( + /// https://www.rfc-editor.org/rfc/rfc8439) + #[derive(Clone)] + pub struct $ChaChaXRng { + /// The ChaChaCore struct + pub core: BlockRng<$ChaChaXCore>, } - } - #[cfg(feature = "rand_core")] - fn generate(&mut self, buffer: &mut [u32; 64]) { - generate_core!(self, buffer); - } -} -macro_rules! impl_shared_traits { - ($Rng:ident, $Core:ident, $rounds:ident, $abst:ident) => { - impl SeedableRng for $Core { + /// The ChaCha core random number generator + #[derive(Clone)] + pub struct $ChaChaXCore(ChaChaCore<$rounds, Legacy>); + + impl SeedableRng for $ChaChaXCore { type Seed = Seed; #[inline] fn from_seed(seed: Self::Seed) -> Self { - Self(ChaChaCore::<$rounds, Ietf>::new(seed.as_ref(), &[0u8; 12])) + Self(ChaChaCore::<$rounds, Legacy>::new( + seed.as_ref(), + &[0u8; 12], + )) } } - impl SeedableRng for $Rng { + impl SeedableRng for $ChaChaXRng { type Seed = [u8; 32]; #[inline] fn from_seed(seed: Self::Seed) -> Self { Self { - core: BlockRng::new($Core::from_seed(seed.into())), + core: BlockRng::new($ChaChaXCore::from_seed(seed.into())), } } } - impl RngCore for $Rng { + impl RngCore for $ChaChaXRng { #[inline] fn next_u32(&mut self) -> u32 { self.core.next_u32() @@ -328,56 +394,58 @@ macro_rules! impl_shared_traits { self.core.fill_bytes(dest) } } - impl CryptoBlockRng for $Core {} - impl CryptoRng for $Rng {} + impl CryptoBlockRng for $ChaChaXCore {} + impl CryptoRng for $ChaChaXRng {} #[cfg(feature = "zeroize")] - impl ZeroizeOnDrop for $Core {} + impl ZeroizeOnDrop for $ChaChaXCore {} #[cfg(feature = "zeroize")] - impl ZeroizeOnDrop for $Rng {} + impl ZeroizeOnDrop for $ChaChaXRng {} // Custom Debug implementation that does not expose the internal state - impl Debug for $Rng { + impl Debug for $ChaChaXRng { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "ChaChaXCore {{}}") } } - impl $Rng { + impl $ChaChaXRng { // The buffer is a 4-block window, i.e. it is always at a block-aligned position in the // stream but if the stream has been sought it may not be self-aligned. /// Get the offset from the start of the stream, in 32-bit words. /// /// Since the generated blocks are 64 words (26) long and the - /// counter is 32-bits, the offset is a 36-bit number. Sub-word offsets are + /// counter is 64-bits, the offset is a 68-bit number. Sub-word offsets are /// not supported, hence the result can simply be multiplied by 4 to get a /// byte-offset. #[inline] - pub fn get_word_pos(&self) -> u64 { + pub fn get_word_pos(&self) -> u128 { let mut result = - u64::from(self.core.core.0.state[12].wrapping_sub(BUF_BLOCKS.into())) << 4; - result += self.core.index() as u64; - // eliminate bits above the 36th bit - result & 0xfffffffff + u128::from(self.core.core.0.state[12].wrapping_sub(BUF_BLOCKS.into())) << 4; + result += u128::from(self.core.core.0.state[13]) << (32 + 4); + result += self.core.index() as u128; + // eliminate bits above the 68th bit + result & ((1 << 68) - 1) } /// Set the offset from the start of the stream, in 32-bit words. This method /// takes any of the following: - /// * `[u8; 5]` - /// * `u64` + /// * `[u8; 9]` + /// * `u128` /// /// As with `get_word_pos`, we use a 36-bit number. When given a `u64`, we use /// the least significant 4 bits as the RNG's index, and the 32 bits before it /// as the block position. /// - /// When given a `[u8; 5]`, the word_pos is set similarly, but it is more + /// When given a `[u8; 9]`, the word_pos is set similarly, but it is more /// arbitrary. #[inline] pub fn set_word_pos>(&mut self, word_offset: W) { let word_pos: WordPosInput = word_offset.into(); - self.core.core.0.state[12] = word_pos.block_pos; + self.core.core.0.state[12] = word_pos.block_pos[0]; + self.core.core.0.state[13] = word_pos.block_pos[1]; // generate will increase block_pos by 4 self.core.generate_and_set(word_pos.index); } @@ -388,33 +456,35 @@ macro_rules! impl_shared_traits { /// /// This method takes any of the following: /// * `[u8; 4]` - /// * `u32` + /// * `[u32; 2]` #[inline] #[allow(unused)] pub fn set_block_pos>(&mut self, block_pos: B) { self.core.reset(); - self.core.core.0.state[12] = block_pos.into().0.to_le() + let block_pos = block_pos.into().0; + self.core.core.0.state[12] = block_pos[0].to_le(); + self.core.core.0.state[13] = block_pos[1].to_le() } /// Get the block pos. #[inline] #[allow(unused)] - pub fn get_block_pos(&self) -> u32 { - self.core.core.0.state[12] + pub fn get_block_pos(&self) -> u64 { + self.core.core.0.state[12] as u64 | ((self.core.core.0.state[13] as u64) << 32) } /// Set the stream number. The lower 96 bits are used and the rest are /// discarded. This method takes any of the following: - /// * `[u32; 3]` - /// * `[u8; 12]` - /// * `u128` + /// * `[u32; 2]` + /// * `[u8; 8]` + /// * `u64` /// /// This is initialized to zero; 296 unique streams of output /// are available per seed/key. #[inline] pub fn set_stream>(&mut self, stream: S) { let stream: StreamId = stream.into(); - for (n, val) in self.core.core.0.state[Ietf::NONCE_INDEX..BLOCK_WORDS as usize] + for (n, val) in self.core.core.0.state[Legacy::NONCE_INDEX..BLOCK_WORDS as usize] .as_mut() .iter_mut() .zip(stream.0.iter()) @@ -428,9 +498,9 @@ macro_rules! impl_shared_traits { /// Get the stream number. #[inline] - pub fn get_stream(&self) -> u128 { - let mut result = [0u8; 16]; - for (i, &big) in self.core.core.0.state[Ietf::NONCE_INDEX..BLOCK_WORDS as usize] + pub fn get_stream(&self) -> u64 { + let mut result = [0u8; 8]; + for (i, &big) in self.core.core.0.state[Legacy::NONCE_INDEX..BLOCK_WORDS as usize] .iter() .enumerate() { @@ -440,7 +510,7 @@ macro_rules! impl_shared_traits { result[index + 2] = (big >> 16) as u8; result[index + 3] = (big >> 24) as u8; } - u128::from_le_bytes(result) + u64::from_le_bytes(result) } /// Get the seed. @@ -458,38 +528,38 @@ macro_rules! impl_shared_traits { } } - impl PartialEq<$Rng> for $Rng { - fn eq(&self, rhs: &$Rng) -> bool { - let a: $abst::$Rng = self.into(); - let b: $abst::$Rng = rhs.into(); + impl PartialEq<$ChaChaXRng> for $ChaChaXRng { + fn eq(&self, rhs: &$ChaChaXRng) -> bool { + let a: $abst::$ChaChaXRng = self.into(); + let b: $abst::$ChaChaXRng = rhs.into(); a == b } } - impl Eq for $Rng {} + impl Eq for $ChaChaXRng {} #[cfg(feature = "serde1")] - impl Serialize for $Rng { + impl Serialize for $ChaChaXRng { fn serialize(&self, s: S) -> Result where S: Serializer, { - $abst::$Rng::from(self).serialize(s) + $abst::$ChaChaXRng::from(self).serialize(s) } } #[cfg(feature = "serde1")] - impl<'de> Deserialize<'de> for $Rng { + impl<'de> Deserialize<'de> for $ChaChaXRng { fn deserialize(d: D) -> Result where D: Deserializer<'de>, { - $abst::$Rng::deserialize(d).map(|x| Self::from(&x)) + $abst::$ChaChaXRng::deserialize(d).map(|x| Self::from(&x)) } } - impl From<$Core> for $Rng { - fn from(core: $Core) -> Self { - $Rng { + impl From<$ChaChaXCore> for $ChaChaXRng { + fn from(core: $ChaChaXCore) -> Self { + $ChaChaXRng { core: BlockRng::new(core), } } @@ -504,16 +574,16 @@ macro_rules! impl_shared_traits { // the API. #[derive(Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] - pub(crate) struct $Rng { + pub(crate) struct $ChaChaXRng { seed: crate::rng::Seed, - stream: u128, - word_pos: u64, + stream: u64, + word_pos: u128, } - impl From<&super::$Rng> for $Rng { + impl From<&super::$ChaChaXRng> for $ChaChaXRng { // Forget all information about the input except what is necessary to determine the // outputs of any sequence of pub API calls. - fn from(r: &super::$Rng) -> Self { + fn from(r: &super::$ChaChaXRng) -> Self { Self { seed: r.get_seed().into(), stream: r.get_stream(), @@ -522,9 +592,9 @@ macro_rules! impl_shared_traits { } } - impl From<&$Rng> for super::$Rng { + impl From<&$ChaChaXRng> for super::$ChaChaXRng { // Construct one of the possible concrete RNGs realizing an abstract state. - fn from(a: &$Rng) -> Self { + fn from(a: &$ChaChaXRng) -> Self { use rand_core::SeedableRng; let mut r = Self::from_seed(a.seed.0.into()); r.set_stream(a.stream); @@ -533,203 +603,6 @@ macro_rules! impl_shared_traits { } } } - }; -} - -macro_rules! impl_chacha_rng { - ($ChaChaXRng:ident, $ChaChaXCore:ident, $ChaChaXLegacyRng:ident, $ChaChaXLegacyCore:ident, $rounds:ident, $abst:ident, $abst_legacy:ident) => { - /// A cryptographically secure random number generator that uses the ChaCha algorithm. - /// - /// ChaCha is a stream cipher designed by Daniel J. Bernstein[^1], that we use as an RNG. It is - /// an improved variant of the Salsa20 cipher family, which was selected as one of the "stream - /// ciphers suitable for widespread adoption" by eSTREAM[^2]. - /// - /// ChaCha uses add-rotate-xor (ARX) operations as its basis. These are safe against timing - /// attacks, although that is mostly a concern for ciphers and not for RNGs. We provide a SIMD - /// implementation to support high throughput on a variety of common hardware platforms. - /// - /// With the ChaCha algorithm it is possible to choose the number of rounds the core algorithm - /// should run. The number of rounds is a tradeoff between performance and security, where 8 - /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a - /// conservative choice. - /// - /// We use a 32-bit counter and 96-bit stream identifier as in the IETF implementation[^3] - /// except that we use a stream identifier in place of a nonce. A 32-bit counter over 64-byte - /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows - /// 296 unique streams of output per seed. Both counter and stream are initialized - /// to zero but may be set via the `set_word_pos` and `set_stream` methods. - /// - /// The word layout is: - /// - /// ```text - /// constant constant constant constant - /// seed seed seed seed - /// seed seed seed seed - /// counter stream_id stream_id stream_id - /// ``` - /// This implementation uses an output buffer of sixteen `u32` words, and uses - /// [`BlockRng`] to implement the [`RngCore`] methods. - /// - /// # Example for `ChaCha20Rng` - /// - /// ```rust - /// use chacha20::ChaCha20Rng; - /// // use rand_core traits - /// use rand_core::{SeedableRng, RngCore}; - /// - /// // the following inputs are examples and are neither - /// // recommended nor suggested values - /// - /// let seed = [42u8; 32]; - /// let mut rng = ChaCha20Rng::from_seed(seed); - /// rng.set_stream(100); - /// - /// // you can also use a [u8; 12] in `.set_stream()` - /// rng.set_stream([3u8; 12]); - /// // or a [u32; 3] - /// rng.set_stream([4u32; 3]); - /// - /// - /// rng.set_word_pos(5); - /// - /// // you can also use a [u8; 5] in `.set_word_pos()` - /// rng.set_word_pos([2u8; 5]); - /// - /// let x = rng.next_u32(); - /// let mut array = [0u8; 32]; - /// rng.fill_bytes(&mut array); - /// - /// // If you need to zeroize the RNG's buffer, ensure that "zeroize" - /// // feature is enabled in Cargo.toml, and then it will zeroize on - /// // drop automatically - /// # #[cfg(feature = "zeroize")] - /// use zeroize::Zeroize; - /// ``` - /// - /// The other Rngs from this crate are initialized similarly. - /// - /// [^1]: D. J. Bernstein, [*ChaCha, a variant of Salsa20*]( - /// https://cr.yp.to/chacha.html) - /// - /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( - /// http://www.ecrypt.eu.org/stream/) - /// - /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( - /// https://www.rfc-editor.org/rfc/rfc8439) - #[derive(Clone)] - pub struct $ChaChaXRng { - /// The ChaChaCore struct - pub core: BlockRng<$ChaChaXCore>, - } - - /// A cryptographically secure random number generator that uses the ChaCha algorithm. - /// - /// ChaCha is a stream cipher designed by Daniel J. Bernstein[^1], that we use as an RNG. It is - /// an improved variant of the Salsa20 cipher family, which was selected as one of the "stream - /// ciphers suitable for widespread adoption" by eSTREAM[^2]. - /// - /// ChaCha uses add-rotate-xor (ARX) operations as its basis. These are safe against timing - /// attacks, although that is mostly a concern for ciphers and not for RNGs. We provide a SIMD - /// implementation to support high throughput on a variety of common hardware platforms. - /// - /// With the ChaCha algorithm it is possible to choose the number of rounds the core algorithm - /// should run. The number of rounds is a tradeoff between performance and security, where 8 - /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a - /// conservative choice. - /// - /// TODO: Fix this paragraph, some adjustments have been made already - /// We use a 64-bit counter and 64-bit stream identifier as in the IETF implementation[^3] - /// except that we use a stream identifier in place of a nonce. A 64-bit counter over 64-byte - /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows - /// 296 unique streams of output per seed. Both counter and stream are initialized - /// to zero but may be set via the `set_word_pos` and `set_stream` methods. - /// - /// The word layout is: - /// - /// ```text - /// constant constant constant constant - /// seed seed seed seed - /// seed seed seed seed - /// counter counter stream_id stream_id - /// ``` - /// This implementation uses an output buffer of sixteen `u32` words, and uses - /// [`BlockRng`] to implement the [`RngCore`] methods. - /// - /// # Example for `ChaCha20Rng` - /// - /// ```rust - /// use chacha20::ChaCha20Rng; - /// // use rand_core traits - /// use rand_core::{SeedableRng, RngCore}; - /// - /// // the following inputs are examples and are neither - /// // recommended nor suggested values - /// - /// let seed = [42u8; 32]; - /// let mut rng = ChaCha20Rng::from_seed(seed); - /// rng.set_stream(100); - /// - /// // you can also use a [u8; 12] in `.set_stream()` - /// rng.set_stream([3u8; 12]); - /// // or a [u32; 3] - /// rng.set_stream([4u32; 3]); - /// - /// - /// rng.set_word_pos(5); - /// - /// // you can also use a [u8; 5] in `.set_word_pos()` - /// rng.set_word_pos([2u8; 5]); - /// - /// let x = rng.next_u32(); - /// let mut array = [0u8; 32]; - /// rng.fill_bytes(&mut array); - /// - /// // If you need to zeroize the RNG's buffer, ensure that "zeroize" - /// // feature is enabled in Cargo.toml, and then it will zeroize on - /// // drop automatically - /// # #[cfg(feature = "zeroize")] - /// use zeroize::Zeroize; - /// ``` - /// - /// The other Rngs from this crate are initialized similarly. - /// - /// [^1]: D. J. Bernstein, [*ChaCha, a variant of Salsa20*]( - /// https://cr.yp.to/chacha.html) - /// - /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( - /// http://www.ecrypt.eu.org/stream/) - /// - /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( - /// https://www.rfc-editor.org/rfc/rfc8439) - #[derive(Clone)] - pub struct $ChaChaXLegacyRng { - pub core: BlockRng<$ChaChaXLegacyCore>, - } - - /// The ChaCha core random number generator - #[derive(Clone)] - pub struct $ChaChaXCore(ChaChaCore<$rounds, Ietf>); - - /// The ChaCha core random number generator - #[derive(Clone)] - pub struct $ChaChaXLegacyCore(ChaChaCore<$rounds, Ietf>); - - impl_shared_traits!($ChaChaXRng, $ChaChaXCore, $rounds, $abst); - impl_shared_traits!($ChaChaXLegacyRng, $ChaChaXLegacyCore, $rounds, $abst_legacy); - - impl BlockRngCore for $ChaChaXLegacyCore { - type Item = u32; - type Results = BlockRngResults; - - #[inline] - fn generate(&mut self, r: &mut Self::Results) { - self.0.generate_64_bit_counter(&mut r.0); - #[cfg(target_endian = "big")] - for word in r.0.iter_mut() { - *word = word.to_le(); - } - } - } impl BlockRngCore for $ChaChaXCore { type Item = u32; @@ -747,35 +620,11 @@ macro_rules! impl_chacha_rng { }; } -impl_chacha_rng!( - ChaCha8Rng, - ChaCha8Core, - ChaCha8LegacyRng, - ChaCha8LegacyCore, - R8, - abst8, - abst8legacy -); - -impl_chacha_rng!( - ChaCha12Rng, - ChaCha12Core, - ChaCha12LegacyRng, - ChaCha12LegacyCore, - R12, - abst12, - abst12legacy -); - -impl_chacha_rng!( - ChaCha20Rng, - ChaCha20Core, - ChaCha20LegacyRng, - ChaCha20LegacyCore, - R20, - abst20, - abst20legacy -); +impl_chacha_rng!(ChaCha8Rng, ChaCha8Core, R8, abst8); + +impl_chacha_rng!(ChaCha12Rng, ChaCha12Core, R12, abst12); + +impl_chacha_rng!(ChaCha20Rng, ChaCha20Core, R20, abst20); #[cfg(test)] pub(crate) mod tests { @@ -810,13 +659,14 @@ pub(crate) mod tests { let mut rng = ChaCha20Rng::from_seed(KEY); rng.set_stream(1337); // test counter wrapping-add - rng.set_word_pos((2u64).pow(36) - 1); - let mut output = [3u8; 128]; + rng.set_word_pos((1 << 68) - 65); + let mut output = [3u8; 1280]; rng.fill_bytes(&mut output); - assert_ne!(output, [0u8; 128]); + assert_ne!(output, [0u8; 1280]); - assert!(rng.get_word_pos() < 2000 && rng.get_word_pos() != 0); + assert!(rng.get_word_pos() < 2000); + assert!(rng.get_word_pos() != 0); } #[test] @@ -825,11 +675,11 @@ pub(crate) mod tests { let mut rng = ChaCha20Rng::from_seed(seed); // test set_stream with [u32; 3] - rng.set_stream([313453u32, 0u32, 0u32]); + rng.set_stream([313453u32, 0u32]); assert_eq!(rng.get_stream(), 313453); // test set_stream with [u8; 12] - rng.set_stream([89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + rng.set_stream([89, 0, 0, 0, 0, 0, 0, 0]); assert_eq!(rng.get_stream(), 89); // test set_stream with u128 @@ -842,16 +692,17 @@ pub(crate) mod tests { // test word_pos = 16 * block_pos assert_eq!(rng.get_word_pos(), 58392 * 16); - // test set_block_pos with [u8; 4] - rng.set_block_pos([77, 0, 0, 0]); + // test set_block_pos with [u8; 8] + rng.set_block_pos([77, 0, 0, 0, 0, 0, 0, 0]); assert_eq!(rng.get_block_pos(), 77); // test set_word_pos with u64 rng.set_word_pos(8888); assert_eq!(rng.get_word_pos(), 8888); - // test set_word_pos with [u8; 5] - rng.set_word_pos([55, 0, 0, 0, 0]) + // test set_word_pos with [u8; 9] + rng.set_word_pos([55, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(rng.get_word_pos(), 55 * 16); } #[cfg(feature = "serde1")] @@ -1021,9 +872,9 @@ pub(crate) mod tests { assert_eq!(results, expected); assert_eq!(rng3.get_word_pos(), expected_end); - // Test block 2 by using `set_block_pos` and [u8; 4] + // Test block 2 by using `set_block_pos` and [u8; 8] let mut rng4 = ChaChaRng::from_seed(seed); - rng4.set_block_pos([2, 0, 0, 0]); + rng4.set_block_pos([2, 0, 0, 0, 0, 0, 0, 0]); results = [0u32; 16]; for i in results.iter_mut() { *i = rng4.next_u32(); @@ -1092,8 +943,9 @@ pub(crate) mod tests { let seed = hex!("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"); let mut rng = ChaChaRng::from_seed(seed); - let stream_id = hex!("000000090000004a00000000"); + let stream_id = hex!("0000004a00000000"); rng.set_stream(stream_id); + rng.set_block_pos(hex!("0000000000000009")); // The test vectors omit the first 64-bytes of the keystream let mut discard_first_64 = [0u8; 64]; @@ -1146,7 +998,7 @@ pub(crate) mod tests { use super::{BLOCK_WORDS, BUF_BLOCKS}; let mut rng = ChaChaRng::from_seed(Default::default()); // refilling the buffer in set_word_pos will wrap the block counter to 0 - let last_block = (2u64).pow(36) - u64::from(BUF_BLOCKS * BLOCK_WORDS); + let last_block = (2u128).pow(64) - u128::from(BUF_BLOCKS * BLOCK_WORDS); rng.set_word_pos(last_block); assert_eq!(rng.get_word_pos(), last_block); } @@ -1156,7 +1008,7 @@ pub(crate) mod tests { use super::BLOCK_WORDS; let mut rng = ChaChaRng::from_seed(Default::default()); // refilling the buffer in set_word_pos will wrap the block counter past 0 - let last_block = (1 << 36) - u64::from(BLOCK_WORDS); + let last_block = (1 << 68) - u128::from(BLOCK_WORDS); rng.set_word_pos(last_block); assert_eq!(rng.get_word_pos(), last_block); } @@ -1228,7 +1080,7 @@ pub(crate) mod tests { "Failed test at start_word_pos = {},\nfailed index: {:?}\nFailing word_pos = {}", debug_start_word_pos, index, - debug_start_word_pos + (index / 4) as u64 + debug_start_word_pos + (index / 4) as u128 ); } } @@ -1262,7 +1114,7 @@ pub(crate) mod tests { "Failed test at start_word_pos = {},\nfailed index: {:?}\nFailing word_pos = {}", debug_start_word_pos, index, - debug_start_word_pos + (index / 4) as u64 + debug_start_word_pos + (index / 4) as u128 ); } } @@ -1288,14 +1140,14 @@ pub(crate) mod tests { #[test] fn stream_id_endianness() { let mut rng = ChaCha20Rng::from_seed([0u8; 32]); - rng.set_stream([3, 3333, 333333]); - let expected = 2059058063; + rng.set_stream([3, 3333]); + let expected = 1152671828; assert_eq!(rng.next_u32(), expected); rng.set_stream(1234567); - let expected = 1254506509; + let expected = 3110319182; assert_eq!(rng.next_u32(), expected); - rng.set_stream([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]); - let expected = 1391671567; + rng.set_stream([1, 2, 3, 4, 5, 6, 7, 8]); + let expected = 3790367479; assert_eq!(rng.next_u32(), expected); } @@ -1303,7 +1155,7 @@ pub(crate) mod tests { /// performing 64-bit addition. #[test] #[ignore = "Counter is now a 64 bit counter"] - fn counter_wrapping() { + fn counter_wrapping_32_bit_counter() { let mut rng = ChaChaRng::from_seed([0u8; 32]); // get first four blocks and word pos @@ -1312,7 +1164,7 @@ pub(crate) mod tests { let word_pos = rng.get_word_pos(); // get first four blocks after wrapping - rng.set_block_pos(u32::MAX); + rng.set_block_pos(u64::MAX); let mut result = [0u8; 64 * 5]; rng.fill_bytes(&mut result); assert_eq!(word_pos, rng.get_word_pos()); From f2a24c64ccbe23cc51dc1f0d0e90320617ca6599 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 17:19:13 -0500 Subject: [PATCH 14/64] fixed test_chacha_word_pos_wrap_excess; not the cleanest fix --- chacha20/src/rng.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 1541ef38..d4a616d0 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1008,7 +1008,7 @@ pub(crate) mod tests { use super::BLOCK_WORDS; let mut rng = ChaChaRng::from_seed(Default::default()); // refilling the buffer in set_word_pos will wrap the block counter past 0 - let last_block = (1 << 68) - u128::from(BLOCK_WORDS); + let last_block = (1 << 68) - u128::from(BLOCK_WORDS) * 8; rng.set_word_pos(last_block); assert_eq!(rng.get_word_pos(), last_block); } From 19dc6656c1d09e86a7df8a6bdb5fc2f190083b28 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 17:20:46 -0500 Subject: [PATCH 15/64] fixed test_chacha_word_pos_wrap_exact; not the cleanest fix again --- chacha20/src/rng.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index d4a616d0..0d2fb724 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -998,7 +998,7 @@ pub(crate) mod tests { use super::{BLOCK_WORDS, BUF_BLOCKS}; let mut rng = ChaChaRng::from_seed(Default::default()); // refilling the buffer in set_word_pos will wrap the block counter to 0 - let last_block = (2u128).pow(64) - u128::from(BUF_BLOCKS * BLOCK_WORDS); + let last_block = (2u128).pow(64) - u128::from(BUF_BLOCKS * BLOCK_WORDS * 2); rng.set_word_pos(last_block); assert_eq!(rng.get_word_pos(), last_block); } From 83d7063b3d9bfee0a3dde0c069ba2dae3abcff1f Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 15 Aug 2025 18:31:30 -0500 Subject: [PATCH 16/64] All tests pass on avx2, even the doctests --- chacha20/src/rng.rs | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 0d2fb724..edd85b67 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -317,16 +317,16 @@ macro_rules! impl_chacha_rng { /// let mut rng = ChaCha20Rng::from_seed(seed); /// rng.set_stream(100); /// - /// // you can also use a [u8; 12] in `.set_stream()` - /// rng.set_stream([3u8; 12]); - /// // or a [u32; 3] - /// rng.set_stream([4u32; 3]); + /// // you can also use a [u8; 8] in `.set_stream()` + /// rng.set_stream([3u8; 8]); + /// // or a [u32; 2] + /// rng.set_stream([4u32; 2]); /// /// /// rng.set_word_pos(5); /// - /// // you can also use a [u8; 5] in `.set_word_pos()` - /// rng.set_word_pos([2u8; 5]); + /// // you can also use a [u8; 9] in `.set_word_pos()` + /// rng.set_word_pos([2u8; 9]); /// /// let x = rng.next_u32(); /// let mut array = [0u8; 32]; @@ -422,12 +422,13 @@ macro_rules! impl_chacha_rng { /// byte-offset. #[inline] pub fn get_word_pos(&self) -> u128 { - let mut result = - u128::from(self.core.core.0.state[12].wrapping_sub(BUF_BLOCKS.into())) << 4; - result += u128::from(self.core.core.0.state[13]) << (32 + 4); - result += self.core.index() as u128; + let mut block_counter = (u64::from(self.core.core.0.state[13]) << 32) + | u64::from(self.core.core.0.state[12]); + block_counter = block_counter.wrapping_sub(BUF_BLOCKS as u64); + let word_pos = + block_counter as u128 * BLOCK_WORDS as u128 + self.core.index() as u128; // eliminate bits above the 68th bit - result & ((1 << 68) - 1) + word_pos & ((1 << 68) - 1) } /// Set the offset from the start of the stream, in 32-bit words. This method @@ -1170,4 +1171,14 @@ pub(crate) mod tests { assert_eq!(word_pos, rng.get_word_pos()); assert_eq!(&first_blocks[0..64 * 4], &result[64..]); } + + #[test] + fn initial_state() { + let seed = [ + 1, 0, 52, 0, 0, 0, 0, 0, 1, 0, 10, 0, 22, 32, 0, 0, 2, 0, 55, 49, 0, 11, 0, 0, 3, 0, 0, + 0, 0, 0, 2, 92, + ]; + let rng = ChaCha20Rng::from_seed(seed); + assert_eq!(rng.get_word_pos(), 0); + } } From 23a335dea5837b4ff7f717fcd447fc61a5046a3f Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 09:52:33 -0500 Subject: [PATCH 17/64] updated paragraph that was previously marked 'TODO' --- chacha20/src/rng.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index edd85b67..4c5a1cd0 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -84,8 +84,8 @@ impl Debug for Seed { /// A wrapper for set_word_pos() input. /// /// Can be constructed from any of the following: -/// * `[u8; 9]` /// * `u128` +/// * `[u8; 9]` pub struct WordPosInput { block_pos: [u32; 2], index: usize, @@ -286,10 +286,10 @@ macro_rules! impl_chacha_rng { /// rounds is the minimum potentially secure configuration, and 20 rounds is widely used as a /// conservative choice. /// - /// We use a 32-bit counter and 96-bit stream identifier as in the IETF implementation[^3] - /// except that we use a stream identifier in place of a nonce. A 32-bit counter over 64-byte - /// (16 word) blocks allows 256 GiB of output before cycling, and the stream identifier allows - /// 296 unique streams of output per seed. Both counter and stream are initialized + /// We use a 64-bit counter and 64-bit stream identifier as in Bernstein's implementation[^3] + /// except that we use a stream identifier in place of a nonce. A 64-bit counter over 64-byte + /// (16 word) blocks allows 1 ZiB of output before cycling, and the stream identifier allows + /// 264 unique streams of output per seed. Both counter and stream are initialized /// to zero but may be set via the `set_word_pos` and `set_stream` methods. /// /// The word layout is: @@ -298,7 +298,7 @@ macro_rules! impl_chacha_rng { /// constant constant constant constant /// seed seed seed seed /// seed seed seed seed - /// counter stream_id stream_id stream_id + /// counter counter stream_id stream_id /// ``` /// This implementation uses an output buffer of sixteen `u32` words, and uses /// [`BlockRng`] to implement the [`RngCore`] methods. @@ -346,9 +346,6 @@ macro_rules! impl_chacha_rng { /// /// [^2]: [eSTREAM: the ECRYPT Stream Cipher Project]( /// http://www.ecrypt.eu.org/stream/) - /// - /// [^3]: Internet Research Task Force, [*ChaCha20 and Poly1305 for IETF Protocols*]( - /// https://www.rfc-editor.org/rfc/rfc8439) #[derive(Clone)] pub struct $ChaChaXRng { /// The ChaChaCore struct @@ -433,15 +430,16 @@ macro_rules! impl_chacha_rng { /// Set the offset from the start of the stream, in 32-bit words. This method /// takes any of the following: - /// * `[u8; 9]` /// * `u128` + /// * `[u8; 9]` /// /// As with `get_word_pos`, we use a 36-bit number. When given a `u64`, we use /// the least significant 4 bits as the RNG's index, and the 32 bits before it /// as the block position. /// /// When given a `[u8; 9]`, the word_pos is set similarly, but it is more - /// arbitrary. + /// arbitrary since the index is set using the lowest 4 bits of the last + /// byte. #[inline] pub fn set_word_pos>(&mut self, word_offset: W) { let word_pos: WordPosInput = word_offset.into(); @@ -456,7 +454,8 @@ macro_rules! impl_chacha_rng { /// The word pos will be equal to `block_pos * 16 words per block`. /// /// This method takes any of the following: - /// * `[u8; 4]` + /// * `u64` + /// * `[u8; 8]` /// * `[u32; 2]` #[inline] #[allow(unused)] @@ -476,9 +475,9 @@ macro_rules! impl_chacha_rng { /// Set the stream number. The lower 96 bits are used and the rest are /// discarded. This method takes any of the following: - /// * `[u32; 2]` - /// * `[u8; 8]` /// * `u64` + /// * `[u8; 8]` + /// * `[u32; 2]` /// /// This is initialized to zero; 296 unique streams of output /// are available per seed/key. From 9505bec54859f031f179bf9bd33cf13cea3a4e84 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 10:51:25 -0500 Subject: [PATCH 18/64] fixed set/get_block_pos; also added a cipher test to see if I could break it; I did not break it --- chacha20/src/lib.rs | 14 ++++------- chacha20/src/variants.rs | 52 ++++++++++++++++++++++++++++++++++++++++ chacha20/tests/kats.rs | 35 ++++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 10 deletions(-) diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index 15abd606..a4b444b9 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -265,16 +265,17 @@ impl ChaChaCore { #[cfg(feature = "cipher")] impl StreamCipherSeekCore for ChaChaCore { - type Counter = u32; + type Counter = V::Counter; #[inline(always)] fn get_block_pos(&self) -> Self::Counter { - self.state[12] + V::get_block_pos(&self.state[12..V::NONCE_INDEX]) } #[inline(always)] fn set_block_pos(&mut self, pos: Self::Counter) { - self.state[12] = pos + let block_pos_words = V::set_block_pos_helper(pos); + self.state[12..V::NONCE_INDEX].copy_from_slice(block_pos_words.as_ref()); } } @@ -282,12 +283,7 @@ impl StreamCipherSeekCore for ChaChaCore { impl StreamCipherCore for ChaChaCore { #[inline(always)] fn remaining_blocks(&self) -> Option { - let rem = V::COUNTER_MAX - self.get_block_pos() as u64; - if rem > usize::MAX as u64 { - None - } else { - rem.try_into().ok() - } + V::remaining_blocks(self.get_block_pos()) } fn process_with_backend( diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index c6f484a3..400d75aa 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -7,6 +7,23 @@ pub trait Variant: Clone { /// the size of the Nonce in u32s const NONCE_INDEX: usize; const COUNTER_MAX: u64; + #[cfg(feature = "cipher")] + type Counter: cipher::StreamCipherCounter; + #[cfg(not(feature = "cipher"))] + type Counter; + + type CounterWords: AsRef<[u32]>; + + /// Takes a slice of state[12..NONCE_INDEX] to convert it into + /// Self::Counter. + fn get_block_pos(counter_row: &[u32]) -> Self::Counter; + + /// Breaks down the Self::Counter type into a u32 array for setting the + /// block pos. + fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords; + + /// A helper method for calculating the remaining blocks using these types + fn remaining_blocks(block_pos: Self::Counter) -> Option; } #[derive(Clone)] @@ -15,6 +32,21 @@ pub struct Ietf(); impl Variant for Ietf { const NONCE_INDEX: usize = 13; const COUNTER_MAX: u64 = u32::MAX as u64; + type Counter = u32; + + type CounterWords = [u32; 1]; + #[inline(always)] + fn get_block_pos(counter_row: &[u32]) -> Self::Counter { + counter_row[0] + } + #[inline(always)] + fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords { + [value] + } + #[inline(always)] + fn remaining_blocks(block_pos: Self::Counter) -> Option { + (u32::MAX - block_pos).try_into().ok() + } } #[derive(Clone)] @@ -25,4 +57,24 @@ pub struct Legacy(); impl Variant for Legacy { const NONCE_INDEX: usize = 14; const COUNTER_MAX: u64 = u64::MAX; + type Counter = u64; + + type CounterWords = [u32; 2]; + #[inline(always)] + fn get_block_pos(counter_row: &[u32]) -> Self::Counter { + counter_row[0] as u64 | ((counter_row[1] as u64) << 32) + } + #[inline(always)] + fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords { + [value as u32, (value >> 32) as u32] + } + #[inline(always)] + fn remaining_blocks(block_pos: Self::Counter) -> Option { + let remaining = u64::MAX - block_pos; + #[cfg(target_pointer_width = "32")] + if remaining > usize::MAX as u64 { + return None; + } + remaining.try_into().ok() + } } diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 4e4aa33c..605a9824 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -21,7 +21,7 @@ cipher::stream_cipher_seek_test!(chacha20legacy_seek, ChaCha20Legacy); #[cfg(feature = "cipher")] mod chacha20test { use chacha20::{ChaCha20, KeyIvInit}; - use cipher::StreamCipher; + use cipher::{StreamCipher, StreamCipherSeek}; use hex_literal::hex; // @@ -94,6 +94,39 @@ mod chacha20test { cipher.apply_keystream(&mut buf); assert_eq!(&buf[..], &CIPHERTEXT[..]); } + + #[test] + fn chacha20_potential_counter_issue_1() { + use std::panic; + + let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); + let mut first_block = [0u8; 64]; + assert_eq!(cipher.current_pos::(), 0); + cipher.apply_keystream(&mut first_block); + + let mut buf_1 = [0u8; 64]; + let mut buf_2 = [0u8; 65]; + + // seek to end of keystream + let max_bytes = (u64::from(u32::MAX) + 1) * 64; + let pos = max_bytes - 128; + cipher.try_seek(pos).unwrap(); + assert_eq!(cipher.current_pos::(), pos); + + // overshoot keystream length + let applied_keystream = cipher.try_apply_keystream(&mut buf_2); + assert_eq!(applied_keystream.is_err(), true); + + // exhaust keystream + cipher.apply_keystream(&mut buf_1); + + // seek to beginning and check if the first block is the same as before + let mut first_block_observation_2 = [0u8; 64]; + cipher.seek(0); + assert_eq!(cipher.current_pos::(), 0); + cipher.apply_keystream(&mut first_block_observation_2); + assert_eq!(first_block_observation_2, first_block); + } } #[rustfmt::skip] From 7481ed11dfd4a15e01860cacaee6fa81dd65af0e Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 10:52:51 -0500 Subject: [PATCH 19/64] fix unused import --- chacha20/tests/kats.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 605a9824..153da3aa 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -97,8 +97,6 @@ mod chacha20test { #[test] fn chacha20_potential_counter_issue_1() { - use std::panic; - let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); let mut first_block = [0u8; 64]; assert_eq!(cipher.current_pos::(), 0); From 4d0e0d03a769962f82bcfea2b34a33673695f3f3 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 11:05:17 -0500 Subject: [PATCH 20/64] borrowed tests from rand_chacha to replace the ones we had that I had 'unclean fixes' for; added second nonce test --- chacha20/src/rng.rs | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 4c5a1cd0..a73bbf6d 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -751,7 +751,7 @@ pub(crate) mod tests { #[cfg(feature = "serde1")] #[test] fn test_chacha_serde_format_stability() { - let j = r#"{"seed":[4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8],"stream":27182818284,"word_pos":3141592653}"#; + let j = r#"{"seed":[4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8,15,16,23,42,4,8],"stream":27182818284,"word_pos":314159265359}"#; let r: ChaChaRng = serde_json::from_str(j).unwrap(); let j1 = serde_json::to_string(&r).unwrap(); assert_eq!(j, j1); @@ -964,6 +964,29 @@ pub(crate) mod tests { assert_eq!(results, expected); } + #[test] + fn test_chacha_nonce_2() { + // Test vector 5 from + // https://tools.ietf.org/html/draft-nir-cfrg-chacha20-poly1305-04 + // Although we do not support setting a nonce, we try it here anyway so + // we can use this test vector. + let seed = [0u8; 32]; + let mut rng = ChaChaRng::from_seed(seed); + // 96-bit nonce in LE order is: 0,0,0,0, 0,0,0,0, 0,0,0,2 + rng.set_stream(2u64 << (24 + 32)); + + let mut results = [0u32; 16]; + for i in results.iter_mut() { + *i = rng.next_u32(); + } + let expected = [ + 0x374dc6c2, 0x3736d58c, 0xb904e24a, 0xcd3f93ef, 0x88228b1a, 0x96a4dfb3, 0x5b76ab72, + 0xc727ee54, 0x0e0e978a, 0xf3145c95, 0x1b748ea8, 0xf786c297, 0x99c28f5f, 0x628314e8, + 0x398a19fa, 0x6ded1b53, + ]; + assert_eq!(results, expected); + } + #[test] fn test_chacha_clone_streams() { let seed = [ @@ -998,7 +1021,7 @@ pub(crate) mod tests { use super::{BLOCK_WORDS, BUF_BLOCKS}; let mut rng = ChaChaRng::from_seed(Default::default()); // refilling the buffer in set_word_pos will wrap the block counter to 0 - let last_block = (2u128).pow(64) - u128::from(BUF_BLOCKS * BLOCK_WORDS * 2); + let last_block = (1 << 68) - u128::from(BUF_BLOCKS * BLOCK_WORDS); rng.set_word_pos(last_block); assert_eq!(rng.get_word_pos(), last_block); } @@ -1008,7 +1031,7 @@ pub(crate) mod tests { use super::BLOCK_WORDS; let mut rng = ChaChaRng::from_seed(Default::default()); // refilling the buffer in set_word_pos will wrap the block counter past 0 - let last_block = (1 << 68) - u128::from(BLOCK_WORDS) * 8; + let last_block = (1 << 68) - u128::from(BLOCK_WORDS); rng.set_word_pos(last_block); assert_eq!(rng.get_word_pos(), last_block); } @@ -1170,14 +1193,4 @@ pub(crate) mod tests { assert_eq!(word_pos, rng.get_word_pos()); assert_eq!(&first_blocks[0..64 * 4], &result[64..]); } - - #[test] - fn initial_state() { - let seed = [ - 1, 0, 52, 0, 0, 0, 0, 0, 1, 0, 10, 0, 22, 32, 0, 0, 2, 0, 55, 49, 0, 11, 0, 0, 3, 0, 0, - 0, 0, 0, 2, 92, - ]; - let rng = ChaCha20Rng::from_seed(seed); - assert_eq!(rng.get_word_pos(), 0); - } } From e2b8e414f95bda30c0587175c9ea14949fbdc061 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 11:07:48 -0500 Subject: [PATCH 21/64] unignored tests --- chacha20/src/rng.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index a73bbf6d..ff1d117c 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1037,7 +1037,6 @@ pub(crate) mod tests { } #[test] - #[ignore = "64 bit counter"] fn test_chacha_word_pos_zero() { let mut rng = ChaChaRng::from_seed(Default::default()); assert_eq!(rng.core.core.0.state[12], 0); @@ -1175,10 +1174,9 @@ pub(crate) mod tests { } /// If this test fails, the backend may be - /// performing 64-bit addition. + /// performing 32-bit addition. #[test] - #[ignore = "Counter is now a 64 bit counter"] - fn counter_wrapping_32_bit_counter() { + fn counter_nonwrapping_32_bit_counter() { let mut rng = ChaChaRng::from_seed([0u8; 32]); // get first four blocks and word pos @@ -1191,6 +1189,6 @@ pub(crate) mod tests { let mut result = [0u8; 64 * 5]; rng.fill_bytes(&mut result); assert_eq!(word_pos, rng.get_word_pos()); - assert_eq!(&first_blocks[0..64 * 4], &result[64..]); + assert_ne!(&first_blocks[0..64 * 4], &result[64..]); } } From 77354d0b5fdc7de6925cd3aa4e2454887aa7dfd4 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 14:50:18 -0500 Subject: [PATCH 22/64] fixed counter wrapping tests --- chacha20/src/rng.rs | 26 +++++++++++++++++++++++--- chacha20/tests/kats.rs | 32 +++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index ff1d117c..461c7407 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1173,10 +1173,10 @@ pub(crate) mod tests { assert_eq!(rng.next_u32(), expected); } - /// If this test fails, the backend may be - /// performing 32-bit addition. + /// If this test fails, the backend may not be + /// performing 64-bit addition. #[test] - fn counter_nonwrapping_32_bit_counter() { + fn counter_wrapping_64_bit_counter() { let mut rng = ChaChaRng::from_seed([0u8; 32]); // get first four blocks and word pos @@ -1189,6 +1189,26 @@ pub(crate) mod tests { let mut result = [0u8; 64 * 5]; rng.fill_bytes(&mut result); assert_eq!(word_pos, rng.get_word_pos()); + assert_eq!(&first_blocks[0..64 * 4], &result[64..]); + } + + /// If this test fails, the backend may be doing + /// 32-bit addition. + #[test] + fn counter_not_wrapping_at_32_bits() { + let mut rng = ChaChaRng::from_seed([0u8; 32]); + + // get first four blocks and word pos + let mut first_blocks = [0u8; 64 * 4]; + rng.fill_bytes(&mut first_blocks); + let first_blocks_end_word_pos = rng.get_word_pos(); + + // get first four blocks after the supposed overflow + rng.set_block_pos(u32::MAX as u64); + let mut result = [0u8; 64 * 5]; + rng.fill_bytes(&mut result); + assert_ne!(first_blocks_end_word_pos, rng.get_word_pos()); + assert_eq!(rng.get_word_pos(), first_blocks_end_word_pos + (1 << 32) * BLOCK_WORDS as u128); assert_ne!(&first_blocks[0..64 * 4], &result[64..]); } } diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 153da3aa..f2c4c150 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -96,7 +96,7 @@ mod chacha20test { } #[test] - fn chacha20_potential_counter_issue_1() { + fn chacha20_potential_counter_issue_v1() { let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); let mut first_block = [0u8; 64]; assert_eq!(cipher.current_pos::(), 0); @@ -125,6 +125,36 @@ mod chacha20test { cipher.apply_keystream(&mut first_block_observation_2); assert_eq!(first_block_observation_2, first_block); } + + #[test] + fn chacha20_potential_counter_issue_v2() { + let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); + let mut first_block = [0u8; 64]; + assert_eq!(cipher.current_pos::(), 0); + cipher.apply_keystream(&mut first_block); + + let mut buf_1 = [0u8; 256]; + let mut buf_2 = [0u8; 257]; + + // seek to end of keystream + let pos = (1 << 32) * 64 - 256 - 64; + cipher.try_seek(pos).unwrap(); + assert_eq!(cipher.current_pos::(), pos); + + // overshoot keystream length + let applied_keystream = cipher.try_apply_keystream(&mut buf_2); + assert_eq!(applied_keystream.is_err(), true); + + // exhaust keystream + cipher.apply_keystream(&mut buf_1); + + // seek to beginning and check if the first block is the same as before + let mut first_block_observation_2 = [0u8; 64]; + cipher.seek(0); + assert_eq!(cipher.current_pos::(), 0); + cipher.apply_keystream(&mut first_block_observation_2); + assert_eq!(first_block_observation_2, first_block); + } } #[rustfmt::skip] From c13cd532c6ad3587c1fdcb0a92859c9274f0b7d6 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 14:50:34 -0500 Subject: [PATCH 23/64] fmt --- chacha20/src/rng.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 461c7407..4350a8c6 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1192,7 +1192,7 @@ pub(crate) mod tests { assert_eq!(&first_blocks[0..64 * 4], &result[64..]); } - /// If this test fails, the backend may be doing + /// If this test fails, the backend may be doing /// 32-bit addition. #[test] fn counter_not_wrapping_at_32_bits() { @@ -1208,7 +1208,10 @@ pub(crate) mod tests { let mut result = [0u8; 64 * 5]; rng.fill_bytes(&mut result); assert_ne!(first_blocks_end_word_pos, rng.get_word_pos()); - assert_eq!(rng.get_word_pos(), first_blocks_end_word_pos + (1 << 32) * BLOCK_WORDS as u128); + assert_eq!( + rng.get_word_pos(), + first_blocks_end_word_pos + (1 << 32) * BLOCK_WORDS as u128 + ); assert_ne!(&first_blocks[0..64 * 4], &result[64..]); } } From 64662db7969416995342ceed0d5f5c92675c7de5 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 17:34:00 -0500 Subject: [PATCH 24/64] avx2 passes tests now, but sse2 and neon don't --- chacha20/src/backends/avx2.rs | 4 ++-- chacha20/src/rng.rs | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index 4b74aba9..6b0a4d3f 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -72,11 +72,11 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); } let mut backend = Backend:: { v, diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 4350a8c6..9fb86728 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -172,7 +172,8 @@ pub struct BlockPos([u32; 2]); impl From for BlockPos { #[inline] - fn from(value: u64) -> Self { + fn from(mut value: u64) -> Self { + value = value.to_le(); Self([value as u32, (value >> 32) as u32]) } } @@ -186,7 +187,8 @@ impl From<[u8; 8]> for BlockPos { impl From<[u32; 2]> for BlockPos { #[inline] - fn from(value: [u32; 2]) -> Self { + fn from(mut value: [u32; 2]) -> Self { + value.iter_mut().for_each(|x| *x = x.to_le()); Self(value) } } @@ -1182,13 +1184,23 @@ pub(crate) mod tests { // get first four blocks and word pos let mut first_blocks = [0u8; 64 * 4]; rng.fill_bytes(&mut first_blocks); - let word_pos = rng.get_word_pos(); + let first_blocks_end_word_pos = rng.get_word_pos(); + let first_blocks_end_block_counter = rng.get_block_pos(); // get first four blocks after wrapping - rng.set_block_pos(u64::MAX); + rng.set_block_pos([u32::MAX, u32::MAX]); let mut result = [0u8; 64 * 5]; rng.fill_bytes(&mut result); - assert_eq!(word_pos, rng.get_word_pos()); + assert_eq!(first_blocks_end_word_pos, rng.get_word_pos()); + assert_eq!(first_blocks_end_block_counter, rng.get_block_pos() - 3); + + if first_blocks[0..64 * 4].ne(&result[64..]) { + for (i, (a, b)) in first_blocks.iter().zip(result.iter().skip(64)).enumerate() { + if a.ne(b) { + panic!("i = {}\na = {}\nb = {}", i, a, b); + } + } + } assert_eq!(&first_blocks[0..64 * 4], &result[64..]); } From 32f733c396cb86f7101b7ba02b6d500901186495 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 17:55:17 -0500 Subject: [PATCH 25/64] fixed sse2! --- chacha20/src/backends/sse2.rs | 5 ++--- chacha20/src/rng.rs | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 321c802c..0886659b 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -146,11 +146,10 @@ unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { } for block in 0..PAR_BLOCKS { - for i in 0..4 { + for i in 0..3 { res[block][i] = _mm_add_epi32(res[block][i], v[i]); } - // add the counter since `v` is lacking updated counter values - res[block][3] = _mm_add_epi32(res[block][3], _mm_set_epi32(0, 0, 0, block as i32)); + res[block][3] = _mm_add_epi32(res[block][3], _mm_add_epi64(v[3], _mm_set_epi64x(0, block as i64))); } res diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 9fb86728..0fca1346 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1193,7 +1193,7 @@ pub(crate) mod tests { rng.fill_bytes(&mut result); assert_eq!(first_blocks_end_word_pos, rng.get_word_pos()); assert_eq!(first_blocks_end_block_counter, rng.get_block_pos() - 3); - + if first_blocks[0..64 * 4].ne(&result[64..]) { for (i, (a, b)) in first_blocks.iter().zip(result.iter().skip(64)).enumerate() { if a.ne(b) { From d889eb8a6a1ace29c297671d5271822b15e1311b Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 18:11:06 -0500 Subject: [PATCH 26/64] fixed neon! same problem sse2 had --- chacha20/src/backends/neon.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index d15c8c9c..96d4c950 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -235,11 +235,16 @@ impl Backend { let mut dest_ptr = buffer.as_mut_ptr() as *mut u8; for block in 0..4 { // add state to block - for state_row in 0..4 { + for state_row in 0..3 { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - blocks[block][3] = add64!(blocks[block][3], self.ctrs[block - 1]); + add_assign_vec!( + blocks[block][3], + add64!(self.state[3], self.ctrs[block - 1]) + ); + } else { + add_assign_vec!(blocks[block][3], self.state[3]); } // write blocks to buffer for state_row in 0..4 { From faa32a53cc6b4e45c3fdb28a7356632666706b78 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 18:12:48 -0500 Subject: [PATCH 27/64] fixed cipher neon code --- chacha20/src/backends/neon.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 96d4c950..3a0bd09c 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -159,7 +159,12 @@ impl StreamCipherBackend for Backend { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - blocks[block][3] = add64!(blocks[block][3], self.ctrs[block - 1]); + add_assign_vec!( + blocks[block][3], + add64!(self.state[3], self.ctrs[block - 1]) + ); + } else { + add_assign_vec!(blocks[block][3], self.state[3]); } // write blocks to dest for state_row in 0..4 { From 8dd7db844bc576f13c9baa7433f29d2e0eacf76b Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 18:59:55 -0500 Subject: [PATCH 28/64] missed something in neon when copying code changes --- chacha20/src/backends/neon.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 3a0bd09c..f03f6b0d 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -155,7 +155,7 @@ impl StreamCipherBackend for Backend { for block in 0..4 { // add state to block - for state_row in 0..4 { + for state_row in 0..3 { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { From b26db0dd142709112ec22d91c875445511474a10 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 19:50:51 -0500 Subject: [PATCH 29/64] added multiple counter tests using a macro --- chacha20/tests/kats.rs | 67 ++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index f2c4c150..182d1a95 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -95,6 +95,39 @@ mod chacha20test { assert_eq!(&buf[..], &CIPHERTEXT[..]); } + macro_rules! impl_chacha20_potential_counter_issue { + ($name:ident, $num_blocks:literal) => { + #[test] + fn $name() { + let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); + let mut first_block = [0u8; 64]; + assert_eq!(cipher.current_pos::(), 0); + cipher.apply_keystream(&mut first_block); + + let mut buf_1 = [0u8; $num_blocks * 64]; + let mut buf_2 = [0u8; $num_blocks * 64 + 1]; + + // seek to end of keystream + let pos = (1 << 32) * 64 - $num_blocks * 64 - 64; + cipher.try_seek(pos).unwrap(); + assert_eq!(cipher.current_pos::(), pos); + + // overshoot keystream length + let applied_keystream = cipher.try_apply_keystream(&mut buf_2); + assert_eq!(applied_keystream.is_err(), true); + + // exhaust keystream + cipher.apply_keystream(&mut buf_1); + + // seek to beginning and check if the first block is the same as before + let mut first_block_observation_2 = [0u8; 64]; + cipher.seek(0); + assert_eq!(cipher.current_pos::(), 0); + cipher.apply_keystream(&mut first_block_observation_2); + assert_eq!(first_block_observation_2, first_block); + } + }; + } #[test] fn chacha20_potential_counter_issue_v1() { let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); @@ -126,35 +159,13 @@ mod chacha20test { assert_eq!(first_block_observation_2, first_block); } - #[test] - fn chacha20_potential_counter_issue_v2() { - let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); - let mut first_block = [0u8; 64]; - assert_eq!(cipher.current_pos::(), 0); - cipher.apply_keystream(&mut first_block); - - let mut buf_1 = [0u8; 256]; - let mut buf_2 = [0u8; 257]; - - // seek to end of keystream - let pos = (1 << 32) * 64 - 256 - 64; - cipher.try_seek(pos).unwrap(); - assert_eq!(cipher.current_pos::(), pos); - - // overshoot keystream length - let applied_keystream = cipher.try_apply_keystream(&mut buf_2); - assert_eq!(applied_keystream.is_err(), true); + impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v2, 4); + impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v3, 11); + impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v4, 10); + impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v5, 9); + impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v6, 8); + impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v7, 7); - // exhaust keystream - cipher.apply_keystream(&mut buf_1); - - // seek to beginning and check if the first block is the same as before - let mut first_block_observation_2 = [0u8; 64]; - cipher.seek(0); - assert_eq!(cipher.current_pos::(), 0); - cipher.apply_keystream(&mut first_block_observation_2); - assert_eq!(first_block_observation_2, first_block); - } } #[rustfmt::skip] From 2e4cf103078fb810dbc694b8d0a94eef9f53ab0c Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 16 Aug 2025 19:51:27 -0500 Subject: [PATCH 30/64] fmt --- chacha20/tests/kats.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 182d1a95..266ece00 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -165,7 +165,6 @@ mod chacha20test { impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v5, 9); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v6, 8); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v7, 7); - } #[rustfmt::skip] From a6962aa8266013cbe6ea0eedf24dead511398a93 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sun, 17 Aug 2025 10:54:53 -0500 Subject: [PATCH 31/64] updated docs for set_stream() and added another equivalence test with the previous chacha20 rng --- chacha20/Cargo.toml | 1 + chacha20/src/rng.rs | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index cb87d5b3..71c68049 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -31,6 +31,7 @@ zeroize = { version = "1.8.1", optional = true, default-features = false } cpufeatures = "0.2" [dev-dependencies] +chacha20_old = { package = "chacha20", version = "0.10.0-rc.0", features = ["rng"] } cipher = { version = "0.5.0-rc.0", features = ["dev"] } hex-literal = "1" proptest = "1" diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 0fca1346..e2225423 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -482,7 +482,26 @@ macro_rules! impl_chacha_rng { /// * `[u32; 2]` /// /// This is initialized to zero; 296 unique streams of output - /// are available per seed/key. + /// are available per seed/key. In theory a 96-bit nonce can be used by + /// passing the last 64-bits to this function and using the first 32-bits as + /// the most significant half of the 64-bit counter, which may be set + /// directly via `set_block_pos` like so: + /// + /// ``` + /// use chacha20::ChaCha20Rng; + /// use rand_core::{SeedableRng, RngCore}; + /// + /// let seed = [1u8; 32]; + /// let mut rng = ChaCha20Rng::from_seed(seed); + /// + /// // set state[12] to 0, state[13] to 1, state[14] to 2, state[15] to 3 + /// rng.set_block_pos([0u32, 1u32]); + /// rng.set_stream([2u32, 3u32]); + /// + /// // confirm that state is set correctly + /// assert_eq!(rng.get_block_pos(), 1 << 32); + /// assert_eq!(rng.get_stream(), (3 << 32) + 2); + /// ``` #[inline] pub fn set_stream>(&mut self, stream: S) { let stream: StreamId = stream.into(); @@ -1226,4 +1245,17 @@ pub(crate) mod tests { ); assert_ne!(&first_blocks[0..64 * 4], &result[64..]); } + + /// Ensures that old `StreamId` behavior is the same as before when + /// using `set_block_pos`. + #[test] + fn stream_id_equivalence_test() { + use chacha20_old::ChaCha20Rng as OldRng; + let mut old_rng = OldRng::from_seed([0u8; 32]); + let mut rng = ChaCha20Rng::from_seed([0u8; 32]); + old_rng.set_stream([1, 2, 3]); + rng.set_block_pos([0, 1]); + rng.set_stream([2, 3]); + assert_eq!(rng.next_u32(), old_rng.next_u32()); + } } From 56e13050d6e7b21093e0bffbb907bb02ad0ce088 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Mon, 18 Aug 2025 10:04:12 -0500 Subject: [PATCH 32/64] rerun PR tests because they failed to execute the tests --- chacha20/src/rng.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index e2225423..edf99ee8 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -491,7 +491,7 @@ macro_rules! impl_chacha_rng { /// use chacha20::ChaCha20Rng; /// use rand_core::{SeedableRng, RngCore}; /// - /// let seed = [1u8; 32]; + /// let seed = [2u8; 32]; /// let mut rng = ChaCha20Rng::from_seed(seed); /// /// // set state[12] to 0, state[13] to 1, state[14] to 2, state[15] to 3 From 289f2bc1a1daef86d86ca3a4bd84c47057503e7c Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Mon, 18 Aug 2025 10:11:57 -0500 Subject: [PATCH 33/64] fix failing tests because apparently there can't be 2 chacha20 packages --- chacha20/Cargo.toml | 1 - chacha20/src/rng.rs | 13 ------------- 2 files changed, 14 deletions(-) diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index 71c68049..cb87d5b3 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -31,7 +31,6 @@ zeroize = { version = "1.8.1", optional = true, default-features = false } cpufeatures = "0.2" [dev-dependencies] -chacha20_old = { package = "chacha20", version = "0.10.0-rc.0", features = ["rng"] } cipher = { version = "0.5.0-rc.0", features = ["dev"] } hex-literal = "1" proptest = "1" diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index edf99ee8..8bcf46b0 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1245,17 +1245,4 @@ pub(crate) mod tests { ); assert_ne!(&first_blocks[0..64 * 4], &result[64..]); } - - /// Ensures that old `StreamId` behavior is the same as before when - /// using `set_block_pos`. - #[test] - fn stream_id_equivalence_test() { - use chacha20_old::ChaCha20Rng as OldRng; - let mut old_rng = OldRng::from_seed([0u8; 32]); - let mut rng = ChaCha20Rng::from_seed([0u8; 32]); - old_rng.set_stream([1, 2, 3]); - rng.set_block_pos([0, 1]); - rng.set_stream([2, 3]); - assert_eq!(rng.next_u32(), old_rng.next_u32()); - } } From ddeba2487205bb87fd025bd0924a2f8e7fd89b05 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Mon, 18 Aug 2025 19:41:31 -0500 Subject: [PATCH 34/64] proof that restricting block_pos to 0..max instead of 0..=max passes this updated test --- chacha20/tests/kats.rs | 43 ++++++++---------------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 266ece00..835f1bc9 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -100,9 +100,9 @@ mod chacha20test { #[test] fn $name() { let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); - let mut first_block = [0u8; 64]; + let mut first_4_blocks = [0u8; 256]; assert_eq!(cipher.current_pos::(), 0); - cipher.apply_keystream(&mut first_block); + cipher.apply_keystream(&mut first_4_blocks); let mut buf_1 = [0u8; $num_blocks * 64]; let mut buf_2 = [0u8; $num_blocks * 64 + 1]; @@ -120,44 +120,17 @@ mod chacha20test { cipher.apply_keystream(&mut buf_1); // seek to beginning and check if the first block is the same as before - let mut first_block_observation_2 = [0u8; 64]; cipher.seek(0); assert_eq!(cipher.current_pos::(), 0); - cipher.apply_keystream(&mut first_block_observation_2); - assert_eq!(first_block_observation_2, first_block); + cipher.apply_keystream(&mut first_4_blocks); + + // if this assert fails, exhausting the keystream increments + // state[13], resulting in a different keystream when it + // should be the same + assert_eq!(first_4_blocks, [0u8; 256]); } }; } - #[test] - fn chacha20_potential_counter_issue_v1() { - let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); - let mut first_block = [0u8; 64]; - assert_eq!(cipher.current_pos::(), 0); - cipher.apply_keystream(&mut first_block); - - let mut buf_1 = [0u8; 64]; - let mut buf_2 = [0u8; 65]; - - // seek to end of keystream - let max_bytes = (u64::from(u32::MAX) + 1) * 64; - let pos = max_bytes - 128; - cipher.try_seek(pos).unwrap(); - assert_eq!(cipher.current_pos::(), pos); - - // overshoot keystream length - let applied_keystream = cipher.try_apply_keystream(&mut buf_2); - assert_eq!(applied_keystream.is_err(), true); - - // exhaust keystream - cipher.apply_keystream(&mut buf_1); - - // seek to beginning and check if the first block is the same as before - let mut first_block_observation_2 = [0u8; 64]; - cipher.seek(0); - assert_eq!(cipher.current_pos::(), 0); - cipher.apply_keystream(&mut first_block_observation_2); - assert_eq!(first_block_observation_2, first_block); - } impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v2, 4); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v3, 11); From eff2b26573cdddbc28611d80caa84ee518b6bc4b Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Mon, 18 Aug 2025 19:45:53 -0500 Subject: [PATCH 35/64] proof that fixing the '- 64' and 'remaining_blocks()' fails the test, indicating that there is in fact overflow --- chacha20/src/variants.rs | 13 +++++++++---- chacha20/tests/kats.rs | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index 400d75aa..62dc7634 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -45,7 +45,13 @@ impl Variant for Ietf { } #[inline(always)] fn remaining_blocks(block_pos: Self::Counter) -> Option { - (u32::MAX - block_pos).try_into().ok() + let total_blocks = 1u64 << 32; + let rem = total_blocks - block_pos as u64; + if rem > usize::MAX as u64 { + None + } else { + Some(rem as usize) + } } } @@ -70,9 +76,8 @@ impl Variant for Legacy { } #[inline(always)] fn remaining_blocks(block_pos: Self::Counter) -> Option { - let remaining = u64::MAX - block_pos; - #[cfg(target_pointer_width = "32")] - if remaining > usize::MAX as u64 { + let remaining = (1u128 << 64) - block_pos as u128; + if remaining > usize::MAX as u128 { return None; } remaining.try_into().ok() diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 835f1bc9..c6669f0b 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -108,7 +108,7 @@ mod chacha20test { let mut buf_2 = [0u8; $num_blocks * 64 + 1]; // seek to end of keystream - let pos = (1 << 32) * 64 - $num_blocks * 64 - 64; + let pos = (1 << 32) * 64 - $num_blocks * 64; cipher.try_seek(pos).unwrap(); assert_eq!(cipher.current_pos::(), pos); From b29d7695913eecf657507aa872f7e52b98dee573 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Mon, 18 Aug 2025 19:59:58 -0500 Subject: [PATCH 36/64] proof that the backends can be fixed to prevent overflowing --- chacha20/src/backends/avx2.rs | 7 +++++-- chacha20/src/backends/neon.rs | 15 ++++++++++----- chacha20/src/backends/soft.rs | 4 +++- chacha20/src/backends/sse2.rs | 7 +++++-- chacha20/src/lib.rs | 10 +++++----- 5 files changed, 28 insertions(+), 15 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index 6b0a4d3f..aa0105c3 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -27,10 +27,11 @@ const N: usize = PAR_BLOCKS / 2; #[inline] #[target_feature(enable = "avx2")] #[cfg(feature = "cipher")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamCipherClosure, + V: Variant, { let state_ptr = state.as_ptr() as *const __m128i; let v = [ @@ -54,7 +55,9 @@ where f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; - state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; + if size_of::() == 8 { + state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; + } } #[inline] diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index f03f6b0d..561e8cb5 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -60,19 +60,24 @@ impl Backend { #[inline] #[cfg(feature = "cipher")] #[target_feature(enable = "neon")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamCipherClosure, + V: Variant, { let mut backend = Backend::::new(state); f.call(&mut backend); - vst1q_u64( - state.as_mut_ptr().offset(12) as *mut u64, - vreinterpretq_u64_u32(backend.state[3]), - ); + if size_of::() == 8 { + vst1q_u64( + state.as_mut_ptr().offset(12) as *mut u64, + vreinterpretq_u64_u32(backend.state[3]), + ); + } else { + state[12] = vgetq_lane_u32(backend.state[3], 0); + } } #[inline] diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index b19b7079..8125b641 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -33,7 +33,9 @@ impl StreamCipherBackend for Backend<'_, R, V> { self.0.state[12] = v; } else { self.0.state[12] = 0; - self.0.state[13] = self.0.state[13].wrapping_add(1); + if size_of::() == 8 { + self.0.state[13] = self.0.state[13].wrapping_add(1); + } } for (chunk, val) in block.chunks_exact_mut(4).zip(res.iter()) { diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 0886659b..e200e55f 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -23,10 +23,11 @@ const PAR_BLOCKS: usize = 4; #[inline] #[target_feature(enable = "sse2")] #[cfg(feature = "cipher")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamCipherClosure, + V: Variant, { let state_ptr = state.as_ptr() as *const __m128i; let mut backend = Backend:: { @@ -42,7 +43,9 @@ where f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; - state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; + if size_of::() == 8 { + state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; + } } struct Backend { diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index a4b444b9..6ee49cd2 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -297,21 +297,21 @@ impl StreamCipherCore for ChaChaCore { cfg_if! { if #[cfg(chacha20_force_avx2)] { unsafe { - backends::avx2::inner::(&mut self.state, f); + backends::avx2::inner::(&mut self.state, f); } } else if #[cfg(chacha20_force_sse2)] { unsafe { - backends::sse2::inner::(&mut self.state, f); + backends::sse2::inner::(&mut self.state, f); } } else { let (avx2_token, sse2_token) = self.tokens; if avx2_token.get() { unsafe { - backends::avx2::inner::(&mut self.state, f); + backends::avx2::inner::(&mut self.state, f); } } else if sse2_token.get() { unsafe { - backends::sse2::inner::(&mut self.state, f); + backends::sse2::inner::(&mut self.state, f); } } else { f.call(&mut backends::soft::Backend(self)); @@ -320,7 +320,7 @@ impl StreamCipherCore for ChaChaCore { } } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { unsafe { - backends::neon::inner::(&mut self.state, f); + backends::neon::inner::(&mut self.state, f); } } else { f.call(&mut backends::soft::Backend(self)); From f304e1421e204a19ca8d832da7498f40bca7aa9f Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Mon, 18 Aug 2025 20:03:51 -0500 Subject: [PATCH 37/64] fix imports --- chacha20/src/backends/avx2.rs | 4 ++-- chacha20/src/backends/neon.rs | 4 ++-- chacha20/src/backends/sse2.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index aa0105c3..b617730e 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -1,9 +1,9 @@ #![allow(unsafe_op_in_unsafe_fn)] -use crate::Rounds; +use crate::{Rounds, Variant}; use core::marker::PhantomData; #[cfg(feature = "rng")] -use crate::{ChaChaCore, Variant}; +use crate::ChaChaCore; #[cfg(feature = "cipher")] use crate::{chacha::Block, STATE_WORDS}; diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 561e8cb5..bbd10202 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -4,11 +4,11 @@ //! Adapted from the Crypto++ `chacha_simd` implementation by Jack Lloyd and //! Jeffrey Walton (public domain). -use crate::{Rounds, STATE_WORDS}; +use crate::{Rounds, STATE_WORDS, Variant}; use core::{arch::aarch64::*, marker::PhantomData}; #[cfg(feature = "rand_core")] -use crate::{ChaChaCore, Variant}; +use crate::ChaChaCore; #[cfg(feature = "cipher")] use crate::chacha::Block; diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index e200e55f..2aea519c 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -1,8 +1,8 @@ #![allow(unsafe_op_in_unsafe_fn)] -use crate::Rounds; +use crate::{Rounds, Variant}; #[cfg(feature = "rng")] -use crate::{ChaChaCore, Variant}; +use crate::{ChaChaCore}; #[cfg(feature = "cipher")] use crate::{chacha::Block, STATE_WORDS}; From acaf4be4f83806912b4cd704e4696aa18b4126e6 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Mon, 18 Aug 2025 20:40:51 -0500 Subject: [PATCH 38/64] added conditional compilation for IETF remaining_blocks() --- chacha20/src/variants.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index 62dc7634..a8f9dec5 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -47,11 +47,13 @@ impl Variant for Ietf { fn remaining_blocks(block_pos: Self::Counter) -> Option { let total_blocks = 1u64 << 32; let rem = total_blocks - block_pos as u64; + #[cfg(target_pointer_width = "32")] if rem > usize::MAX as u64 { - None + return None; } else { - Some(rem as usize) + return Some(rem as usize); } + rem.try_into().ok() } } From 0834318533e821189520fdbfa7ba65580d1b484b Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Mon, 18 Aug 2025 20:45:14 -0500 Subject: [PATCH 39/64] fix unreachable expression :/ --- chacha20/src/variants.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index a8f9dec5..f131db80 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -50,8 +50,6 @@ impl Variant for Ietf { #[cfg(target_pointer_width = "32")] if rem > usize::MAX as u64 { return None; - } else { - return Some(rem as usize); } rem.try_into().ok() } From 9de7c2fbbf66311825e9c506783adf5afd460785 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Tue, 19 Aug 2025 09:03:28 -0500 Subject: [PATCH 40/64] corrected code for #438; results in big endian issues... --- chacha20/src/rng.rs | 64 ++++++--------------------------------------- 1 file changed, 8 insertions(+), 56 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 8bcf46b0..a0624183 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -81,40 +81,6 @@ impl Debug for Seed { } } -/// A wrapper for set_word_pos() input. -/// -/// Can be constructed from any of the following: -/// * `u128` -/// * `[u8; 9]` -pub struct WordPosInput { - block_pos: [u32; 2], - index: usize, -} - -impl From<[u8; 9]> for WordPosInput { - fn from(value: [u8; 9]) -> Self { - let s12 = u32::from_le_bytes(value[0..4].try_into().unwrap()); - let s13 = u32::from_le_bytes(value[4..8].try_into().unwrap()); - Self { - block_pos: [s12, s13], - index: (value[8] & 0b1111) as usize, - } - } -} - -impl From for WordPosInput { - fn from(value: u128) -> Self { - let index = (value.to_le_bytes()[0] & 0b1111) as usize; - let counter = value >> 4; - let s12 = counter as u32; - let s13 = (counter >> 32) as u32; - Self { - block_pos: [s12, s13], - index, - } - } -} - /// A wrapper for `stream_id`. /// /// Can be constructed from any of the following: @@ -324,12 +290,8 @@ macro_rules! impl_chacha_rng { /// // or a [u32; 2] /// rng.set_stream([4u32; 2]); /// - /// /// rng.set_word_pos(5); /// - /// // you can also use a [u8; 9] in `.set_word_pos()` - /// rng.set_word_pos([2u8; 9]); - /// /// let x = rng.next_u32(); /// let mut array = [0u8; 32]; /// rng.fill_bytes(&mut array); @@ -430,25 +392,19 @@ macro_rules! impl_chacha_rng { word_pos & ((1 << 68) - 1) } - /// Set the offset from the start of the stream, in 32-bit words. This method - /// takes any of the following: - /// * `u128` - /// * `[u8; 9]` + /// Set the offset from the start of the stream, in 32-bit words. /// /// As with `get_word_pos`, we use a 36-bit number. When given a `u64`, we use /// the least significant 4 bits as the RNG's index, and the 32 bits before it /// as the block position. - /// - /// When given a `[u8; 9]`, the word_pos is set similarly, but it is more - /// arbitrary since the index is set using the lowest 4 bits of the last - /// byte. #[inline] - pub fn set_word_pos>(&mut self, word_offset: W) { - let word_pos: WordPosInput = word_offset.into(); - self.core.core.0.state[12] = word_pos.block_pos[0]; - self.core.core.0.state[13] = word_pos.block_pos[1]; - // generate will increase block_pos by 4 - self.core.generate_and_set(word_pos.index); + pub fn set_word_pos(&mut self, word_offset: u128) { + let index = (word_offset.to_le_bytes()[0] & 0b1111) as usize; + let counter = word_offset >> 4; + //self.set_block_pos(counter as u64); + self.core.core.0.state[12] = counter as u32; + self.core.core.0.state[13] = (counter >> 32) as u32; + self.core.generate_and_set(index); } /// Set the block pos and reset the RNG's index. @@ -720,10 +676,6 @@ pub(crate) mod tests { // test set_word_pos with u64 rng.set_word_pos(8888); assert_eq!(rng.get_word_pos(), 8888); - - // test set_word_pos with [u8; 9] - rng.set_word_pos([55, 0, 0, 0, 0, 0, 0, 0, 0]); - assert_eq!(rng.get_word_pos(), 55 * 16); } #[cfg(feature = "serde1")] From a1535f5c33af05b5608f02b7ae7b7f9031dfad73 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Tue, 19 Aug 2025 10:07:10 -0500 Subject: [PATCH 41/64] fixed endianness thanks to dhardy --- chacha20/src/rng.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index a0624183..40971baa 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -87,6 +87,8 @@ impl Debug for Seed { /// * `[u32; 3]` /// * `[u8; 12]` /// * `u128` +/// +/// The arrays should be in little endian order. pub struct StreamId([u32; Self::LEN]); impl StreamId { @@ -131,15 +133,16 @@ impl From for StreamId { /// A wrapper for `block_pos`. /// /// Can be constructed from any of the following: -/// * `[u8; 8]` /// * `u64` +/// * `[u8; 8]` /// * `[u32; 2]` +/// +/// The arrays should be in little endian order. pub struct BlockPos([u32; 2]); impl From for BlockPos { #[inline] - fn from(mut value: u64) -> Self { - value = value.to_le(); + fn from(value: u64) -> Self { Self([value as u32, (value >> 32) as u32]) } } @@ -153,8 +156,7 @@ impl From<[u8; 8]> for BlockPos { impl From<[u32; 2]> for BlockPos { #[inline] - fn from(mut value: [u32; 2]) -> Self { - value.iter_mut().for_each(|x| *x = x.to_le()); + fn from(value: [u32; 2]) -> Self { Self(value) } } @@ -399,7 +401,7 @@ macro_rules! impl_chacha_rng { /// as the block position. #[inline] pub fn set_word_pos(&mut self, word_offset: u128) { - let index = (word_offset.to_le_bytes()[0] & 0b1111) as usize; + let index = (word_offset & 0b1111) as usize; let counter = word_offset >> 4; //self.set_block_pos(counter as u64); self.core.core.0.state[12] = counter as u32; @@ -415,13 +417,15 @@ macro_rules! impl_chacha_rng { /// * `u64` /// * `[u8; 8]` /// * `[u32; 2]` + /// + /// Note: the arrays should be in little endian order. #[inline] #[allow(unused)] pub fn set_block_pos>(&mut self, block_pos: B) { self.core.reset(); let block_pos = block_pos.into().0; - self.core.core.0.state[12] = block_pos[0].to_le(); - self.core.core.0.state[13] = block_pos[1].to_le() + self.core.core.0.state[12] = block_pos[0]; + self.core.core.0.state[13] = block_pos[1] } /// Get the block pos. @@ -436,6 +440,8 @@ macro_rules! impl_chacha_rng { /// * `u64` /// * `[u8; 8]` /// * `[u32; 2]` + /// + /// Note: the arrays should be in little endian order. /// /// This is initialized to zero; 296 unique streams of output /// are available per seed/key. In theory a 96-bit nonce can be used by @@ -651,7 +657,7 @@ pub(crate) mod tests { let seed = [44u8; 32]; let mut rng = ChaCha20Rng::from_seed(seed); - // test set_stream with [u32; 3] + // test set_stream with [u32; 2] rng.set_stream([313453u32, 0u32]); assert_eq!(rng.get_stream(), 313453); From b6e705489fdb39d27ae6a2568e16362026f0600b Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Tue, 19 Aug 2025 10:12:02 -0500 Subject: [PATCH 42/64] fmt --- chacha20/src/rng.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 40971baa..73699dee 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -87,7 +87,7 @@ impl Debug for Seed { /// * `[u32; 3]` /// * `[u8; 12]` /// * `u128` -/// +/// /// The arrays should be in little endian order. pub struct StreamId([u32; Self::LEN]); @@ -136,7 +136,7 @@ impl From for StreamId { /// * `u64` /// * `[u8; 8]` /// * `[u32; 2]` -/// +/// /// The arrays should be in little endian order. pub struct BlockPos([u32; 2]); @@ -417,7 +417,7 @@ macro_rules! impl_chacha_rng { /// * `u64` /// * `[u8; 8]` /// * `[u32; 2]` - /// + /// /// Note: the arrays should be in little endian order. #[inline] #[allow(unused)] @@ -440,7 +440,7 @@ macro_rules! impl_chacha_rng { /// * `u64` /// * `[u8; 8]` /// * `[u32; 2]` - /// + /// /// Note: the arrays should be in little endian order. /// /// This is initialized to zero; 296 unique streams of output From d42b703d09f7133d74f8e21d8e836e2f8de5e862 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Tue, 19 Aug 2025 10:23:17 -0500 Subject: [PATCH 43/64] forbidden use of exhausted/wrapped cipher regarding #444, passes test --- chacha20/tests/kats.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index c6669f0b..5f886e99 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -120,7 +120,7 @@ mod chacha20test { cipher.apply_keystream(&mut buf_1); // seek to beginning and check if the first block is the same as before - cipher.seek(0); + //cipher.seek(0); assert_eq!(cipher.current_pos::(), 0); cipher.apply_keystream(&mut first_4_blocks); From 7b73d5cac8a2e8a10fbffd6f147ad3137b5a53f2 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Tue, 19 Aug 2025 10:33:23 -0500 Subject: [PATCH 44/64] reverted 'remaining_blocks()' and backends due to #444 --- chacha20/src/backends/avx2.rs | 4 +--- chacha20/src/backends/neon.rs | 12 ++++-------- chacha20/src/backends/soft.rs | 4 +--- chacha20/src/backends/sse2.rs | 4 +--- chacha20/src/variants.rs | 13 ++++--------- chacha20/tests/kats.rs | 7 +++++-- 6 files changed, 16 insertions(+), 28 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index b617730e..15c2b074 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -55,9 +55,7 @@ where f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; - if size_of::() == 8 { - state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; - } + state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; } #[inline] diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index bbd10202..adfaf887 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -70,14 +70,10 @@ where f.call(&mut backend); - if size_of::() == 8 { - vst1q_u64( - state.as_mut_ptr().offset(12) as *mut u64, - vreinterpretq_u64_u32(backend.state[3]), - ); - } else { - state[12] = vgetq_lane_u32(backend.state[3], 0); - } + vst1q_u64( + state.as_mut_ptr().offset(12) as *mut u64, + vreinterpretq_u64_u32(backend.state[3]), + ); } #[inline] diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index 8125b641..b19b7079 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -33,9 +33,7 @@ impl StreamCipherBackend for Backend<'_, R, V> { self.0.state[12] = v; } else { self.0.state[12] = 0; - if size_of::() == 8 { - self.0.state[13] = self.0.state[13].wrapping_add(1); - } + self.0.state[13] = self.0.state[13].wrapping_add(1); } for (chunk, val) in block.chunks_exact_mut(4).zip(res.iter()) { diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 2aea519c..6ade18c1 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -43,9 +43,7 @@ where f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; - if size_of::() == 8 { - state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; - } + state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; } struct Backend { diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index f131db80..400d75aa 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -45,13 +45,7 @@ impl Variant for Ietf { } #[inline(always)] fn remaining_blocks(block_pos: Self::Counter) -> Option { - let total_blocks = 1u64 << 32; - let rem = total_blocks - block_pos as u64; - #[cfg(target_pointer_width = "32")] - if rem > usize::MAX as u64 { - return None; - } - rem.try_into().ok() + (u32::MAX - block_pos).try_into().ok() } } @@ -76,8 +70,9 @@ impl Variant for Legacy { } #[inline(always)] fn remaining_blocks(block_pos: Self::Counter) -> Option { - let remaining = (1u128 << 64) - block_pos as u128; - if remaining > usize::MAX as u128 { + let remaining = u64::MAX - block_pos; + #[cfg(target_pointer_width = "32")] + if remaining > usize::MAX as u64 { return None; } remaining.try_into().ok() diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 5f886e99..0e8c4f1b 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -108,7 +108,7 @@ mod chacha20test { let mut buf_2 = [0u8; $num_blocks * 64 + 1]; // seek to end of keystream - let pos = (1 << 32) * 64 - $num_blocks * 64; + let pos = (1 << 32) * 64 - $num_blocks * 64 - 64; cipher.try_seek(pos).unwrap(); assert_eq!(cipher.current_pos::(), pos); @@ -119,8 +119,11 @@ mod chacha20test { // exhaust keystream cipher.apply_keystream(&mut buf_1); + // verify that we cannot write another byte + assert!(cipher.try_apply_keystream(&mut [0u8; 1]).is_err()); + // seek to beginning and check if the first block is the same as before - //cipher.seek(0); + cipher.seek(0); assert_eq!(cipher.current_pos::(), 0); cipher.apply_keystream(&mut first_4_blocks); From 62a7e949167b1faf3f2c92b9df6bd45d2cd3da38 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 22 Aug 2025 14:23:05 -0500 Subject: [PATCH 45/64] implemented 32-bit counter. edited docs a little. implemented core wrap/overflow test --- chacha20/src/backends/avx2.rs | 45 ++++++++++++++++------ chacha20/src/backends/neon.rs | 70 ++++++++++++++++++++--------------- chacha20/src/backends/soft.rs | 4 +- chacha20/src/backends/sse2.rs | 54 +++++++++++++++++++-------- chacha20/src/legacy.rs | 4 -- chacha20/src/variants.rs | 7 +--- chacha20/tests/kats.rs | 32 ++++++++++++++++ 7 files changed, 147 insertions(+), 69 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index 15c2b074..3f1a97ac 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -40,22 +40,33 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); + if size_of::() == 8 { + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); + } else { + c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); + } let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); + if size_of::() == 8 { + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); + } else { + c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); + } } - let mut backend = Backend:: { + let mut backend = Backend:: { v, ctr, _pd: PhantomData, + _variant: PhantomData, }; f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; - state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; + if size_of::() == 8 { + state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; + } } #[inline] @@ -79,10 +90,11 @@ where ctr[i] = c; c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); } - let mut backend = Backend:: { + let mut backend = Backend:: { v, ctr, _pd: PhantomData, + _variant: PhantomData }; backend.rng_gen_par_ks_blocks(buffer); @@ -91,30 +103,35 @@ where core.state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; } -struct Backend { +struct Backend { v: [__m256i; 3], ctr: [__m256i; N], _pd: PhantomData, + _variant: PhantomData, } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } #[cfg(feature = "cipher")] -impl StreamCipherBackend for Backend { +impl StreamCipherBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { unsafe { let res = rounds::(&self.v, &self.ctr); for c in self.ctr.iter_mut() { - *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)); + if size_of::() == 8 { + *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)); + } else { + *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)); + } } let res0: [__m128i; 8] = core::mem::transmute(res[0]); @@ -133,7 +150,11 @@ impl StreamCipherBackend for Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); + if size_of::() == 8 { + *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); + } else { + *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + } } let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i; @@ -150,7 +171,7 @@ impl StreamCipherBackend for Backend { } #[cfg(feature = "rng")] -impl Backend { +impl Backend { #[inline(always)] fn rng_gen_par_ks_blocks(&mut self, blocks: &mut [u32; 64]) { unsafe { diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index adfaf887..ced030a0 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -19,22 +19,27 @@ use cipher::{ consts::{U4, U64}, }; -struct Backend { +struct Backend { state: [uint32x4_t; 4], ctrs: [uint32x4_t; 4], _pd: PhantomData, + _variant: PhantomData, } -macro_rules! add64 { - ($a:expr, $b:expr) => { - vreinterpretq_u32_u64(vaddq_u64( - vreinterpretq_u64_u32($a), - vreinterpretq_u64_u32($b), - )) +macro_rules! add_counter { + ($a:expr, $b:expr, $variant:ty) => { + if size_of::<<$variant>::Counter>() == 8 { + vreinterpretq_u32_u64(vaddq_u64( + vreinterpretq_u64_u32($a), + vreinterpretq_u64_u32($b), + )) + } else { + vaddq_u32($a, $b) + } }; } -impl Backend { +impl Backend { #[inline] unsafe fn new(state: &mut [u32; STATE_WORDS]) -> Self { let state = [ @@ -49,10 +54,11 @@ impl Backend { vld1q_u32([3, 0, 0, 0].as_ptr()), vld1q_u32([4, 0, 0, 0].as_ptr()), ]; - Backend:: { + Backend:: { state, ctrs, _pd: PhantomData, + _variant: PhantomData, } } } @@ -66,14 +72,18 @@ where F: StreamCipherClosure, V: Variant, { - let mut backend = Backend::::new(state); + let mut backend = Backend::::new(state); f.call(&mut backend); - vst1q_u64( - state.as_mut_ptr().offset(12) as *mut u64, - vreinterpretq_u64_u32(backend.state[3]), - ); + if size_of::() == 8 { + vst1q_u64( + state.as_mut_ptr().offset(12) as *mut u64, + vreinterpretq_u64_u32(backend.state[3]), + ); + } else { + state[12] = vgetq_lane_u32(backend.state[3], 0); + } } #[inline] @@ -86,7 +96,7 @@ where R: Rounds, V: Variant, { - let mut backend = Backend::::new(&mut core.state); + let mut backend = Backend::::new(&mut core.state); backend.write_par_ks_blocks(buffer); @@ -97,11 +107,11 @@ where } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } @@ -113,7 +123,7 @@ macro_rules! add_assign_vec { } #[cfg(feature = "cipher")] -impl StreamCipherBackend for Backend { +impl StreamCipherBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let state3 = self.state[3]; @@ -121,7 +131,7 @@ impl StreamCipherBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - self.state[3] = add64!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); + self.state[3] = add_counter!(state3, vld1q_u32([1, 0, 0, 0].as_ptr()), V); } } @@ -134,19 +144,19 @@ impl StreamCipherBackend for Backend { self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[0]), + add_counter!(self.state[3], self.ctrs[0], V), ], [ self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[1]), + add_counter!(self.state[3], self.ctrs[1], V), ], [ self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[2]), + add_counter!(self.state[3], self.ctrs[2], V), ], ]; @@ -162,7 +172,7 @@ impl StreamCipherBackend for Backend { if block > 0 { add_assign_vec!( blocks[block][3], - add64!(self.state[3], self.ctrs[block - 1]) + add_counter!(self.state[3], self.ctrs[block - 1], V) ); } else { add_assign_vec!(blocks[block][3], self.state[3]); @@ -175,7 +185,7 @@ impl StreamCipherBackend for Backend { ); } } - self.state[3] = add64!(self.state[3], self.ctrs[3]); + self.state[3] = add_counter!(self.state[3], self.ctrs[3], V); } } } @@ -201,7 +211,7 @@ macro_rules! extract { }; } -impl Backend { +impl Backend { #[inline(always)] /// Generates `num_blocks` blocks and blindly writes them to `dest_ptr` /// @@ -218,19 +228,19 @@ impl Backend { self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[0]), + add_counter!(self.state[3], self.ctrs[0], V), ], [ self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[1]), + add_counter!(self.state[3], self.ctrs[1], V), ], [ self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[2]), + add_counter!(self.state[3], self.ctrs[2], V), ], ]; @@ -247,7 +257,7 @@ impl Backend { if block > 0 { add_assign_vec!( blocks[block][3], - add64!(self.state[3], self.ctrs[block - 1]) + add_counter!(self.state[3], self.ctrs[block - 1], V) ); } else { add_assign_vec!(blocks[block][3], self.state[3]); @@ -261,7 +271,7 @@ impl Backend { } dest_ptr = dest_ptr.add(64); } - self.state[3] = add64!(self.state[3], self.ctrs[3]); + self.state[3] = add_counter!(self.state[3], self.ctrs[3], V); } } diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index b19b7079..8125b641 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -33,7 +33,9 @@ impl StreamCipherBackend for Backend<'_, R, V> { self.0.state[12] = v; } else { self.0.state[12] = 0; - self.0.state[13] = self.0.state[13].wrapping_add(1); + if size_of::() == 8 { + self.0.state[13] = self.0.state[13].wrapping_add(1); + } } for (chunk, val) in block.chunks_exact_mut(4).zip(res.iter()) { diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 6ade18c1..eb55a271 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -30,7 +30,7 @@ where V: Variant, { let state_ptr = state.as_ptr() as *const __m128i; - let mut backend = Backend:: { + let mut backend = Backend:: { v: [ _mm_loadu_si128(state_ptr.add(0)), _mm_loadu_si128(state_ptr.add(1)), @@ -38,36 +38,44 @@ where _mm_loadu_si128(state_ptr.add(3)), ], _pd: PhantomData, + _variant: PhantomData, }; f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; - state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; + if size_of::() == 8 { + state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; + } } -struct Backend { +struct Backend { v: [__m128i; 4], _pd: PhantomData, + _variant: PhantomData, } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } #[cfg(feature = "cipher")] -impl StreamCipherBackend for Backend { +impl StreamCipherBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { unsafe { - let res = rounds::(&self.v); - self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)); + let res = rounds::(&self.v); + if size_of::() == 8 { + self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)); + } else { + self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); + } let block_ptr = block.as_mut_ptr() as *mut __m128i; for i in 0..4 { @@ -78,8 +86,12 @@ impl StreamCipherBackend for Backend { #[inline(always)] fn gen_par_ks_blocks(&mut self, blocks: &mut cipher::ParBlocks) { unsafe { - let res = rounds::(&self.v); - self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)); + let res = rounds::(&self.v); + if size_of::() == 8 { + self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)); + } else { + self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, PAR_BLOCKS as i32)); + } let blocks_ptr = blocks.as_mut_ptr() as *mut __m128i; for block in 0..PAR_BLOCKS { @@ -100,7 +112,7 @@ where V: Variant, { let state_ptr = core.state.as_ptr() as *const __m128i; - let mut backend = Backend:: { + let mut backend = Backend:: { v: [ _mm_loadu_si128(state_ptr.add(0)), _mm_loadu_si128(state_ptr.add(1)), @@ -108,6 +120,7 @@ where _mm_loadu_si128(state_ptr.add(3)), ], _pd: PhantomData, + _variant: PhantomData, }; backend.gen_ks_blocks(buffer); @@ -117,11 +130,11 @@ where } #[cfg(feature = "rng")] -impl Backend { +impl Backend { #[inline(always)] fn gen_ks_blocks(&mut self, block: &mut [u32]) { unsafe { - let res = rounds::(&self.v); + let res = rounds::(&self.v); self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)); let blocks_ptr = block.as_mut_ptr() as *mut __m128i; @@ -136,10 +149,14 @@ impl Backend { #[inline] #[target_feature(enable = "sse2")] -unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { +unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { let mut res = [*v; 4]; for block in 1..PAR_BLOCKS { - res[block][3] = _mm_add_epi64(res[block][3], _mm_set_epi64x(0, block as i64)); + if size_of::() == 8 { + res[block][3] = _mm_add_epi64(res[block][3], _mm_set_epi64x(0, block as i64)); + } else { + res[block][3] = _mm_add_epi32(res[block][3], _mm_set_epi32(0, 0, 0, block as i32)); + } } for _ in 0..R::COUNT { @@ -150,7 +167,12 @@ unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { for i in 0..3 { res[block][i] = _mm_add_epi32(res[block][i], v[i]); } - res[block][3] = _mm_add_epi32(res[block][3], _mm_add_epi64(v[3], _mm_set_epi64x(0, block as i64))); + let ctr = if size_of::() == 8 { + _mm_add_epi64(v[3], _mm_set_epi64x(0, block as i64)) + } else { + _mm_add_epi32(v[3], _mm_set_epi32(0, 0, 0, block as i32)) + }; + res[block][3] = _mm_add_epi32(res[block][3], ctr); } res diff --git a/chacha20/src/legacy.rs b/chacha20/src/legacy.rs index c95fd86e..4d49f4ef 100644 --- a/chacha20/src/legacy.rs +++ b/chacha20/src/legacy.rs @@ -13,10 +13,6 @@ pub type LegacyNonce = Array; use crate::variants::Legacy; /// The ChaCha20 stream cipher (legacy "djb" construction with 64-bit nonce). -/// -/// **WARNING:** this implementation uses 32-bit counter, while the original -/// implementation uses 64-bit counter. In other words, it does -/// not allow encrypting of more than 256 GiB of data. pub type ChaCha20Legacy = StreamCipherCoreWrapper; /// /// The ChaCha20 stream cipher (legacy "djb" construction with 64-bit nonce). diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index 400d75aa..764d94a4 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -1,12 +1,9 @@ //! Distinguishing features of ChaCha variants. -//! -//! To be revisited for the 64-bit counter. /// A trait that distinguishes some ChaCha variants pub trait Variant: Clone { - /// the size of the Nonce in u32s + /// Where the nonce starts in the state array const NONCE_INDEX: usize; - const COUNTER_MAX: u64; #[cfg(feature = "cipher")] type Counter: cipher::StreamCipherCounter; #[cfg(not(feature = "cipher"))] @@ -31,7 +28,6 @@ pub trait Variant: Clone { pub struct Ietf(); impl Variant for Ietf { const NONCE_INDEX: usize = 13; - const COUNTER_MAX: u64 = u32::MAX as u64; type Counter = u32; type CounterWords = [u32; 1]; @@ -56,7 +52,6 @@ pub struct Legacy(); #[cfg(feature = "legacy")] impl Variant for Legacy { const NONCE_INDEX: usize = 14; - const COUNTER_MAX: u64 = u64::MAX; type Counter = u64; type CounterWords = [u32; 2]; diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 0e8c4f1b..bb1e8aef 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -141,6 +141,38 @@ mod chacha20test { impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v5, 9); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v6, 8); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v7, 7); + + #[test] + fn chacha20core_counter_overflow() { + use cipher::{StreamCipherCore, StreamCipherSeekCore}; + let cipher = ChaCha20::new(&KEY.into(), &IV.into()); + let mut core = cipher.get_core().clone(); + + // observe the first block two "different" ways + let mut first_block_observation_1 = Default::default(); + core.write_keystream_block(&mut first_block_observation_1); + core.set_block_pos(0); + let mut first_block_observation_2 = Default::default(); + core.write_keystream_block(&mut first_block_observation_2); + // proof that setting block pos to 0 results in first_block_observation_1 + assert_eq!(first_block_observation_1, first_block_observation_2); + + // try to make the counter overflow/wrap + core.set_block_pos(u32::MAX); + core.write_keystream_block(&mut Default::default()); + + let mut first_block_observation_3 = Default::default(); + core.write_keystream_block(&mut first_block_observation_3); + // fails if the counter doesn't wrap + assert_eq!(first_block_observation_1, first_block_observation_3); + + core.set_block_pos(0); + let mut first_block_observation_4 = Default::default(); + core.write_keystream_block(&mut first_block_observation_4); + + // fails when `state[13]` changes + assert_eq!(first_block_observation_1, first_block_observation_4) + } } #[rustfmt::skip] From de470e113cdb3ee30e44de41fa79e9531b44f44f Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 22 Aug 2025 14:35:17 -0500 Subject: [PATCH 46/64] removed passing counter overflow detection test because get_core() was not available somehow --- chacha20/tests/kats.rs | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index bb1e8aef..0e8c4f1b 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -141,38 +141,6 @@ mod chacha20test { impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v5, 9); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v6, 8); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v7, 7); - - #[test] - fn chacha20core_counter_overflow() { - use cipher::{StreamCipherCore, StreamCipherSeekCore}; - let cipher = ChaCha20::new(&KEY.into(), &IV.into()); - let mut core = cipher.get_core().clone(); - - // observe the first block two "different" ways - let mut first_block_observation_1 = Default::default(); - core.write_keystream_block(&mut first_block_observation_1); - core.set_block_pos(0); - let mut first_block_observation_2 = Default::default(); - core.write_keystream_block(&mut first_block_observation_2); - // proof that setting block pos to 0 results in first_block_observation_1 - assert_eq!(first_block_observation_1, first_block_observation_2); - - // try to make the counter overflow/wrap - core.set_block_pos(u32::MAX); - core.write_keystream_block(&mut Default::default()); - - let mut first_block_observation_3 = Default::default(); - core.write_keystream_block(&mut first_block_observation_3); - // fails if the counter doesn't wrap - assert_eq!(first_block_observation_1, first_block_observation_3); - - core.set_block_pos(0); - let mut first_block_observation_4 = Default::default(); - core.write_keystream_block(&mut first_block_observation_4); - - // fails when `state[13]` changes - assert_eq!(first_block_observation_1, first_block_observation_4) - } } #[rustfmt::skip] From bf8845d7719b76d12610a98e17ec302d632fc5ff Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 22 Aug 2025 20:20:57 -0500 Subject: [PATCH 47/64] made variants public; edited docs a little; added counter wrapping tests with ChaChaCore --- chacha20/src/lib.rs | 2 +- chacha20/src/variants.rs | 22 +++++++++----- chacha20/tests/kats.rs | 64 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index 6ee49cd2..75a3963a 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -130,7 +130,7 @@ mod rng; #[cfg(feature = "xchacha")] mod xchacha; -mod variants; +pub mod variants; use variants::Variant; #[cfg(feature = "cipher")] diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index 764d94a4..fb55f921 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -1,21 +1,28 @@ -//! Distinguishing features of ChaCha variants. +//! ChaCha variant-specific configurations. -/// A trait that distinguishes some ChaCha variants +/// A trait that distinguishes some ChaCha variants. Contains configurations +/// for "Legacy" DJB variant and the IETF variant. pub trait Variant: Clone { - /// Where the nonce starts in the state array + /// Where the nonce starts in the state array. const NONCE_INDEX: usize; + + /// The counter's type. #[cfg(feature = "cipher")] type Counter: cipher::StreamCipherCounter; + + /// The counter's type. #[cfg(not(feature = "cipher"))] type Counter; + /// An intermediate helper type for using generics. Should be either + /// a `[u32; 1]` or a `[u32; 2]`. type CounterWords: AsRef<[u32]>; - /// Takes a slice of state[12..NONCE_INDEX] to convert it into - /// Self::Counter. + /// Takes a slice of `state[12..NONCE_INDEX]` to convert it into + /// `Self::Counter`. fn get_block_pos(counter_row: &[u32]) -> Self::Counter; - /// Breaks down the Self::Counter type into a u32 array for setting the + /// Breaks down the `Self::Counter` type into a u32 array for setting the /// block pos. fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords; @@ -24,7 +31,7 @@ pub trait Variant: Clone { } #[derive(Clone)] -/// The details pertaining to the IETF variant +/// IETF ChaCha configuration to use a 32-bit counter and 96-bit nonce. pub struct Ietf(); impl Variant for Ietf { const NONCE_INDEX: usize = 13; @@ -45,6 +52,7 @@ impl Variant for Ietf { } } +/// DJB variant specific features: 64-bit counter and 64-bit nonce. #[derive(Clone)] #[cfg(feature = "legacy")] pub struct Legacy(); diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 0e8c4f1b..0bd6a6b5 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -141,6 +141,38 @@ mod chacha20test { impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v5, 9); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v6, 8); impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v7, 7); + + #[test] + fn chacha20core_counter_overflow() { + use chacha20::{ChaChaCore, R20, variants::Ietf}; + use cipher::{StreamCipherCore, StreamCipherSeekCore}; + let mut core = ChaChaCore::::new(&KEY.into(), &IV.into()); + + // observe the first block two "different" ways + let mut first_block_observation_1 = Default::default(); + core.write_keystream_block(&mut first_block_observation_1); + core.set_block_pos(0); + let mut first_block_observation_2 = Default::default(); + core.write_keystream_block(&mut first_block_observation_2); + // proof that setting block pos to 0 results in first_block_observation_1 + assert_eq!(first_block_observation_1, first_block_observation_2); + + // try to make the counter overflow/wrap + core.set_block_pos(u32::MAX); + core.write_keystream_block(&mut Default::default()); + + let mut first_block_observation_3 = Default::default(); + core.write_keystream_block(&mut first_block_observation_3); + // fails if the counter doesn't wrap + assert_eq!(first_block_observation_1, first_block_observation_3); + + core.set_block_pos(0); + let mut first_block_observation_4 = Default::default(); + core.write_keystream_block(&mut first_block_observation_4); + + // fails when `state[13]` changes + assert_eq!(first_block_observation_1, first_block_observation_4) + } } #[rustfmt::skip] @@ -280,4 +312,36 @@ mod legacy { } } } + + #[test] + fn chacha20core_counter_overflow() { + use cipher::{StreamCipherCore, StreamCipherSeekCore}; + use chacha20::{ChaChaCore, R20, variants::Legacy}; + let mut core = ChaChaCore::::new(&KEY_LONG.into(), &IV_LONG.into()); + + // observe the first block two "different" ways + let mut first_block_observation_1 = Default::default(); + core.write_keystream_block(&mut first_block_observation_1); + core.set_block_pos(0); + let mut first_block_observation_2 = Default::default(); + core.write_keystream_block(&mut first_block_observation_2); + // proof that setting block pos to 0 results in first_block_observation_1 + assert_eq!(first_block_observation_1, first_block_observation_2); + + // try to make the counter wrap + core.set_block_pos(u64::MAX); + core.write_keystream_block(&mut Default::default()); + + let mut first_block_observation_3 = Default::default(); + core.write_keystream_block(&mut first_block_observation_3); + // fails if the counter doesn't wrap + assert_eq!(first_block_observation_1, first_block_observation_3); + + core.set_block_pos(0); + let mut first_block_observation_4 = Default::default(); + core.write_keystream_block(&mut first_block_observation_4); + + // fails when `state[13]` changes + assert_eq!(first_block_observation_1, first_block_observation_4) + } } From b222fea317597e488935ea40e8af7f8d2087f905 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 22 Aug 2025 20:38:33 -0500 Subject: [PATCH 48/64] resolve conflicts, but removing the big endian cfg results in a lot of tests failing --- chacha20/Cargo.toml | 11 +++++++++++ chacha20/src/backends/sse2.rs | 3 ++- chacha20/src/lib.rs | 5 +++-- chacha20/src/rng.rs | 8 +++----- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index cb87d5b3..4256bcb8 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -47,3 +47,14 @@ xchacha = ["cipher"] [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] + +[lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = [ + 'cfg(chacha20_force_soft)', + 'cfg(chacha20_force_sse2)', + 'cfg(chacha20_force_avx2)', +] + +[lints.clippy] +needless_range_loop = "allow" \ No newline at end of file diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index eb55a271..01929411 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -132,7 +132,8 @@ where #[cfg(feature = "rng")] impl Backend { #[inline(always)] - fn gen_ks_blocks(&mut self, block: &mut [u32]) { + fn gen_ks_blocks(&mut self, block: &mut [u32; 64]) { + const _: () = assert!(4 * PAR_BLOCKS * size_of::<__m128i>() == size_of::<[u32; 64]>()); unsafe { let res = rounds::(&self.v); self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)); diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index 75a3963a..b3f8ad78 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -105,8 +105,6 @@ html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/8f1a9894/logo.svg", html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/8f1a9894/logo.svg" )] -#![allow(clippy::needless_range_loop)] -#![allow(unexpected_cfgs)] #![warn(missing_docs, rust_2018_idioms, trivial_casts, unused_qualifications)] #[cfg(feature = "cipher")] @@ -228,10 +226,13 @@ impl ChaChaCore { fn new(key: &[u8; 32], iv: &[u8]) -> Self { let mut state = [0u32; STATE_WORDS]; state[0..4].copy_from_slice(&CONSTANTS); + let key_chunks = key.chunks_exact(4); for (val, chunk) in state[4..12].iter_mut().zip(key_chunks) { *val = u32::from_le_bytes(chunk.try_into().unwrap()); } + + assert_eq!(iv.len(), 4 * (16 - V::NONCE_INDEX)); let iv_chunks = iv.as_ref().chunks_exact(4); for (val, chunk) in state[V::NONCE_INDEX..16].iter_mut().zip(iv_chunks) { *val = u32::from_le_bytes(chunk.try_into().unwrap()); diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 73699dee..1cf66cee 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -198,7 +198,6 @@ const BUF_BLOCKS: u8 = BUFFER_SIZE as u8 >> 4; impl ChaChaCore { /// Generates 4 blocks in parallel with avx2 & neon, but merely fills /// 4 blocks with sse2 & soft - #[cfg(feature = "rand_core")] fn generate(&mut self, buffer: &mut [u32; 64]) { cfg_if! { if #[cfg(chacha20_force_soft)] { @@ -327,10 +326,7 @@ macro_rules! impl_chacha_rng { #[inline] fn from_seed(seed: Self::Seed) -> Self { - Self(ChaChaCore::<$rounds, Legacy>::new( - seed.as_ref(), - &[0u8; 12], - )) + Self(ChaChaCore::<$rounds, Legacy>::new(seed.as_ref(), &[0u8; 8])) } } impl SeedableRng for $ChaChaXRng { @@ -745,6 +741,8 @@ pub(crate) mod tests { let mut rng1 = ChaChaRng::from_seed(seed); assert_eq!(rng1.next_u32(), 137206642); + assert_eq!(rng1.get_seed(), seed); + let mut rng2 = ChaChaRng::from_rng(&mut rng1); assert_eq!(rng2.next_u32(), 1325750369); } From 87adeedc32ef88194e4c668fab3bacc14ee19b95 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 10:18:22 -0500 Subject: [PATCH 49/64] added mod sealed; accidentally re-added some stuff from master, but it is up to date with master; --- chacha20/Cargo.toml | 11 +++++++++++ chacha20/src/variants.rs | 11 ++++++++++- chacha20/tests/kats.rs | 1 - 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index 2dc66f03..5d94c4c6 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -58,3 +58,14 @@ check-cfg = [ [lints.clippy] needless_range_loop = "allow" + +[lints.rust.unexpected_cfgs] +level = "warn" +check-cfg = [ + 'cfg(chacha20_force_soft)', + 'cfg(chacha20_force_sse2)', + 'cfg(chacha20_force_avx2)', +] + +[lints.clippy] +needless_range_loop = "allow" \ No newline at end of file diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index fb55f921..931b6853 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -1,8 +1,12 @@ //! ChaCha variant-specific configurations. +mod sealed { + pub trait Sealed {} +} + /// A trait that distinguishes some ChaCha variants. Contains configurations /// for "Legacy" DJB variant and the IETF variant. -pub trait Variant: Clone { +pub trait Variant: Clone + sealed::Sealed { /// Where the nonce starts in the state array. const NONCE_INDEX: usize; @@ -33,6 +37,9 @@ pub trait Variant: Clone { #[derive(Clone)] /// IETF ChaCha configuration to use a 32-bit counter and 96-bit nonce. pub struct Ietf(); + +impl sealed::Sealed for Ietf {} + impl Variant for Ietf { const NONCE_INDEX: usize = 13; type Counter = u32; @@ -57,6 +64,8 @@ impl Variant for Ietf { #[cfg(feature = "legacy")] pub struct Legacy(); +impl sealed::Sealed for Legacy {} + #[cfg(feature = "legacy")] impl Variant for Legacy { const NONCE_INDEX: usize = 14; diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index 0bd6a6b5..d0608993 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -341,7 +341,6 @@ mod legacy { let mut first_block_observation_4 = Default::default(); core.write_keystream_block(&mut first_block_observation_4); - // fails when `state[13]` changes assert_eq!(first_block_observation_1, first_block_observation_4) } } From 1dcf4538e0a02d9e34df2dc3dcdf3282bce510d1 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 10:21:22 -0500 Subject: [PATCH 50/64] re-added chacha20.yml from master... I don't know how to git --- .github/workflows/chacha20.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/chacha20.yml b/.github/workflows/chacha20.yml index a479375a..37b070ff 100644 --- a/.github/workflows/chacha20.yml +++ b/.github/workflows/chacha20.yml @@ -213,4 +213,4 @@ jobs: toolchain: ${{ matrix.rust }} targets: ${{ matrix.target }} - uses: RustCrypto/actions/cross-install@master - - run: RUSTFLAGS="${{ matrix.rustflags }}" cross test --package chacha20 --target ${{ matrix.target }} --all-features + - run: RUSTFLAGS="${{ matrix.rustflags }}" cross test --package chacha20 --target ${{ matrix.target }} --all-features \ No newline at end of file From 54cff81061f61f517db0c913f465299affab2017 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 10:26:24 -0500 Subject: [PATCH 51/64] remove duplicate entries in Cargo.toml --- chacha20/Cargo.toml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index 5d94c4c6..fc1efb4b 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -56,16 +56,5 @@ check-cfg = [ 'cfg(chacha20_force_avx2)', ] -[lints.clippy] -needless_range_loop = "allow" - -[lints.rust.unexpected_cfgs] -level = "warn" -check-cfg = [ - 'cfg(chacha20_force_soft)', - 'cfg(chacha20_force_sse2)', - 'cfg(chacha20_force_avx2)', -] - [lints.clippy] needless_range_loop = "allow" \ No newline at end of file From dde2315922630aa97b2d9b536797a301c1ab79eb Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 10:27:54 -0500 Subject: [PATCH 52/64] fixed compile error --- chacha20/src/variants.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index 931b6853..c9b91731 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -64,6 +64,7 @@ impl Variant for Ietf { #[cfg(feature = "legacy")] pub struct Legacy(); +#[cfg(feature = "legacy")] impl sealed::Sealed for Legacy {} #[cfg(feature = "legacy")] From 4fae98302a7e3492d0427c64a0ccda2f6fa7d101 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 12:35:48 -0500 Subject: [PATCH 53/64] added empty lines --- chacha20/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index fc1efb4b..2dc66f03 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -57,4 +57,4 @@ check-cfg = [ ] [lints.clippy] -needless_range_loop = "allow" \ No newline at end of file +needless_range_loop = "allow" From 6cc9acafb13c9120c3ea7c36c21236e6009b55fb Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 12:52:01 -0500 Subject: [PATCH 54/64] consolidated phantomdata into one field --- chacha20/src/backends/avx2.rs | 5 +---- chacha20/src/backends/neon.rs | 4 +--- chacha20/src/backends/sse2.rs | 5 +---- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index 3f1a97ac..b68a2eaa 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -58,7 +58,6 @@ where v, ctr, _pd: PhantomData, - _variant: PhantomData, }; f.call(&mut backend); @@ -94,7 +93,6 @@ where v, ctr, _pd: PhantomData, - _variant: PhantomData }; backend.rng_gen_par_ks_blocks(buffer); @@ -106,8 +104,7 @@ where struct Backend { v: [__m256i; 3], ctr: [__m256i; N], - _pd: PhantomData, - _variant: PhantomData, + _pd: PhantomData<(R, V)>, } #[cfg(feature = "cipher")] diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index ced030a0..c3ebcd67 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -22,8 +22,7 @@ use cipher::{ struct Backend { state: [uint32x4_t; 4], ctrs: [uint32x4_t; 4], - _pd: PhantomData, - _variant: PhantomData, + _pd: PhantomData<(R, V)>, } macro_rules! add_counter { @@ -58,7 +57,6 @@ impl Backend { state, ctrs, _pd: PhantomData, - _variant: PhantomData, } } } diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 01929411..481e612e 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -38,7 +38,6 @@ where _mm_loadu_si128(state_ptr.add(3)), ], _pd: PhantomData, - _variant: PhantomData, }; f.call(&mut backend); @@ -51,8 +50,7 @@ where struct Backend { v: [__m128i; 4], - _pd: PhantomData, - _variant: PhantomData, + _pd: PhantomData<(R, V)>, } #[cfg(feature = "cipher")] @@ -120,7 +118,6 @@ where _mm_loadu_si128(state_ptr.add(3)), ], _pd: PhantomData, - _variant: PhantomData, }; backend.gen_ks_blocks(buffer); From a49df62e96e2b37494d76d275486a5194a986a8d Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 13:10:26 -0500 Subject: [PATCH 55/64] switched to match statements in backends --- chacha20/src/backends/avx2.rs | 44 ++++++++++++++++++----------------- chacha20/src/backends/neon.rs | 20 ++++++++-------- chacha20/src/backends/sse2.rs | 41 ++++++++++++++++---------------- 3 files changed, 54 insertions(+), 51 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index b68a2eaa..7de35100 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -40,19 +40,19 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - if size_of::() == 8 { - c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); - } else { - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); - } + c = match size_of::() { + 4 => _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)), + 8 => _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)), + _ => unreachable!() + }; let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - if size_of::() == 8 { - c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); - } else { - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); - } + c = match size_of::() { + 4 => _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)), + 8 => _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)), + _ => unreachable!(), + }; } let mut backend = Backend:: { v, @@ -63,8 +63,10 @@ where f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; - if size_of::() == 8 { - state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; + match size_of::() { + 4 => {}, + 8 => state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32, + _ => unreachable!() } } @@ -124,11 +126,11 @@ impl StreamCipherBackend for Backend { unsafe { let res = rounds::(&self.v, &self.ctr); for c in self.ctr.iter_mut() { - if size_of::() == 8 { - *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)); - } else { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)); - } + *c = match size_of::() { + 4 => _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)), + 8 => _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)), + _ => unreachable!() + }; } let res0: [__m128i; 8] = core::mem::transmute(res[0]); @@ -147,10 +149,10 @@ impl StreamCipherBackend for Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - if size_of::() == 8 { - *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); - } else { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + *c = match size_of::() { + 4 => _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)), + 8 => _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)), + _ => unreachable!() } } diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index c3ebcd67..67cbed49 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -27,13 +27,13 @@ struct Backend { macro_rules! add_counter { ($a:expr, $b:expr, $variant:ty) => { - if size_of::<<$variant>::Counter>() == 8 { - vreinterpretq_u32_u64(vaddq_u64( + match size_of::<<$variant>::Counter>() { + 4 => vaddq_u32($a, $b), + 8 => vreinterpretq_u32_u64(vaddq_u64( vreinterpretq_u64_u32($a), vreinterpretq_u64_u32($b), - )) - } else { - vaddq_u32($a, $b) + )), + _ => unreachable!(), } }; } @@ -74,13 +74,13 @@ where f.call(&mut backend); - if size_of::() == 8 { - vst1q_u64( + match size_of::() { + 4 => state[12] = vgetq_lane_u32(backend.state[3], 0), + 8 => vst1q_u64( state.as_mut_ptr().offset(12) as *mut u64, vreinterpretq_u64_u32(backend.state[3]), - ); - } else { - state[12] = vgetq_lane_u32(backend.state[3], 0); + ), + _ => unreachable!(), } } diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 481e612e..ecefb0b2 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -43,8 +43,9 @@ where f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; - if size_of::() == 8 { - state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; + match size_of::() == 8 { + true => state[13] = _mm_extract_epi32(backend.v[3], 1) as u32, + false => {} } } @@ -69,11 +70,11 @@ impl StreamCipherBackend for Backend { fn gen_ks_block(&mut self, block: &mut Block) { unsafe { let res = rounds::(&self.v); - if size_of::() == 8 { - self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)); - } else { - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); - } + self.v[3] = match size_of::() { + 4 => _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)), + 8 => _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)), + _ => unreachable!() + }; let block_ptr = block.as_mut_ptr() as *mut __m128i; for i in 0..4 { @@ -85,11 +86,11 @@ impl StreamCipherBackend for Backend { fn gen_par_ks_blocks(&mut self, blocks: &mut cipher::ParBlocks) { unsafe { let res = rounds::(&self.v); - if size_of::() == 8 { - self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)); - } else { - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, PAR_BLOCKS as i32)); - } + self.v[3] = match size_of::() { + 4 => _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, PAR_BLOCKS as i32)), + 8 => _mm_add_epi64(self.v[3], _mm_set_epi64x(0, PAR_BLOCKS as i64)), + _ => unreachable!() + }; let blocks_ptr = blocks.as_mut_ptr() as *mut __m128i; for block in 0..PAR_BLOCKS { @@ -150,10 +151,10 @@ impl Backend { unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_BLOCKS] { let mut res = [*v; 4]; for block in 1..PAR_BLOCKS { - if size_of::() == 8 { - res[block][3] = _mm_add_epi64(res[block][3], _mm_set_epi64x(0, block as i64)); - } else { - res[block][3] = _mm_add_epi32(res[block][3], _mm_set_epi32(0, 0, 0, block as i32)); + res[block][3] = match size_of::() { + 4 => _mm_add_epi32(res[block][3], _mm_set_epi32(0, 0, 0, block as i32)), + 8 => _mm_add_epi64(res[block][3], _mm_set_epi64x(0, block as i64)), + _ => unreachable!() } } @@ -165,10 +166,10 @@ unsafe fn rounds(v: &[__m128i; 4]) -> [[__m128i; 4]; PAR_ for i in 0..3 { res[block][i] = _mm_add_epi32(res[block][i], v[i]); } - let ctr = if size_of::() == 8 { - _mm_add_epi64(v[3], _mm_set_epi64x(0, block as i64)) - } else { - _mm_add_epi32(v[3], _mm_set_epi32(0, 0, 0, block as i32)) + let ctr = match size_of::() { + 4 => _mm_add_epi32(v[3], _mm_set_epi32(0, 0, 0, block as i32)), + 8 => _mm_add_epi64(v[3], _mm_set_epi64x(0, block as i64)), + _ => unreachable!() }; res[block][3] = _mm_add_epi32(res[block][3], ctr); } From 5d54acaef4491860391b50202ac76a4629efd99b Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 13:15:13 -0500 Subject: [PATCH 56/64] rewrote soft.rs counter logic --- chacha20/src/backends/soft.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index 8125b641..2be98b51 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -28,14 +28,12 @@ impl StreamCipherBackend for Backend<'_, R, V> { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let res = run_rounds::(&self.0.state); - let no_carry = self.0.state[12].checked_add(1); - if let Some(v) = no_carry { - self.0.state[12] = v; - } else { - self.0.state[12] = 0; - if size_of::() == 8 { - self.0.state[13] = self.0.state[13].wrapping_add(1); - } + let mut ctr = u64::from(self.0.state[13]) << 32 | u64::from(self.0.state[12]); + ctr = ctr.wrapping_add(1); + self.0.state[12] = ctr as u32; + match size_of::() == 8 { + true => self.0.state[13] = (ctr >> 32) as u32, + false => {} } for (chunk, val) in block.chunks_exact_mut(4).zip(res.iter()) { From 4b1cee10c3e7a9d9fd5207687bc4550dbca647c5 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 13:17:24 -0500 Subject: [PATCH 57/64] rewrote rng soft.rs counter --- chacha20/src/backends/soft.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index 2be98b51..a081e1c4 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -48,13 +48,10 @@ impl Backend<'_, R, V> { pub(crate) fn gen_ks_blocks(&mut self, buffer: &mut [u32; 64]) { for i in 0..4 { let res = run_rounds::(&self.0.state); - let no_carry = self.0.state[12].checked_add(1); - if let Some(v) = no_carry { - self.0.state[12] = v; - } else { - self.0.state[12] = 0; - self.0.state[13] = self.0.state[13].wrapping_add(1); - } + let mut ctr = u64::from(self.0.state[13]) << 32 | u64::from(self.0.state[12]); + ctr = ctr.wrapping_add(1); + self.0.state[12] = ctr as u32; + self.0.state[13] = (ctr >> 32) as u32; for (word, val) in buffer[i << 4..(i + 1) << 4].iter_mut().zip(res.iter()) { *word = val.to_le(); From aa76088520e7aee10d6df21747ce3021b71ccc5d Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 13:17:48 -0500 Subject: [PATCH 58/64] added newline --- .github/workflows/chacha20.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/chacha20.yml b/.github/workflows/chacha20.yml index 37b070ff..a479375a 100644 --- a/.github/workflows/chacha20.yml +++ b/.github/workflows/chacha20.yml @@ -213,4 +213,4 @@ jobs: toolchain: ${{ matrix.rust }} targets: ${{ matrix.target }} - uses: RustCrypto/actions/cross-install@master - - run: RUSTFLAGS="${{ matrix.rustflags }}" cross test --package chacha20 --target ${{ matrix.target }} --all-features \ No newline at end of file + - run: RUSTFLAGS="${{ matrix.rustflags }}" cross test --package chacha20 --target ${{ matrix.target }} --all-features From f17c7a1f7fc967319fa8e599fc9401d04131f2ac Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 13:22:11 -0500 Subject: [PATCH 59/64] fix clippy warnings --- chacha20/src/backends/soft.rs | 5 ++--- chacha20/src/backends/sse2.rs | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index a081e1c4..c012a58a 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -31,9 +31,8 @@ impl StreamCipherBackend for Backend<'_, R, V> { let mut ctr = u64::from(self.0.state[13]) << 32 | u64::from(self.0.state[12]); ctr = ctr.wrapping_add(1); self.0.state[12] = ctr as u32; - match size_of::() == 8 { - true => self.0.state[13] = (ctr >> 32) as u32, - false => {} + if size_of::() == 8 { + self.0.state[13] = (ctr >> 32) as u32 } for (chunk, val) in block.chunks_exact_mut(4).zip(res.iter()) { diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index ecefb0b2..e2f66dc1 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -43,9 +43,8 @@ where f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; - match size_of::() == 8 { - true => state[13] = _mm_extract_epi32(backend.v[3], 1) as u32, - false => {} + if size_of::() == 8 { + state[13] = _mm_extract_epi32(backend.v[3], 1) as u32 } } From 788e930313e0b3ef18004709ee6577a607dbcd9a Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 13:24:16 -0500 Subject: [PATCH 60/64] fixed another clippy warning. I thought I ran clippy already --- chacha20/src/backends/soft.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index c012a58a..619a26f3 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -28,7 +28,7 @@ impl StreamCipherBackend for Backend<'_, R, V> { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let res = run_rounds::(&self.0.state); - let mut ctr = u64::from(self.0.state[13]) << 32 | u64::from(self.0.state[12]); + let mut ctr = (u64::from(self.0.state[13]) << 32) | u64::from(self.0.state[12]); ctr = ctr.wrapping_add(1); self.0.state[12] = ctr as u32; if size_of::() == 8 { From 3d9b99301cd4808ba8a6a43e016c4ccb0fcbd0eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sun, 24 Aug 2025 00:12:03 +0300 Subject: [PATCH 61/64] tweak variant --- chacha20/src/lib.rs | 45 +++++++++++++++++------------ chacha20/src/rng.rs | 4 +-- chacha20/src/variants.rs | 61 ++++++++++++++++------------------------ 3 files changed, 54 insertions(+), 56 deletions(-) diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index b3f8ad78..52463bad 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -206,17 +206,24 @@ cfg_if! { } /// The ChaCha core function. -#[cfg_attr(feature = "rng", derive(Clone))] pub struct ChaChaCore { /// Internal state of the core function state: [u32; STATE_WORDS], /// CPU target feature tokens #[allow(dead_code)] tokens: Tokens, - /// Number of rounds to perform - rounds: PhantomData, - /// the variant of the implementation - variant: PhantomData, + /// Number of rounds to perform and the cipher variant + _pd: PhantomData<(R, V)>, +} + +impl Clone for ChaChaCore { + fn clone(&self) -> Self { + Self { + state: self.state, + tokens: self.tokens, + _pd: PhantomData, + } + } } impl ChaChaCore { @@ -225,17 +232,21 @@ impl ChaChaCore { /// directly. fn new(key: &[u8; 32], iv: &[u8]) -> Self { let mut state = [0u32; STATE_WORDS]; - state[0..4].copy_from_slice(&CONSTANTS); - let key_chunks = key.chunks_exact(4); - for (val, chunk) in state[4..12].iter_mut().zip(key_chunks) { - *val = u32::from_le_bytes(chunk.try_into().unwrap()); + let ctr_size = size_of::() / size_of::(); + let (const_dst, state_rem) = state.split_at_mut(4); + let (key_dst, state_rem) = state_rem.split_at_mut(8); + let (_ctr_dst, iv_dst) = state_rem.split_at_mut(ctr_size); + + const_dst.copy_from_slice(&CONSTANTS); + + for (src, dst) in key.chunks_exact(4).zip(key_dst) { + *dst = u32::from_le_bytes(src.try_into().unwrap()); } - assert_eq!(iv.len(), 4 * (16 - V::NONCE_INDEX)); - let iv_chunks = iv.as_ref().chunks_exact(4); - for (val, chunk) in state[V::NONCE_INDEX..16].iter_mut().zip(iv_chunks) { - *val = u32::from_le_bytes(chunk.try_into().unwrap()); + assert_eq!(size_of_val(iv_dst), size_of_val(iv)); + for (src, dst) in iv.chunks_exact(4).zip(iv_dst) { + *dst = u32::from_le_bytes(src.try_into().unwrap()); } cfg_if! { @@ -258,8 +269,7 @@ impl ChaChaCore { Self { state, tokens, - rounds: PhantomData, - variant: PhantomData, + _pd: PhantomData, } } } @@ -270,13 +280,12 @@ impl StreamCipherSeekCore for ChaChaCore { #[inline(always)] fn get_block_pos(&self) -> Self::Counter { - V::get_block_pos(&self.state[12..V::NONCE_INDEX]) + V::get_block_pos(&self.state[12..]) } #[inline(always)] fn set_block_pos(&mut self, pos: Self::Counter) { - let block_pos_words = V::set_block_pos_helper(pos); - self.state[12..V::NONCE_INDEX].copy_from_slice(block_pos_words.as_ref()); + V::set_block_pos(&mut self.state[12..], pos); } } diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 624bb91f..4f2219fd 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -463,7 +463,7 @@ macro_rules! impl_chacha_rng { #[inline] pub fn set_stream>(&mut self, stream: S) { let stream: StreamId = stream.into(); - for (n, val) in self.core.core.0.state[Legacy::NONCE_INDEX..BLOCK_WORDS as usize] + for (n, val) in self.core.core.0.state[14..BLOCK_WORDS as usize] .as_mut() .iter_mut() .zip(stream.0.iter()) @@ -479,7 +479,7 @@ macro_rules! impl_chacha_rng { #[inline] pub fn get_stream(&self) -> u64 { let mut result = [0u8; 8]; - for (i, &big) in self.core.core.0.state[Legacy::NONCE_INDEX..BLOCK_WORDS as usize] + for (i, &big) in self.core.core.0.state[14..BLOCK_WORDS as usize] .iter() .enumerate() { diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index c9b91731..9bb0e7a0 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -6,88 +6,77 @@ mod sealed { /// A trait that distinguishes some ChaCha variants. Contains configurations /// for "Legacy" DJB variant and the IETF variant. -pub trait Variant: Clone + sealed::Sealed { - /// Where the nonce starts in the state array. - const NONCE_INDEX: usize; - - /// The counter's type. - #[cfg(feature = "cipher")] - type Counter: cipher::StreamCipherCounter; - +pub trait Variant: sealed::Sealed { /// The counter's type. #[cfg(not(feature = "cipher"))] type Counter; - /// An intermediate helper type for using generics. Should be either - /// a `[u32; 1]` or a `[u32; 2]`. - type CounterWords: AsRef<[u32]>; + /// The counter's type. + #[cfg(feature = "cipher")] + type Counter: cipher::StreamCipherCounter; /// Takes a slice of `state[12..NONCE_INDEX]` to convert it into /// `Self::Counter`. - fn get_block_pos(counter_row: &[u32]) -> Self::Counter; + fn get_block_pos(row: &[u32]) -> Self::Counter; /// Breaks down the `Self::Counter` type into a u32 array for setting the /// block pos. - fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords; + fn set_block_pos(row: &mut [u32], pos: Self::Counter); /// A helper method for calculating the remaining blocks using these types fn remaining_blocks(block_pos: Self::Counter) -> Option; } -#[derive(Clone)] /// IETF ChaCha configuration to use a 32-bit counter and 96-bit nonce. -pub struct Ietf(); +pub enum Ietf {} impl sealed::Sealed for Ietf {} impl Variant for Ietf { - const NONCE_INDEX: usize = 13; type Counter = u32; - type CounterWords = [u32; 1]; #[inline(always)] - fn get_block_pos(counter_row: &[u32]) -> Self::Counter { - counter_row[0] + fn get_block_pos(row: &[u32]) -> u32 { + row[0] } + #[inline(always)] - fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords { - [value] + fn set_block_pos(row: &mut [u32], pos: u32) { + row[0] = pos; } + #[inline(always)] - fn remaining_blocks(block_pos: Self::Counter) -> Option { - (u32::MAX - block_pos).try_into().ok() + fn remaining_blocks(block_pos: u32) -> Option { + let remaining = u32::MAX - block_pos; + remaining.try_into().ok() } } /// DJB variant specific features: 64-bit counter and 64-bit nonce. -#[derive(Clone)] #[cfg(feature = "legacy")] -pub struct Legacy(); +pub enum Legacy {} #[cfg(feature = "legacy")] impl sealed::Sealed for Legacy {} #[cfg(feature = "legacy")] impl Variant for Legacy { - const NONCE_INDEX: usize = 14; type Counter = u64; - type CounterWords = [u32; 2]; #[inline(always)] - fn get_block_pos(counter_row: &[u32]) -> Self::Counter { - counter_row[0] as u64 | ((counter_row[1] as u64) << 32) + fn get_block_pos(row: &[u32]) -> u64 { + (u64::from(row[1]) << 32) | u64::from(row[0]) } + #[inline(always)] - fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords { - [value as u32, (value >> 32) as u32] + fn set_block_pos(row: &mut [u32], pos: u64) { + row[0] = (pos & 0xFFFF_FFFF).try_into().unwrap(); + row[1] = (pos >> 32).try_into().unwrap(); } + #[inline(always)] - fn remaining_blocks(block_pos: Self::Counter) -> Option { + fn remaining_blocks(block_pos: u64) -> Option { let remaining = u64::MAX - block_pos; - #[cfg(target_pointer_width = "32")] - if remaining > usize::MAX as u64 { - return None; - } remaining.try_into().ok() } } From 033711c7d8aacb8370fa5e6e9758635cb7f13003 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 16:25:26 -0500 Subject: [PATCH 62/64] remove unneeded tests --- chacha20/tests/kats.rs | 112 +---------------------------------------- 1 file changed, 1 insertion(+), 111 deletions(-) diff --git a/chacha20/tests/kats.rs b/chacha20/tests/kats.rs index d0608993..4e4aa33c 100644 --- a/chacha20/tests/kats.rs +++ b/chacha20/tests/kats.rs @@ -21,7 +21,7 @@ cipher::stream_cipher_seek_test!(chacha20legacy_seek, ChaCha20Legacy); #[cfg(feature = "cipher")] mod chacha20test { use chacha20::{ChaCha20, KeyIvInit}; - use cipher::{StreamCipher, StreamCipherSeek}; + use cipher::StreamCipher; use hex_literal::hex; // @@ -94,85 +94,6 @@ mod chacha20test { cipher.apply_keystream(&mut buf); assert_eq!(&buf[..], &CIPHERTEXT[..]); } - - macro_rules! impl_chacha20_potential_counter_issue { - ($name:ident, $num_blocks:literal) => { - #[test] - fn $name() { - let mut cipher = ChaCha20::new(&KEY.into(), &IV.into()); - let mut first_4_blocks = [0u8; 256]; - assert_eq!(cipher.current_pos::(), 0); - cipher.apply_keystream(&mut first_4_blocks); - - let mut buf_1 = [0u8; $num_blocks * 64]; - let mut buf_2 = [0u8; $num_blocks * 64 + 1]; - - // seek to end of keystream - let pos = (1 << 32) * 64 - $num_blocks * 64 - 64; - cipher.try_seek(pos).unwrap(); - assert_eq!(cipher.current_pos::(), pos); - - // overshoot keystream length - let applied_keystream = cipher.try_apply_keystream(&mut buf_2); - assert_eq!(applied_keystream.is_err(), true); - - // exhaust keystream - cipher.apply_keystream(&mut buf_1); - - // verify that we cannot write another byte - assert!(cipher.try_apply_keystream(&mut [0u8; 1]).is_err()); - - // seek to beginning and check if the first block is the same as before - cipher.seek(0); - assert_eq!(cipher.current_pos::(), 0); - cipher.apply_keystream(&mut first_4_blocks); - - // if this assert fails, exhausting the keystream increments - // state[13], resulting in a different keystream when it - // should be the same - assert_eq!(first_4_blocks, [0u8; 256]); - } - }; - } - - impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v2, 4); - impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v3, 11); - impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v4, 10); - impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v5, 9); - impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v6, 8); - impl_chacha20_potential_counter_issue!(chacha20_potential_counter_issue_v7, 7); - - #[test] - fn chacha20core_counter_overflow() { - use chacha20::{ChaChaCore, R20, variants::Ietf}; - use cipher::{StreamCipherCore, StreamCipherSeekCore}; - let mut core = ChaChaCore::::new(&KEY.into(), &IV.into()); - - // observe the first block two "different" ways - let mut first_block_observation_1 = Default::default(); - core.write_keystream_block(&mut first_block_observation_1); - core.set_block_pos(0); - let mut first_block_observation_2 = Default::default(); - core.write_keystream_block(&mut first_block_observation_2); - // proof that setting block pos to 0 results in first_block_observation_1 - assert_eq!(first_block_observation_1, first_block_observation_2); - - // try to make the counter overflow/wrap - core.set_block_pos(u32::MAX); - core.write_keystream_block(&mut Default::default()); - - let mut first_block_observation_3 = Default::default(); - core.write_keystream_block(&mut first_block_observation_3); - // fails if the counter doesn't wrap - assert_eq!(first_block_observation_1, first_block_observation_3); - - core.set_block_pos(0); - let mut first_block_observation_4 = Default::default(); - core.write_keystream_block(&mut first_block_observation_4); - - // fails when `state[13]` changes - assert_eq!(first_block_observation_1, first_block_observation_4) - } } #[rustfmt::skip] @@ -312,35 +233,4 @@ mod legacy { } } } - - #[test] - fn chacha20core_counter_overflow() { - use cipher::{StreamCipherCore, StreamCipherSeekCore}; - use chacha20::{ChaChaCore, R20, variants::Legacy}; - let mut core = ChaChaCore::::new(&KEY_LONG.into(), &IV_LONG.into()); - - // observe the first block two "different" ways - let mut first_block_observation_1 = Default::default(); - core.write_keystream_block(&mut first_block_observation_1); - core.set_block_pos(0); - let mut first_block_observation_2 = Default::default(); - core.write_keystream_block(&mut first_block_observation_2); - // proof that setting block pos to 0 results in first_block_observation_1 - assert_eq!(first_block_observation_1, first_block_observation_2); - - // try to make the counter wrap - core.set_block_pos(u64::MAX); - core.write_keystream_block(&mut Default::default()); - - let mut first_block_observation_3 = Default::default(); - core.write_keystream_block(&mut first_block_observation_3); - // fails if the counter doesn't wrap - assert_eq!(first_block_observation_1, first_block_observation_3); - - core.set_block_pos(0); - let mut first_block_observation_4 = Default::default(); - core.write_keystream_block(&mut first_block_observation_4); - - assert_eq!(first_block_observation_1, first_block_observation_4) - } } From 1c2afcc414eaca2338799dfdfa41479f2c359222 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 23 Aug 2025 19:54:22 -0500 Subject: [PATCH 63/64] added test vectors found on github from PyCA --- chacha20/src/rng.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 4f2219fd..88b21a1c 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -608,6 +608,8 @@ impl_chacha_rng!(ChaCha20Rng, ChaCha20Core, R20, abst20); #[cfg(test)] pub(crate) mod tests { + use hex_literal::hex; + use super::*; const KEY: [u8; 32] = [ @@ -1201,4 +1203,36 @@ pub(crate) mod tests { ); assert_ne!(&first_blocks[0..64 * 4], &result[64..]); } + + /// Test vector 8 from https://github.com/pyca/cryptography/blob/main/vectors/cryptography_vectors/ciphers/ChaCha20/counter-overflow.txt + #[test] + fn counter_overflow_1() { + let mut rng = ChaCha20Rng::from_seed([0u8; 32]); + let block_pos = 4294967295; + assert_eq!(block_pos, u32::MAX as u64); + rng.set_block_pos(4294967295); + + let mut output = [0u8; 64 * 3]; + rng.fill_bytes(&mut output); + let expected = hex!( + "ace4cd09e294d1912d4ad205d06f95d9c2f2bfcf453e8753f128765b62215f4d92c74f2f626c6a640c0b1284d839ec81f1696281dafc3e684593937023b58b1d3db41d3aa0d329285de6f225e6e24bd59c9a17006943d5c9b680e3873bdc683a5819469899989690c281cd17c96159af0682b5b903468a61f50228cf09622b5a46f0f6efee15c8f1b198cb49d92b990867905159440cc723916dc0012826981039ce1766aa2542b05db3bd809ab142489d5dbfe1273e7399637b4b3213768aaa" + ); + assert_eq!(expected, output); + } + + /// Test vector 9 from https://github.com/pyca/cryptography/blob/main/vectors/cryptography_vectors/ciphers/ChaCha20/counter-overflow.txt + #[test] + fn counter_wrap_1() { + let mut rng = ChaCha20Rng::from_seed([0u8; 32]); + let block_pos = 18446744073709551615; + assert_eq!(block_pos, u64::MAX); + rng.set_block_pos(block_pos); + + let mut output = [0u8; 64 * 3]; + rng.fill_bytes(&mut output); + let expected = hex!( + "d7918cd8620cf832532652c04c01a553092cfb32e7b3f2f5467ae9674a2e9eec17368ec8027a357c0c51e6ea747121fec45284be0f099d2b3328845607b1768976b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee65869f07e7be5551387a98ba977c732d080dcb0f29a048e3656912c6533e32ee7aed29b721769ce64e43d57133b074d839d531ed1f28510afb45ace10a1f4b794d6f" + ); + assert_eq!(expected, output); + } } From a5cb4389150e0e23ca38444327ce872c5238f6ef Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Tue, 26 Aug 2025 10:33:08 -0500 Subject: [PATCH 64/64] fixed endian issue for the rng regarding #447 --- chacha20/src/backends/soft.rs | 10 ++++++---- chacha20/src/rng.rs | 6 +----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index 619a26f3..3ce4f6b1 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -11,6 +11,9 @@ use cipher::{ consts::{U1, U64}, }; +#[cfg(feature = "rng")] +use crate::rng::BLOCK_WORDS; + pub(crate) struct Backend<'a, R: Rounds, V: Variant>(pub(crate) &'a mut ChaChaCore); #[cfg(feature = "cipher")] @@ -45,16 +48,15 @@ impl StreamCipherBackend for Backend<'_, R, V> { impl Backend<'_, R, V> { #[inline(always)] pub(crate) fn gen_ks_blocks(&mut self, buffer: &mut [u32; 64]) { - for i in 0..4 { + for block in 0..4 { let res = run_rounds::(&self.0.state); let mut ctr = u64::from(self.0.state[13]) << 32 | u64::from(self.0.state[12]); ctr = ctr.wrapping_add(1); self.0.state[12] = ctr as u32; self.0.state[13] = (ctr >> 32) as u32; - for (word, val) in buffer[i << 4..(i + 1) << 4].iter_mut().zip(res.iter()) { - *word = val.to_le(); - } + buffer[block * BLOCK_WORDS as usize..(block + 1) * BLOCK_WORDS as usize] + .copy_from_slice(&res); } } } diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 88b21a1c..f58152fe 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -27,7 +27,7 @@ use crate::{ use cfg_if::cfg_if; /// Number of 32-bit words per ChaCha block (fixed by algorithm definition). -const BLOCK_WORDS: u8 = 16; +pub(crate) const BLOCK_WORDS: u8 = 16; /// The seed for ChaCha20. Implements ZeroizeOnDrop when the /// zeroize feature is enabled. @@ -590,10 +590,6 @@ macro_rules! impl_chacha_rng { #[inline] fn generate(&mut self, r: &mut Self::Results) { self.0.generate(&mut r.0); - #[cfg(target_endian = "big")] - for word in r.0.iter_mut() { - *word = word.to_le(); - } } } };