Merged

66 commits
86341e0
added newtype that should be able to simulate a 64-bit counter; needs…
nstilt1 Aug 14, 2025
22d06d5
fixed some unused warnings the only way I know that I can
nstilt1 Aug 14, 2025
3bcd93b
attempted to restrict block pos to multiple of 4, all tests pass
nstilt1 Aug 15, 2025
768b0c8
added to_le()
nstilt1 Aug 15, 2025
385ebea
updated backends to use a 64-bit counter
nstilt1 Aug 15, 2025
dbcdb41
ignored counter_wrapping rng test
nstilt1 Aug 15, 2025
8a8f8fe
undid block pos multiple of 4
nstilt1 Aug 15, 2025
5ae5b22
fixed sse2 tests
nstilt1 Aug 15, 2025
5a15d23
fixed neon.rs syntax
nstilt1 Aug 15, 2025
87cb35d
fixed neon.rs syntax again
nstilt1 Aug 15, 2025
a671af7
fixed neon code; tested with cross
nstilt1 Aug 15, 2025
9235c1e
impled remaining_blocks()
nstilt1 Aug 15, 2025
655d7fc
fixed test_wrapping_add; not quite perfect
nstilt1 Aug 15, 2025
f2a24c6
fixed test_chacha_word_pos_wrap_excess; not the cleanest fix
nstilt1 Aug 15, 2025
19dc665
fixed test_chacha_word_pos_wrap_exact; not the cleanest fix again
nstilt1 Aug 15, 2025
83d7063
All tests pass on avx2, even the doctests
nstilt1 Aug 15, 2025
23a335d
updated paragraph that was previously marked 'TODO'
nstilt1 Aug 16, 2025
9505bec
fixed set/get_block_pos; also added a cipher test to see if I could b…
nstilt1 Aug 16, 2025
7481ed1
fix unused import
nstilt1 Aug 16, 2025
4d0e0d0
borrowed tests from rand_chacha to replace the ones we had that I had…
nstilt1 Aug 16, 2025
e2b8e41
unignored tests
nstilt1 Aug 16, 2025
77354d0
fixed counter wrapping tests
nstilt1 Aug 16, 2025
c13cd53
fmt
nstilt1 Aug 16, 2025
64662db
avx2 passes tests now, but sse2 and neon don't
nstilt1 Aug 16, 2025
32f733c
fixed sse2!
nstilt1 Aug 16, 2025
d889eb8
fixed neon! same problem sse2 had
nstilt1 Aug 16, 2025
faa32a5
fixed cipher neon code
nstilt1 Aug 16, 2025
8dd7db8
missed something in neon when copying code changes
nstilt1 Aug 16, 2025
b26db0d
added multiple counter tests using a macro
nstilt1 Aug 17, 2025
2e4cf10
fmt
nstilt1 Aug 17, 2025
a6962aa
updated docs for set_stream() and added another equivalence test with…
nstilt1 Aug 17, 2025
56e1305
rerun PR tests because they failed to execute the tests
nstilt1 Aug 18, 2025
289f2bc
fix failing tests because apparently there can't be 2 chacha20 packages
nstilt1 Aug 18, 2025
ddeba24
proof that restricting block_pos to 0..max instead of 0..=max passes …
nstilt1 Aug 19, 2025
eff2b26
proof that fixing the '- 64' and 'remaining_blocks()' fails the test,…
nstilt1 Aug 19, 2025
b29d769
proof that the backends can be fixed to prevent overflowing
nstilt1 Aug 19, 2025
f304e14
fix imports
nstilt1 Aug 19, 2025
acaf4be
added conditional compilation for IETF remaining_blocks()
nstilt1 Aug 19, 2025
0834318
fix unreachable expression :/
nstilt1 Aug 19, 2025
9de7c2f
corrected code for #438; results in big endian issues...
nstilt1 Aug 19, 2025
a1535f5
fixed endianness thanks to dhardy
nstilt1 Aug 19, 2025
b6e7054
fmt
nstilt1 Aug 19, 2025
d42b703
forbidden use of exhausted/wrapped cipher regarding #444, passes test
nstilt1 Aug 19, 2025
7b73d5c
reverted 'remaining_blocks()' and backends due to #444
nstilt1 Aug 19, 2025
62a7e94
implemented 32-bit counter. edited docs a little. implemented core wr…
nstilt1 Aug 22, 2025
de470e1
removed passing counter overflow detection test because get_core() wa…
nstilt1 Aug 22, 2025
bf8845d
made variants public; edited docs a little; added counter wrapping te…
nstilt1 Aug 23, 2025
b222fea
resolve conflicts, but removing the big endian cfg results in a lot o…
nstilt1 Aug 23, 2025
cc0d864
Merge branch 'master' into 64-bit-counter-support-newtype
nstilt1 Aug 23, 2025
10043e8
Merge branch 'master' into 64-bit-counter-support-newtype
nstilt1 Aug 23, 2025
87adeed
added mod sealed; accidentally re-added some stuff from master, but i…
nstilt1 Aug 23, 2025
1dcf453
re-added chacha20.yml from master... I don't know how to git
nstilt1 Aug 23, 2025
54cff81
remove duplicate entries in Cargo.toml
nstilt1 Aug 23, 2025
dde2315
fixed compile error
nstilt1 Aug 23, 2025
4fae983
added empty lines
nstilt1 Aug 23, 2025
6cc9aca
consolidated phantomdata into one field
nstilt1 Aug 23, 2025
a49df62
switched to match statements in backends
nstilt1 Aug 23, 2025
5d54aca
rewrote soft.rs counter logic
nstilt1 Aug 23, 2025
4b1cee1
rewrote rng soft.rs counter
nstilt1 Aug 23, 2025
aa76088
added newline
nstilt1 Aug 23, 2025
f17c7a1
fix clippy warnings
nstilt1 Aug 23, 2025
788e930
fixed another clippy warning. I thought I ran clippy already
nstilt1 Aug 23, 2025
3d9b993
tweak variant
newpavlov Aug 23, 2025
033711c
remove unneeded tests
nstilt1 Aug 23, 2025
1c2afcc
added test vectors found on github from PyCA
nstilt1 Aug 24, 2025
a5cb438
fixed endian issue for the rng regarding #447
nstilt1 Aug 26, 2025
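
Note on the diffs below: both backends become generic over a `Variant` type parameter and branch on `size_of::<V::Counter>()` to pick 32-bit (IETF) or 64-bit (legacy/djb) counter arithmetic. A minimal sketch of what such a trait could look like; the crate's actual (sealed) definition may differ, and the type names `Ietf` and `Legacy` here are illustrative:

```rust
// Hypothetical sketch, not the crate's exact definition: each variant
// fixes a counter width, and backends dispatch on that width at compile
// time, so the `match size_of::<V::Counter>()` branches fold away.
use core::mem::size_of;

trait Variant {
    /// u32 => 32-bit counter in state word 12 (IETF);
    /// u64 => 64-bit counter spanning state words 12 and 13 (legacy).
    type Counter;
}

struct Ietf;
impl Variant for Ietf {
    type Counter = u32;
}

struct Legacy;
impl Variant for Legacy {
    type Counter = u64;
}

fn counter_words<V: Variant>() -> usize {
    // Resolved at monomorphization time; no runtime branch survives.
    size_of::<V::Counter>() / size_of::<u32>()
}

fn main() {
    assert_eq!(counter_words::<Ietf>(), 1);
    assert_eq!(counter_words::<Legacy>(), 2);
}
```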
59 changes: 41 additions & 18 deletions chacha20/src/backends/avx2.rs
@@ -1,9 +1,9 @@
#![allow(unsafe_op_in_unsafe_fn)]
use crate::Rounds;
use crate::{Rounds, Variant};
use core::marker::PhantomData;

#[cfg(feature = "rng")]
use crate::{ChaChaCore, Variant};
use crate::ChaChaCore;

#[cfg(feature = "cipher")]
use crate::{chacha::Block, STATE_WORDS};
@@ -27,10 +27,11 @@ const N: usize = PAR_BLOCKS / 2;
#[inline]
#[target_feature(enable = "avx2")]
#[cfg(feature = "cipher")]
pub(crate) unsafe fn inner<R, F>(state: &mut [u32; STATE_WORDS], f: F)
pub(crate) unsafe fn inner<R, F, V>(state: &mut [u32; STATE_WORDS], f: F)
where
R: Rounds,
F: StreamCipherClosure<BlockSize = U64>,
V: Variant,
{
let state_ptr = state.as_ptr() as *const __m128i;
let v = [
@@ -39,13 +40,21 @@ where
_mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))),
];
let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3)));
c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0));
c = match size_of::<V::Counter>() {
4 => _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)),
8 => _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)),
_ => unreachable!()
};
let mut ctr = [c; N];
for i in 0..N {
ctr[i] = c;
c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2));
c = match size_of::<V::Counter>() {
4 => _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)),
8 => _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)),
_ => unreachable!(),
};
}
let mut backend = Backend::<R> {
let mut backend = Backend::<R, V> {
v,
ctr,
_pd: PhantomData,
@@ -54,6 +63,11 @@ where
f.call(&mut backend);

state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32;
match size_of::<V::Counter>() {
4 => {},
8 => state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32,
_ => unreachable!()
}
}

#[inline]
@@ -71,13 +85,13 @@ where
_mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))),
];
let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3)));
c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0));
c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0));
let mut ctr = [c; N];
for i in 0..N {
ctr[i] = c;
c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2));
c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2));
}
let mut backend = Backend::<R> {
let mut backend = Backend::<R, V> {
v,
ctr,
_pd: PhantomData,
@@ -86,32 +100,37 @@
backend.rng_gen_par_ks_blocks(buffer);

core.state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32;
core.state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32;
}

struct Backend<R: Rounds> {
struct Backend<R: Rounds, V: Variant> {
v: [__m256i; 3],
ctr: [__m256i; N],
_pd: PhantomData<R>,
_pd: PhantomData<(R, V)>,
}

#[cfg(feature = "cipher")]
impl<R: Rounds> BlockSizeUser for Backend<R> {
impl<R: Rounds, V: Variant> BlockSizeUser for Backend<R, V> {
type BlockSize = U64;
}

#[cfg(feature = "cipher")]
impl<R: Rounds> ParBlocksSizeUser for Backend<R> {
impl<R: Rounds, V: Variant> ParBlocksSizeUser for Backend<R, V> {
type ParBlocksSize = U4;
}

#[cfg(feature = "cipher")]
impl<R: Rounds> StreamCipherBackend for Backend<R> {
impl<R: Rounds, V: Variant> StreamCipherBackend for Backend<R, V> {
#[inline(always)]
fn gen_ks_block(&mut self, block: &mut Block) {
unsafe {
let res = rounds::<R>(&self.v, &self.ctr);
for c in self.ctr.iter_mut() {
*c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1));
*c = match size_of::<V::Counter>() {
4 => _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)),
8 => _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)),
_ => unreachable!()
};
}

let res0: [__m128i; 8] = core::mem::transmute(res[0]);
@@ -130,7 +149,11 @@ impl<R: Rounds> StreamCipherBackend for Backend<R> {

let pb = PAR_BLOCKS as i32;
for c in self.ctr.iter_mut() {
*c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb));
*c = match size_of::<V::Counter>() {
4 => _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)),
8 => _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)),
_ => unreachable!()
}
}

let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i;
@@ -147,15 +170,15 @@ impl<R: Rounds> StreamCipherBackend for Backend<R> {
}

#[cfg(feature = "rng")]
impl<R: Rounds> Backend<R> {
impl<R: Rounds, V: Variant> Backend<R, V> {
#[inline(always)]
fn rng_gen_par_ks_blocks(&mut self, blocks: &mut [u32; 64]) {
unsafe {
let vs = rounds::<R>(&self.v, &self.ctr);

let pb = PAR_BLOCKS as i32;
for c in self.ctr.iter_mut() {
*c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb));
*c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64));
}

let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i;
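
Why the avx2.rs changes above matter: with `_mm256_add_epi32`, the counter word wraps in isolation, which is correct IETF behavior; with `_mm256_add_epi64`, the carry out of word 12 propagates into word 13, which is why the 8-byte branch also writes `state[13]` back after the closure runs. A scalar model of the same dispatch, for intuition only; the helper names are made up:

```rust
// Scalar model of the 4- vs 8-byte counter dispatch in avx2.rs.
// Function names are illustrative, not from the crate.

fn bump_counter_32(state: &mut [u32; 16], n: u32) {
    // IETF: the 32-bit counter lives in word 12 alone; word 13 is nonce.
    state[12] = state[12].wrapping_add(n);
}

fn bump_counter_64(state: &mut [u32; 16], n: u64) {
    // Legacy: 64-bit counter in words 12 (low) and 13 (high);
    // _mm256_add_epi64 performs this carry inside each 128-bit lane.
    let ctr = ((state[13] as u64) << 32) | state[12] as u64;
    let ctr = ctr.wrapping_add(n);
    state[12] = ctr as u32;
    state[13] = (ctr >> 32) as u32;
}

fn main() {
    let mut state = [0u32; 16];
    state[12] = u32::MAX;
    bump_counter_64(&mut state, 1);
    // The carry crossed into word 13, which the epi32 path would drop.
    assert_eq!((state[12], state[13]), (0, 1));
}
```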
90 changes: 62 additions & 28 deletions chacha20/src/backends/neon.rs
@@ -4,11 +4,11 @@
//! Adapted from the Crypto++ `chacha_simd` implementation by Jack Lloyd and
//! Jeffrey Walton (public domain).

use crate::{Rounds, STATE_WORDS};
use crate::{Rounds, STATE_WORDS, Variant};
use core::{arch::aarch64::*, marker::PhantomData};

#[cfg(feature = "rand_core")]
use crate::{ChaChaCore, Variant};
use crate::ChaChaCore;

#[cfg(feature = "cipher")]
use crate::chacha::Block;
@@ -19,13 +19,26 @@ use cipher::{
consts::{U4, U64},
};

struct Backend<R: Rounds> {
struct Backend<R: Rounds, V: Variant> {
state: [uint32x4_t; 4],
ctrs: [uint32x4_t; 4],
_pd: PhantomData<R>,
_pd: PhantomData<(R, V)>,
}

impl<R: Rounds> Backend<R> {
macro_rules! add_counter {
($a:expr, $b:expr, $variant:ty) => {
match size_of::<<$variant>::Counter>() {
4 => vaddq_u32($a, $b),
8 => vreinterpretq_u32_u64(vaddq_u64(
vreinterpretq_u64_u32($a),
vreinterpretq_u64_u32($b),
)),
_ => unreachable!(),
}
};
}

impl<R: Rounds, V: Variant> Backend<R, V> {
#[inline]
unsafe fn new(state: &mut [u32; STATE_WORDS]) -> Self {
let state = [
@@ -40,7 +53,7 @@ impl<R: Rounds> Backend<R> {
vld1q_u32([3, 0, 0, 0].as_ptr()),
vld1q_u32([4, 0, 0, 0].as_ptr()),
];
Backend::<R> {
Backend::<R, V> {
state,
ctrs,
_pd: PhantomData,
@@ -51,16 +64,24 @@
#[inline]
#[cfg(feature = "cipher")]
#[target_feature(enable = "neon")]
pub(crate) unsafe fn inner<R, F>(state: &mut [u32; STATE_WORDS], f: F)
pub(crate) unsafe fn inner<R, F, V>(state: &mut [u32; STATE_WORDS], f: F)
where
R: Rounds,
F: StreamCipherClosure<BlockSize = U64>,
V: Variant,
{
let mut backend = Backend::<R>::new(state);
let mut backend = Backend::<R, V>::new(state);

f.call(&mut backend);

vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]);
match size_of::<V::Counter>() {
4 => state[12] = vgetq_lane_u32(backend.state[3], 0),
8 => vst1q_u64(
state.as_mut_ptr().offset(12) as *mut u64,
vreinterpretq_u64_u32(backend.state[3]),
),
_ => unreachable!(),
}
}

#[inline]
@@ -73,19 +94,22 @@
R: Rounds,
V: Variant,
{
let mut backend = Backend::<R>::new(&mut core.state);
let mut backend = Backend::<R, V>::new(&mut core.state);

backend.write_par_ks_blocks(buffer);

vst1q_u32(core.state.as_mut_ptr().offset(12), backend.state[3]);
vst1q_u64(
core.state.as_mut_ptr().offset(12) as *mut u64,
vreinterpretq_u64_u32(backend.state[3]),
);
}

#[cfg(feature = "cipher")]
impl<R: Rounds> BlockSizeUser for Backend<R> {
impl<R: Rounds, V: Variant> BlockSizeUser for Backend<R, V> {
type BlockSize = U64;
}
#[cfg(feature = "cipher")]
impl<R: Rounds> ParBlocksSizeUser for Backend<R> {
impl<R: Rounds, V: Variant> ParBlocksSizeUser for Backend<R, V> {
type ParBlocksSize = U4;
}

@@ -97,15 +121,15 @@ macro_rules! add_assign_vec {
}

#[cfg(feature = "cipher")]
impl<R: Rounds> StreamCipherBackend for Backend<R> {
impl<R: Rounds, V: Variant> StreamCipherBackend for Backend<R, V> {
#[inline(always)]
fn gen_ks_block(&mut self, block: &mut Block) {
let state3 = self.state[3];
let mut par = ParBlocks::<Self>::default();
self.gen_par_ks_blocks(&mut par);
*block = par[0];
unsafe {
self.state[3] = vaddq_u32(state3, vld1q_u32([1, 0, 0, 0].as_ptr()));
self.state[3] = add_counter!(state3, vld1q_u32([1, 0, 0, 0].as_ptr()), V);
}
}

@@ -118,19 +142,19 @@ impl<R: Rounds> StreamCipherBackend for Backend<R> {
self.state[0],
self.state[1],
self.state[2],
vaddq_u32(self.state[3], self.ctrs[0]),
add_counter!(self.state[3], self.ctrs[0], V),
],
[
self.state[0],
self.state[1],
self.state[2],
vaddq_u32(self.state[3], self.ctrs[1]),
add_counter!(self.state[3], self.ctrs[1], V),
],
[
self.state[0],
self.state[1],
self.state[2],
vaddq_u32(self.state[3], self.ctrs[2]),
add_counter!(self.state[3], self.ctrs[2], V),
],
];

@@ -140,11 +164,16 @@ impl<R: Rounds> StreamCipherBackend for Backend<R> {

for block in 0..4 {
// add state to block
for state_row in 0..4 {
for state_row in 0..3 {
add_assign_vec!(blocks[block][state_row], self.state[state_row]);
}
if block > 0 {
blocks[block][3] = vaddq_u32(blocks[block][3], self.ctrs[block - 1]);
add_assign_vec!(
blocks[block][3],
add_counter!(self.state[3], self.ctrs[block - 1], V)
);
} else {
add_assign_vec!(blocks[block][3], self.state[3]);
}
// write blocks to dest
for state_row in 0..4 {
Expand All @@ -154,7 +183,7 @@ impl<R: Rounds> StreamCipherBackend for Backend<R> {
);
}
}
self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]);
self.state[3] = add_counter!(self.state[3], self.ctrs[3], V);
}
}
}
@@ -180,7 +209,7 @@ macro_rules! extract {
};
}

impl<R: Rounds> Backend<R> {
impl<R: Rounds, V: Variant> Backend<R, V> {
#[inline(always)]
/// Generates `num_blocks` blocks and blindly writes them to `dest_ptr`
///
@@ -197,19 +226,19 @@ impl<R: Rounds> Backend<R> {
self.state[0],
self.state[1],
self.state[2],
vaddq_u32(self.state[3], self.ctrs[0]),
add_counter!(self.state[3], self.ctrs[0], V),
],
[
self.state[0],
self.state[1],
self.state[2],
vaddq_u32(self.state[3], self.ctrs[1]),
add_counter!(self.state[3], self.ctrs[1], V),
],
[
self.state[0],
self.state[1],
self.state[2],
vaddq_u32(self.state[3], self.ctrs[2]),
add_counter!(self.state[3], self.ctrs[2], V),
],
];

@@ -220,11 +249,16 @@
let mut dest_ptr = buffer.as_mut_ptr() as *mut u8;
for block in 0..4 {
// add state to block
for state_row in 0..4 {
for state_row in 0..3 {
add_assign_vec!(blocks[block][state_row], self.state[state_row]);
}
if block > 0 {
blocks[block][3] = vaddq_u32(blocks[block][3], self.ctrs[block - 1]);
add_assign_vec!(
blocks[block][3],
add_counter!(self.state[3], self.ctrs[block - 1], V)
);
} else {
add_assign_vec!(blocks[block][3], self.state[3]);
}
// write blocks to buffer
for state_row in 0..4 {
@@ -235,7 +269,7 @@ impl<R: Rounds> Backend<R> {
}
dest_ptr = dest_ptr.add(64);
}
self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]);
self.state[3] = add_counter!(self.state[3], self.ctrs[3], V);
}
}
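
The `add_counter!` macro above works around NEON's lack of a four-lane u32 add with 64-bit carry: it reinterprets the vector as two u64 lanes, adds, and reinterprets back, so lane 0 (word 12) carries into lane 1 (word 13). A standalone sketch of the same trick; the aarch64 intrinsics are real, the wrapper function is illustrative:

```rust
// Sketch of the reinterpret-and-add trick behind add_counter! in neon.rs.
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn add_counter_u64(row3: uint32x4_t, incr: uint32x4_t) -> uint32x4_t {
    // View four u32 lanes as two u64 lanes, add with 64-bit carry
    // (lane 0 carries into lane 1), then view them as u32 lanes again.
    vreinterpretq_u32_u64(vaddq_u64(
        vreinterpretq_u64_u32(row3),
        vreinterpretq_u64_u32(incr),
    ))
}
```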
