diff --git a/crypto/fipsmodule/ec/gfp_p256.c b/crypto/fipsmodule/ec/gfp_p256.c index 2aa0ae2ce2..f74613f946 100644 --- a/crypto/fipsmodule/ec/gfp_p256.c +++ b/crypto/fipsmodule/ec/gfp_p256.c @@ -13,42 +13,3 @@ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "./p256_shared.h" - -#include "../../limbs/limbs.h" - -#if !defined(OPENSSL_USE_NISTZ256) - -typedef Limb ScalarMont[P256_LIMBS]; -typedef Limb Scalar[P256_LIMBS]; - -#include "../bn/internal.h" - -static const BN_ULONG N[P256_LIMBS] = { -#if defined(OPENSSL_64_BIT) - 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 -#else - 0xfc632551, 0xf3b9cac2, 0xa7179e84, 0xbce6faad, 0xffffffff, 0xffffffff, 0, - 0xffffffff -#endif -}; - -static const BN_ULONG N_N0[] = { - BN_MONT_CTX_N0(0xccd1c8aa, 0xee00bc4f) -}; - -void p256_scalar_mul_mont(ScalarMont r, const ScalarMont a, - const ScalarMont b) { - /* XXX: Inefficient. TODO: optimize with dedicated multiplication routine. */ - bn_mul_mont_small(r, a, b, N, N_N0, P256_LIMBS); -} - -/* XXX: Inefficient. TODO: optimize with dedicated squaring routine. 
*/ -void p256_scalar_sqr_rep_mont(ScalarMont r, const ScalarMont a, Limb rep) { - dev_assert_secret(rep >= 1); - p256_scalar_mul_mont(r, a, a); - for (Limb i = 1; i < rep; ++i) { - p256_scalar_mul_mont(r, r, r); - } -} - -#endif diff --git a/crypto/fipsmodule/ec/gfp_p384.c b/crypto/fipsmodule/ec/gfp_p384.c index 67f8931b89..780d14e224 100644 --- a/crypto/fipsmodule/ec/gfp_p384.c +++ b/crypto/fipsmodule/ec/gfp_p384.c @@ -39,16 +39,6 @@ static const BN_ULONG Q[P384_LIMBS] = { #endif }; -static const BN_ULONG N[P384_LIMBS] = { -#if defined(OPENSSL_64_BIT) - 0xecec196accc52973, 0x581a0db248b0a77a, 0xc7634d81f4372ddf, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff -#else - 0xccc52973, 0xecec196a, 0x48b0a77a, 0x581a0db2, 0xf4372ddf, 0xc7634d81, - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff -#endif -}; - static const BN_ULONG ONE[P384_LIMBS] = { #if defined(OPENSSL_64_BIT) 0xffffffff00000001, 0xffffffff, 1, 0, 0 @@ -71,10 +61,6 @@ static const BN_ULONG Q_N0[] = { BN_MONT_CTX_N0(1, 1) }; -static const BN_ULONG N_N0[] = { - BN_MONT_CTX_N0(0x6ed46089, 0xe88fdc45) -}; - /* XXX: MSVC for x86 warns when it fails to inline these functions it should * probably inline. */ #if defined(_MSC_VER) && !defined(__clang__) && defined(OPENSSL_X86) @@ -212,13 +198,6 @@ void p384_elem_neg(Elem r, const Elem a) { } -void p384_scalar_mul_mont(ScalarMont r, const ScalarMont a, - const ScalarMont b) { - /* XXX: Inefficient. TODO: Add dedicated multiplication routine. */ - bn_mul_mont_small(r, a, b, N, N_N0, P384_LIMBS); -} - - /* TODO(perf): Optimize this. 
 */
 static void p384_point_select_w5(P384_POINT *out,
diff --git a/src/arithmetic/inout.rs b/src/arithmetic/inout.rs
index 54a5400348..4366320290 100644
--- a/src/arithmetic/inout.rs
+++ b/src/arithmetic/inout.rs
@@ -237,3 +237,36 @@ where
         ra.with_potentially_dangling_non_null_pointers_ra(expected_len, |r, a| f(r, a, b.as_ptr()))
     }
 }
+
+pub struct AliasingSlices3FromRawParts<T> {
+    r: *mut T,
+    a: *const T,
+    b: *const T,
+    len: NonZeroUsize,
+}
+
+impl<T> AliasingSlices3FromRawParts<T> {
+    #[inline(always)]
+    pub unsafe fn new_rab_unchecked(
+        r: *mut T,
+        a: *const T,
+        b: *const T,
+        len: NonZeroUsize,
+    ) -> Self {
+        Self { r, a, b, len }
+    }
+}
+
+impl<T> AliasingSlices3<T> for AliasingSlices3FromRawParts<T> {
+    #[inline(always)]
+    fn with_potentially_dangling_non_null_pointers_rab<R>(
+        self,
+        expected_len: usize,
+        f: impl FnOnce(*mut T, *const T, *const T) -> R,
+    ) -> Result<R, LenMismatchError> {
+        if expected_len != self.len.get() {
+            return Err(LenMismatchError::new(self.len.get()));
+        }
+        Ok(f(self.r, self.a, self.b))
+    }
+}
diff --git a/src/arithmetic/montgomery.rs b/src/arithmetic/montgomery.rs
index 091c2d7f29..cd1f48a6c1 100644
--- a/src/arithmetic/montgomery.rs
+++ b/src/arithmetic/montgomery.rs
@@ -123,7 +123,7 @@ impl ProductEncoding for (RRR, RInverse) {
 use crate::{bssl, c, limb::Limb};
 
 #[inline(always)]
-pub(super) fn limbs_mul_mont(
+pub(crate) fn limbs_mul_mont(
     in_out: impl AliasingSlices3<Limb>,
     n: &[Limb],
     n0: &N0,
diff --git a/src/ec/suite_b/ops.rs b/src/ec/suite_b/ops.rs
index 7c2ed7f208..1e25748279 100644
--- a/src/ec/suite_b/ops.rs
+++ b/src/ec/suite_b/ops.rs
@@ -908,9 +908,7 @@ mod tests {
     #[test]
     fn p256_scalar_square_test() {
-        prefixed_extern!
{ - fn p256_scalar_sqr_rep_mont(r: *mut Limb, a: *const Limb, rep: LeakyWord); - } + use super::p256::p256_scalar_sqr_rep_mont; scalar_square_test( &p256::SCALAR_OPS, p256_scalar_sqr_rep_mont, diff --git a/src/ec/suite_b/ops/elem.rs b/src/ec/suite_b/ops/elem.rs index 22c60161ea..7970f661dc 100644 --- a/src/ec/suite_b/ops/elem.rs +++ b/src/ec/suite_b/ops/elem.rs @@ -35,8 +35,8 @@ impl NumLimbs { pub(super) const fn into(self) -> usize { match self { - NumLimbs::P256 => P256_NUM_LIMBS, - NumLimbs::P384 => P384_NUM_LIMBS, + NumLimbs::P256 => P256_NUM_LIMBS.get(), + NumLimbs::P384 => P384_NUM_LIMBS.get(), } } } diff --git a/src/ec/suite_b/ops/p256.rs b/src/ec/suite_b/ops/p256.rs index cd5806606e..99ec12c292 100644 --- a/src/ec/suite_b/ops/p256.rs +++ b/src/ec/suite_b/ops/p256.rs @@ -16,9 +16,11 @@ use super::{ elem::{binary_op, binary_op_assign}, elem_sqr_mul, elem_sqr_mul_acc, PublicModulus, *, }; +use crate::polyfill::unwrap_const; use cfg_if::cfg_if; +use core::num::NonZeroUsize; -pub(super) const NUM_LIMBS: usize = 256 / LIMB_BITS; +pub(super) const NUM_LIMBS: NonZeroUsize = unwrap_const(NonZeroUsize::new(256 / LIMB_BITS)); pub static COMMON_OPS: CommonOps = CommonOps { num_limbs: elem::NumLimbs::P256, @@ -119,6 +121,54 @@ pub static SCALAR_OPS: ScalarOps = ScalarOps { scalar_mul_mont: p256_scalar_mul_mont, }; +cfg_if! { + if #[cfg(any(all(target_arch = "aarch64", target_endian = "little"), + target_arch = "x86_64"))] { + prefixed_extern! 
{ + fn p256_scalar_mul_mont( + r: *mut Limb, // [COMMON_OPS.num_limbs] + a: *const Limb, // [COMMON_OPS.num_limbs] + b: *const Limb); // [COMMON_OPS.num_limbs] + pub(super) fn p256_scalar_sqr_rep_mont( + r: *mut Limb, // [COMMON_OPS.num_limbs] + a: *const Limb, // [COMMON_OPS.num_limbs] + rep: LeakyWord); + } + } else { + use crate::arithmetic::{inout::AliasingSlices3FromRawParts, LimbSliceError}; + + static N_N0: N0 = N0::precalculated(0xccd1c8aa_ee00bc4f); + + unsafe extern "C" fn p256_scalar_mul_mont( + r: *mut Limb, // [COMMON_OPS.num_limbs] + a: *const Limb, // [COMMON_OPS.num_limbs] + b: *const Limb, // [COMMON_OPS.num_limbs] + ) { + // XXX: Inefficient. TODO: optimize with dedicated multiplication routine + // TODO: Caller should pass in an `impl AliasingSlices3`. + let in_out = unsafe { AliasingSlices3FromRawParts::new_rab_unchecked(r, a, b, NUM_LIMBS) }; + let n = &COMMON_OPS.n.limbs[..NUM_LIMBS.get()]; + let cpu = cpu::features(); // TODO: caller should supply this + limbs_mul_mont(in_out, n, &N_N0, cpu).unwrap_or_else(|e| match e { + LimbSliceError::LenMismatch(_) + | LimbSliceError::TooShort(_) + | LimbSliceError::TooLong(_) => unreachable!(), + }) + } + + pub(super) unsafe extern "C" fn p256_scalar_sqr_rep_mont( + r: *mut Limb, // [COMMON_OPS.num_limbs] + a: *const Limb, // [COMMON_OPS.num_limbs] + rep: LeakyWord) { + debug_assert!(rep >= 1); + unsafe { p256_scalar_mul_mont(r, a, a); } + for _ in 1..rep { + unsafe { p256_scalar_mul_mont(r, r, r); } + } + } + } +} + pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { scalar_ops: &SCALAR_OPS, public_key_ops: &PUBLIC_KEY_OPS, @@ -307,17 +357,6 @@ prefixed_extern! 
{ p_x: *const Limb, // [COMMON_OPS.num_limbs] p_y: *const Limb, // [COMMON_OPS.num_limbs] ); - - fn p256_scalar_mul_mont( - r: *mut Limb, // [COMMON_OPS.num_limbs] - a: *const Limb, // [COMMON_OPS.num_limbs] - b: *const Limb, // [COMMON_OPS.num_limbs] - ); - fn p256_scalar_sqr_rep_mont( - r: *mut Limb, // [COMMON_OPS.num_limbs] - a: *const Limb, // [COMMON_OPS.num_limbs] - rep: LeakyWord, - ); } #[cfg(test)] diff --git a/src/ec/suite_b/ops/p384.rs b/src/ec/suite_b/ops/p384.rs index 9d541264c0..03d1318c67 100644 --- a/src/ec/suite_b/ops/p384.rs +++ b/src/ec/suite_b/ops/p384.rs @@ -16,8 +16,13 @@ use super::{ elem::{binary_op, binary_op_assign}, elem_sqr_mul, elem_sqr_mul_acc, PublicModulus, *, }; +use crate::{ + arithmetic::{inout::AliasingSlices3FromRawParts, LimbSliceError}, + polyfill::unwrap_const, +}; +use core::num::NonZeroUsize; -pub(super) const NUM_LIMBS: usize = 384 / LIMB_BITS; +pub(super) const NUM_LIMBS: NonZeroUsize = unwrap_const(NonZeroUsize::new(384 / LIMB_BITS)); pub static COMMON_OPS: CommonOps = CommonOps { num_limbs: elem::NumLimbs::P384, @@ -120,6 +125,25 @@ pub static SCALAR_OPS: ScalarOps = ScalarOps { scalar_mul_mont: p384_scalar_mul_mont, }; +static N_N0: N0 = N0::precalculated(0x6ed46089_e88fdc45); + +unsafe extern "C" fn p384_scalar_mul_mont( + r: *mut Limb, // [COMMON_OPS.num_limbs] + a: *const Limb, // [COMMON_OPS.num_limbs] + b: *const Limb, // [COMMON_OPS.num_limbs] +) { + // XXX: Inefficient. TODO: optimize with dedicated multiplication routine + // TODO: Caller should pass in an `impl AliasingSlices3`. 
+ let in_out = unsafe { AliasingSlices3FromRawParts::new_rab_unchecked(r, a, b, NUM_LIMBS) }; + let n = &COMMON_OPS.n.limbs[..NUM_LIMBS.get()]; + let cpu = cpu::features(); // TODO: caller should supply this + limbs_mul_mont(in_out, n, &N_N0, cpu).unwrap_or_else(|e| match e { + LimbSliceError::LenMismatch(_) + | LimbSliceError::TooShort(_) + | LimbSliceError::TooLong(_) => unreachable!(), + }) +} + pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { scalar_ops: &SCALAR_OPS, public_key_ops: &PUBLIC_KEY_OPS, @@ -311,10 +335,4 @@ prefixed_extern! { p_x: *const Limb, // [COMMON_OPS.num_limbs] p_y: *const Limb, // [COMMON_OPS.num_limbs] ); - - fn p384_scalar_mul_mont( - r: *mut Limb, // [COMMON_OPS.num_limbs] - a: *const Limb, // [COMMON_OPS.num_limbs] - b: *const Limb, // [COMMON_OPS.num_limbs] - ); }