From 8c5e37ce7ba23579c216b981485c1575e59882b2 Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Wed, 17 Jun 2026 22:17:27 -0600 Subject: [PATCH] k256: w-NAF linear combinations / multiscalar multiplications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires up `WnafBase::multiscalar_mul` as the backend for `LinearCombination::lincomb_vartime` when the `alloc` feature is enabled. Provides a ~16% speedup for the 2-term case. high-level operations/lincomb_vartime (2-term) time: [29.102 µs 29.277 µs 29.465 µs] change: [−16.175% −15.781% −15.426%] (p = 0.00 < 0.05) Performance has improved. --- k256/benches/scalar.rs | 12 +++++- k256/src/arithmetic/mul.rs | 74 +++++++++++++++++++++++++-------- k256/src/arithmetic/mul/wnaf.rs | 16 ++++++- 3 files changed, 82 insertions(+), 20 deletions(-) diff --git a/k256/benches/scalar.rs b/k256/benches/scalar.rs index f265b9022..8fe27c92a 100644 --- a/k256/benches/scalar.rs +++ b/k256/benches/scalar.rs @@ -54,14 +54,22 @@ fn bench_point_lincomb(group: &mut BenchmarkGroup<'_, M>) { let p = ProjectivePoint::GENERATOR; let m = hex!("AA5E28D6A97A2479A65527F7290311A3624D4CC0FA1578598EE3C2613BF99522"); let s = Scalar::from_repr(m.into()).unwrap(); - group.bench_function("lincomb via mul+add", |b| { + group.bench_function("lincomb (unoptimized, 2-term)", |b| { b.iter(|| black_box(p) * black_box(s) + black_box(p) * black_box(s)) }); - group.bench_function("lincomb()", |b| { + group.bench_function("lincomb (optimized, 2-term)", |b| { b.iter(|| { ProjectivePoint::lincomb(&[(black_box(p), black_box(s)), (black_box(p), black_box(s))]) }) }); + group.bench_function("lincomb_vartime (2-term)", |b| { + b.iter(|| { + ProjectivePoint::lincomb_vartime(&[ + (black_box(p), black_box(s)), + (black_box(p), black_box(s)), + ]) + }) + }); } fn bench_point_mul_by_generator(group: &mut BenchmarkGroup<'_, M>) { diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index 65b2a027f..a7f89f654 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -53,9 +53,9 @@ use primeorder::Radix16Decomposition; #[cfg(feature = "alloc")] use { self::wnaf::{WnafBase, WnafScalar}, + alloc::vec::Vec, primeorder::PrimeFieldExt, }; - #[cfg(feature = "precomputed-tables")] use {super::tables::BASEPOINT_TABLE, elliptic_curve::array::sizes::U65}; @@ -224,10 +224,18 @@ impl LinearCombination<[(ProjectivePoint, Scalar); N]> for Proje } fn lincomb_vartime(points_and_scalars: &[(ProjectivePoint, Scalar); N]) -> Self { - let mut tables = [(LookupTable::default(), LookupTable::default()); N]; - let mut digits: [(Radix16Decomposition, Radix16Decomposition); N] = - array::from_fn(|_| Default::default()); - lincomb_vartime(points_and_scalars, &mut tables, &mut digits) + #[cfg(not(feature = "alloc"))] + { + let mut tables = [(LookupTable::default(), LookupTable::default()); N]; + let mut digits: [(Radix16Decomposition, Radix16Decomposition); N] = + array::from_fn(|_| Default::default()); + lincomb_vartime(points_and_scalars, &mut tables, &mut digits) + } + + #[cfg(feature = "alloc")] + { + lincomb_vartime(points_and_scalars) + } } } @@ -249,17 +257,25 @@ impl LinearCombination<[(ProjectivePoint, Scalar)]> for ProjectivePoint { #[cfg(feature = "alloc")] fn lincomb_vartime(points_and_scalars: &[(ProjectivePoint, Scalar)]) -> Self { - let mut tables = - vec![(LookupTable::default(), LookupTable::default()); points_and_scalars.len()]; - let mut digits = vec![ - ( - Radix16Decomposition::::default(), - Radix16Decomposition::::default(), - ); - points_and_scalars.len() - ]; + #[cfg(not(feature = "alloc"))] + { + let mut tables = + vec![(LookupTable::default(), LookupTable::default()); points_and_scalars.len()]; + let mut digits = vec![ + ( + Radix16Decomposition::::default(), + Radix16Decomposition::::default(), + ); + points_and_scalars.len() + ]; + + lincomb_vartime(points_and_scalars, &mut tables, &mut digits) + } - lincomb_vartime(points_and_scalars, &mut tables, &mut digits) + #[cfg(feature = "alloc")] + { + lincomb_vartime(points_and_scalars) + } } } @@ -318,7 +334,12 @@ fn lincomb( } /// Linear combination (a.k.a. multiscalar multiplication) implemented in variable-time. +/// +/// This implementation reuses the radix-16 decomposition from the constant-time implementation and +/// has no `alloc` dependency but is slower than w-NAF, so it is only used when `alloc` isn't +/// available. // TODO(tarcieri): fully eliminate constant-time constructions +#[cfg(not(feature = "alloc"))] fn lincomb_vartime( xks: &[(ProjectivePoint, Scalar)], tables: &mut [(LookupTable, LookupTable)], @@ -372,6 +393,25 @@ fn lincomb_vartime( acc } +/// Linear combination (a.k.a. multiscalar multiplication) implemented in variable-time. +/// +/// This implementation uses w-NAF and provides the best performance but requires `alloc`. +#[cfg(feature = "alloc")] +fn lincomb_vartime(xks: &[(ProjectivePoint, Scalar)]) -> ProjectivePoint { + let mut bases: Vec> = Vec::with_capacity(xks.len() * 2); + let mut scalars: Vec> = Vec::with_capacity(xks.len() * 2); + + for (x, k) in xks { + let ([b0, b1], [s0, s1]) = decompose_glv_wnaf(x, k); + bases.push(b0); + bases.push(b1); + scalars.push(s0); + scalars.push(s1); + } + + WnafBase::multiscalar_mul(scalars, bases) +} + impl ProjectivePoint { /// Calculates `k * G`, where `G` is the generator. pub fn mul_by_generator(k: &Scalar) -> ProjectivePoint { @@ -443,7 +483,7 @@ fn mul_vartime(x: &ProjectivePoint, k: &Scalar) -> ProjectivePoint { #[cfg(feature = "alloc")] fn mul_vartime(x: &ProjectivePoint, k: &Scalar) -> ProjectivePoint { let (bases, scalars) = decompose_glv_wnaf(x, k); - WnafBase::multiscalar_mul(&scalars, &bases) + WnafBase::multiscalar_mul_array(&scalars, &bases) } /// GLV-decompose `k` for `x`: two `(WnafBase, WnafScalar)` pairs representing `r1 * self_signed` @@ -540,7 +580,7 @@ impl MulByGeneratorVartime for ProjectivePoint { fn mul_by_generator_and_mul_add_vartime(a: &Self::Scalar, b: &Self::Scalar, p: &Self) -> Self { let ([gb0, gb1], [gs0, gs1]) = decompose_glv_wnaf(&ProjectivePoint::GENERATOR, a); let ([pb0, pb1], [ps0, ps1]) = decompose_glv_wnaf(p, b); - WnafBase::multiscalar_mul(&[gs0, gs1, ps0, ps1], &[gb0, gb1, pb0, pb1]) + WnafBase::multiscalar_mul_array(&[gs0, gs1, ps0, ps1], &[gb0, gb1, pb0, pb1]) } } diff --git a/k256/src/arithmetic/mul/wnaf.rs b/k256/src/arithmetic/mul/wnaf.rs index 2ee4c460e..136a11ed1 100644 --- a/k256/src/arithmetic/mul/wnaf.rs +++ b/k256/src/arithmetic/mul/wnaf.rs @@ -64,8 +64,22 @@ impl WnafBase { /// /// Computes a sum-of-products `aA + bB + ...` in variable time with w-NAF multi-exponentiation /// using the interleaved window method, also known as Straus' method. + pub fn multiscalar_mul(scalars: I, bases: J) -> G + where + I: IntoIterator>, + J: IntoIterator, + { + let wnafs = scalars.into_iter().map(|s| s.wnaf).collect::>(); + let tables = bases.into_iter().map(|b| b.table).collect::>(); + wnaf_multi_exp(tables.as_slice(), wnafs.as_slice()) + } + + /// Perform a multiscalar multiplication with fixed-size array arguments. + /// + /// Computes a sum-of-products `aA + bB + ...` in variable time with w-NAF multi-exponentiation + /// using the interleaved window method, also known as Straus' method. #[must_use] - pub fn multiscalar_mul( + pub fn multiscalar_mul_array( scalars: &[WnafScalar; N], bases: &[Self; N], ) -> G {