From c99dd8e4b3e6f16501975ad36e1a38d0ab0ed6a5 Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Thu, 23 Apr 2026 18:11:25 -0600 Subject: [PATCH 1/9] k256: endomorphism-aware wNAF for vartime scalar multiplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the placeholder MulVartime / MulByGeneratorVartime impls (which just called the constant-time path and had TODOs to match) with a width-5 wNAF that uses the GLV endomorphism to split each scalar into two ~128-bit halves. Schnorr verify: ~62 µs -> ~53 µs (14% faster, no precomputed-tables; ~55 µs with tables). Addresses RustCrypto/elliptic-curves#1725. Co-Authored-By: Claude Opus 4.7 (1M context) --- k256/src/arithmetic/mul.rs | 183 ++++++++++++++++++++++++++++++++++--- 1 file changed, 172 insertions(+), 11 deletions(-) diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index 9012b3999..e4d1f6d15 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -316,6 +316,136 @@ fn lincomb( acc } +/// Width of the wNAF window. Digits are odd values in `[-(2^(W-1) - 1), 2^(W-1) - 1]`. +const WNAF_WIDTH: usize = 5; + +/// Number of precomputed odd multiples per point: `[P, 3P, 5P, ..., 15P]`. +const WNAF_TABLE_SIZE: usize = 1 << (WNAF_WIDTH - 2); + +/// Output length for a signed-digit wNAF of a <= 129-bit value (128-bit GLV half plus a carry bit). +const WNAF_DIGITS: usize = 130; + +/// Compute a width-`WNAF_WIDTH` signed-digit non-adjacent form of `k`, where `k` is known to fit +/// in 128 bits (magnitude only — sign is tracked separately by the caller). The output array has +/// one entry per bit, with zero entries meaning "skip this step". Nonzero entries are odd and in +/// `[-(2^(W-1) - 1), 2^(W-1) - 1]`. +/// +/// Callers must only pass values whose magnitude is < 2^128, which is the GLV guarantee. +fn wnaf_128(k: &Scalar) -> [i8; WNAF_DIGITS] { + // Load the low 128 bits as little-endian u64 limbs. `to_bytes` is big-endian. 
+ let bytes = k.to_bytes(); + let mut lo = u64::from_be_bytes(bytes[24..32].try_into().expect("8 bytes")); + let mut hi = u64::from_be_bytes(bytes[16..24].try_into().expect("8 bytes")); + + let width_mask: u64 = (1 << WNAF_WIDTH) - 1; + let half: u64 = 1 << (WNAF_WIDTH - 1); + + let mut out = [0i8; WNAF_DIGITS]; + let mut i = 0; + while (lo | hi) != 0 { + if (lo & 1) == 1 { + // d = k mod 2^W, recentered into [-2^(W-1) + 1, 2^(W-1) - 1] + let mut d = (lo & width_mask) as i64; + if d >= half as i64 { + d -= 1 << WNAF_WIDTH; + } + out[i] = d as i8; + + // k -= d (128-bit signed update) + if d < 0 { + // k -= (negative d) == k += |d| + let add = (-d) as u64; + let (new_lo, carry) = lo.overflowing_add(add); + lo = new_lo; + if carry { + hi = hi.wrapping_add(1); + } + } else { + let sub = d as u64; + let (new_lo, borrow) = lo.overflowing_sub(sub); + lo = new_lo; + if borrow { + hi = hi.wrapping_sub(1); + } + } + } + // Shift right by 1 across the 128-bit value. + lo = (lo >> 1) | (hi << 63); + hi >>= 1; + i += 1; + } + out +} + +/// Build `[P, 3P, 5P, ..., (2*WNAF_TABLE_SIZE - 1)P]` in projective coordinates. +fn build_odd_multiples(p: &ProjectivePoint) -> [ProjectivePoint; WNAF_TABLE_SIZE] { + let mut out = [ProjectivePoint::IDENTITY; WNAF_TABLE_SIZE]; + let two_p = p.double(); + out[0] = *p; + for i in 1..WNAF_TABLE_SIZE { + out[i] = out[i - 1] + two_p; + } + out +} + +/// Variable-time `k * P` using GLV + width-5 wNAF. +/// +/// SECURITY: not constant time. Only call with non-secret scalars. 
+fn mul_vartime_impl(p: &ProjectivePoint, k: &Scalar) -> ProjectivePoint { + let (r1, r2) = decompose_scalar(k); + let r1_neg = bool::from(r1.is_high()); + let r2_neg = bool::from(r2.is_high()); + let r1 = if r1_neg { -r1 } else { r1 }; + let r2 = if r2_neg { -r2 } else { r2 }; + + let p1 = if r1_neg { -*p } else { *p }; + let p_beta = p.endomorphism(); + let p2 = if r2_neg { -p_beta } else { p_beta }; + + let table1 = build_odd_multiples(&p1); + let table2 = build_odd_multiples(&p2); + + let naf1 = wnaf_128(&r1); + let naf2 = wnaf_128(&r2); + + // Find the highest nonzero digit across either NAF. + let mut top = WNAF_DIGITS; + while top > 0 && naf1[top - 1] == 0 && naf2[top - 1] == 0 { + top -= 1; + } + if top == 0 { + return ProjectivePoint::IDENTITY; + } + + // Standard left-to-right double-and-add-with-signed-digits. + let mut acc = ProjectivePoint::IDENTITY; + for i in (0..top).rev() { + acc = acc.double(); + + let d1 = naf1[i]; + if d1 != 0 { + let idx = ((d1.unsigned_abs()) >> 1) as usize; + if d1 > 0 { + acc += &table1[idx]; + } else { + acc += &(-table1[idx]); + } + } + + let d2 = naf2[i]; + if d2 != 0 { + let idx = ((d2.unsigned_abs()) >> 1) as usize; + if d2 > 0 { + acc += &table2[idx]; + } else { + acc += &(-table2[idx]); + } + } + } + + acc +} + impl ProjectivePoint { /// Calculates `k * G`, where `G` is the generator. #[cfg(not(feature = "precomputed-tables"))] @@ -374,39 +504,43 @@ impl Mul<&Scalar> for ProjectivePoint { impl MulVartime for ProjectivePoint { fn mul_vartime(self, other: Scalar) -> ProjectivePoint { - // TODO(tarcieri): actual vartime implementation (i.e. wNAF) - mul(&self, &other) + mul_vartime_impl(&self, &other) } } impl MulVartime<&Scalar> for &ProjectivePoint { fn mul_vartime(self, other: &Scalar) -> ProjectivePoint { - // TODO(tarcieri): actual vartime implementation (i.e. 
wNAF) - mul(self, other) + mul_vartime_impl(self, other) } } impl MulVartime<&Scalar> for ProjectivePoint { - // TODO(tarcieri): actual vartime implementation (i.e. wNAF) fn mul_vartime(self, other: &Scalar) -> ProjectivePoint { - mul(&self, other) + mul_vartime_impl(&self, other) } } impl MulByGeneratorVartime for ProjectivePoint { - // TODO(tarcieri): actual vartime implementation (i.e. wNAF) fn mul_by_generator_vartime(k: &Scalar) -> ProjectivePoint { - Self::mul_by_generator(k) + // The precomputed basepoint table is already constant-time fast; beating it with wNAF + // would require a much larger vartime-specific table. When tables are unavailable, + // fall back to the endomorphism-aware vartime mul on the generator. + #[cfg(feature = "precomputed-tables")] + { + Self::mul_by_generator(k) + } + #[cfg(not(feature = "precomputed-tables"))] + { + mul_vartime_impl(&Self::GENERATOR, k) + } } - // When the basepoint tables aren't available, use linear combinations for this computation. - #[cfg(not(feature = "precomputed-tables"))] fn mul_by_generator_and_mul_add_vartime( a: &Self::Scalar, b_scalar: &Self::Scalar, b_point: &Self, ) -> Self { - Self::lincomb(&[(Self::GENERATOR, *a), (*b_point, *b_scalar)]) + Self::mul_by_generator_vartime(a) + mul_vartime_impl(b_point, b_scalar) } } @@ -450,6 +584,33 @@ mod tests { assert_eq!(reference, test); } + #[test] + #[cfg(feature = "getrandom")] + fn test_mul_vartime() { + for _ in 0..32 { + let p = ProjectivePoint::generate(); + let k = Scalar::generate(); + let reference = p * k; + let test = mul_vartime_impl(&p, &k); + assert_eq!(reference, test); + } + } + + #[test] + fn test_mul_vartime_edge_cases() { + let p = ProjectivePoint::GENERATOR; + assert_eq!( + mul_vartime_impl(&p, &Scalar::ZERO), + ProjectivePoint::IDENTITY + ); + assert_eq!(mul_vartime_impl(&p, &Scalar::ONE), p); + assert_eq!(mul_vartime_impl(&p, &-Scalar::ONE), -p); + assert_eq!( + mul_vartime_impl(&ProjectivePoint::IDENTITY, &Scalar::ONE), + 
ProjectivePoint::IDENTITY + ); + } + #[cfg(all(feature = "alloc", feature = "getrandom"))] #[test] fn test_lincomb_slice() { From 2e12798ca2269848b2a2aa7871715fceef28804a Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Thu, 23 Apr 2026 18:14:25 -0600 Subject: [PATCH 2/9] k256: share doublings across s*G + e*P in Schnorr verify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Folds the combined `mul_by_generator_and_mul_add_vartime` into a single wNAF ladder over all 4 GLV sub-scalars (s1, s2 for G and the endomorphism; e1, e2 for P and the endomorphism). One `double()` per step instead of two independent ladders. Factors out a small `WnafSlot` (odd-multiples table + digits) and a `wnaf_ladder` helper so the single-point `mul_vartime` and the combined op share the same loop body. Schnorr verify: ~53 µs -> ~50 µs (no precomputed-tables; ~51 µs with tables). Total vs. pre-wNAF baseline: ~62 µs -> ~50 µs (~19% faster). Co-Authored-By: Claude Opus 4.7 (1M context) --- k256/src/arithmetic/mul.rs | 153 ++++++++++++++++++++++++++----------- 1 file changed, 109 insertions(+), 44 deletions(-) diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index e4d1f6d15..ea7f7245b 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -388,62 +388,97 @@ fn build_odd_multiples(p: &ProjectivePoint) -> [ProjectivePoint; WNAF_TABLE_SIZE out } -/// Variable-time `k * P` using GLV + width-5 wNAF. -/// -/// SECURITY: not constant time. Only call with non-secret scalars. 
-fn mul_vartime_impl(p: &ProjectivePoint, k: &Scalar) -> ProjectivePoint { - let (r1, r2) = decompose_scalar(k); - let r1_neg = bool::from(r1.is_high()); - let r2_neg = bool::from(r2.is_high()); - let r1 = if r1_neg { -r1 } else { r1 }; - let r2 = if r2_neg { -r2 } else { r2 }; - - let p1 = if r1_neg { -*p } else { *p }; - let p_beta = p.endomorphism(); - let p2 = if r2_neg { -p_beta } else { p_beta }; - - let table1 = build_odd_multiples(&p1); - let table2 = build_odd_multiples(&p2); +/// Everything needed to feed one scalar into the wNAF ladder: odd-multiples table for the +/// associated point and the signed digits. +struct WnafSlot { + table: [ProjectivePoint; WNAF_TABLE_SIZE], + digits: [i8; WNAF_DIGITS], +} - let naf1 = wnaf_128(&r1); - let naf2 = wnaf_128(&r2); +impl WnafSlot { + /// Prepare one slot: GLV-decompose `k` into `(r1, r2)`, then produce two slots — one for + /// `r1 * p` and one for `r2 * endomorphism(p)`. Sign is folded into the precomputed points. + fn pair_from(p: &ProjectivePoint, k: &Scalar) -> [Self; 2] { + let (r1, r2) = decompose_scalar(k); + let r1_neg = bool::from(r1.is_high()); + let r2_neg = bool::from(r2.is_high()); + let r1 = if r1_neg { -r1 } else { r1 }; + let r2 = if r2_neg { -r2 } else { r2 }; + + let p1 = if r1_neg { -*p } else { *p }; + let p_beta = p.endomorphism(); + let p2 = if r2_neg { -p_beta } else { p_beta }; + + [ + Self { + table: build_odd_multiples(&p1), + digits: wnaf_128(&r1), + }, + Self { + table: build_odd_multiples(&p2), + digits: wnaf_128(&r2), + }, + ] + } - // Find the highest nonzero digit across either NAF. 
- let mut top = WNAF_DIGITS; - while top > 0 && naf1[top - 1] == 0 && naf2[top - 1] == 0 { - top -= 1; + #[inline] + fn apply(&self, acc: &mut ProjectivePoint, i: usize) { + let d = self.digits[i]; + if d != 0 { + let idx = (d.unsigned_abs() >> 1) as usize; + if d > 0 { + *acc += &self.table[idx]; + } else { + *acc += &(-self.table[idx]); + } + } } +} + +/// Walk `slots` in a single left-to-right double-and-add loop, sharing doublings across all of +/// them. `top` is the number of wNAF digits to process. +fn wnaf_ladder(slots: &[&WnafSlot], top: usize) -> ProjectivePoint { if top == 0 { return ProjectivePoint::IDENTITY; } - - // Standard left-to-right double-and-add-with-signed-digits. let mut acc = ProjectivePoint::IDENTITY; for i in (0..top).rev() { acc = acc.double(); - - let d1 = naf1[i]; - if d1 != 0 { - let idx = ((d1.unsigned_abs()) >> 1) as usize; - if d1 > 0 { - acc += &table1[idx]; - } else { - acc += &(-table1[idx]); - } + for slot in slots { + slot.apply(&mut acc, i); } + } + acc +} - let d2 = naf2[i]; - if d2 != 0 { - let idx = ((d2.unsigned_abs()) >> 1) as usize; - if d2 > 0 { - acc += &table2[idx]; - } else { - acc += &(-table2[idx]); - } - } +/// Highest digit index that's nonzero in any of the given slots. +fn top_nonzero_digit(slots: &[&WnafSlot]) -> usize { + let mut top = WNAF_DIGITS; + while top > 0 && slots.iter().all(|s| s.digits[top - 1] == 0) { + top -= 1; } + top +} - acc +/// Variable-time `k * P` using GLV + width-5 wNAF. +/// +/// SECURITY: not constant time. Only call with non-secret scalars. +fn mul_vartime_impl(p: &ProjectivePoint, k: &Scalar) -> ProjectivePoint { + let slots = WnafSlot::pair_from(p, k); + let refs = [&slots[0], &slots[1]]; + let top = top_nonzero_digit(&refs); + wnaf_ladder(&refs, top) +} + +/// Variable-time `a * G + b * P`, sharing doublings across all 4 GLV sub-scalars. +/// +/// SECURITY: not constant time. Only call with non-secret scalars. 
+fn mul_and_mul_add_vartime_impl(a: &Scalar, b: &Scalar, p: &ProjectivePoint) -> ProjectivePoint { + let g_slots = WnafSlot::pair_from(&ProjectivePoint::GENERATOR, a); + let p_slots = WnafSlot::pair_from(p, b); + let refs = [&g_slots[0], &g_slots[1], &p_slots[0], &p_slots[1]]; + let top = top_nonzero_digit(&refs); + wnaf_ladder(&refs, top) } impl ProjectivePoint { @@ -540,7 +575,7 @@ impl MulByGeneratorVartime for ProjectivePoint { b_scalar: &Self::Scalar, b_point: &Self, ) -> Self { - Self::mul_by_generator_vartime(a) + mul_vartime_impl(b_point, b_scalar) + mul_and_mul_add_vartime_impl(a, b_scalar, b_point) } } @@ -596,6 +631,36 @@ mod tests { } } + #[test] + #[cfg(feature = "getrandom")] + fn test_mul_and_mul_add_vartime() { + for _ in 0..32 { + let p = ProjectivePoint::generate(); + let a = Scalar::generate(); + let b = Scalar::generate(); + let reference = ProjectivePoint::GENERATOR * a + p * b; + let test = mul_and_mul_add_vartime_impl(&a, &b, &p); + assert_eq!(reference, test); + } + } + + #[test] + fn test_mul_and_mul_add_vartime_edge_cases() { + let p = ProjectivePoint::GENERATOR; + assert_eq!( + mul_and_mul_add_vartime_impl(&Scalar::ZERO, &Scalar::ZERO, &p), + ProjectivePoint::IDENTITY + ); + assert_eq!( + mul_and_mul_add_vartime_impl(&Scalar::ONE, &Scalar::ZERO, &p), + ProjectivePoint::GENERATOR + ); + assert_eq!( + mul_and_mul_add_vartime_impl(&Scalar::ZERO, &Scalar::ONE, &p), + p + ); + } + #[test] fn test_mul_vartime_edge_cases() { let p = ProjectivePoint::GENERATOR; From 94732943d99701999e2e2e809636be3a0eb33def Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Thu, 23 Apr 2026 18:57:06 -0600 Subject: [PATCH 3/9] k256: debug_assert wNAF digit index stays in bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `wnaf_128` writes into a fixed 130-entry buffer; the bound holds for the current `WNAF_WIDTH = 5` and the ≤128-bit GLV sub-scalars, but it's implicit. 
Add a `debug_assert!` in the loop so that any future change to `WNAF_WIDTH` that invalidates the bound is caught at test time by an explicit assertion, rather than surfacing only as an index-out-of-bounds panic (Rust array indexing is bounds-checked in all build profiles) on worst-case inputs. Co-Authored-By: Claude Opus 4.7 (1M context) --- k256/src/arithmetic/mul.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index ea7f7245b..07d73e5e1 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -343,6 +343,7 @@ fn wnaf_128(k: &Scalar) -> [i8; WNAF_DIGITS] { let mut out = [0i8; WNAF_DIGITS]; let mut i = 0; while (lo | hi) != 0 { + debug_assert!(i < WNAF_DIGITS); if (lo & 1) == 1 { // d = k mod 2^W, recentered into [-2^(W-1) + 1, 2^(W-1) - 1] let mut d = (lo & width_mask) as i64; From 67fa19bdb07b8e0d5f0092631d53836639270bdf Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Thu, 23 Apr 2026 19:15:41 -0600 Subject: [PATCH 4/9] k256: fix wNAF overflow past bit 127 for near-2^128 inputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `wnaf_128` tracked the residual scalar in two u64 limbs, but a negative recentered digit adds up to 2^(W-1) − 1 to the value, which can legitimately overflow past bit 127 when the input is close to 2^128 − 1. The old code let `hi.wrapping_add(1)` silently wrap, losing the carried bit and producing a NAF that reconstructs to the wrong value. The GLV decomposition's `(r1, r2)` each have magnitude strictly less than 2^128, so values in the carry-out window are possible (though vanishingly rare in random scalars — which is why the existing randomized tests never caught it). Fix by carrying the overflow bit into a third limb `top` that is absorbed back on the next right-shift. Perf impact is in the noise: the `top` branch is almost never taken and the predictor handles it cleanly. 
Add two regression tests: - `test_wnaf_128_reconstruction_adversarial` — reconstructs the NAF of a scalar with low 128 bits = 0xFF..FF and asserts it equals 2^128 − 1. - `test_mul_vartime_adversarial_scalars` — end-to-end check that `mul_vartime(P, k)` matches the constant-time reference when `k`'s low 128 bits trigger the carry window. Also add a `debug_assert!` on `idx` in `WnafSlot::apply` to guard the parallel invariant (`idx < WNAF_TABLE_SIZE`) if `WNAF_WIDTH` is ever widened without growing the table. Co-Authored-By: Claude Opus 4.7 (1M context) --- k256/src/arithmetic/mul.rs | 103 +++++++++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 10 deletions(-) diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index 07d73e5e1..5cc3ae2f5 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -342,7 +342,13 @@ fn wnaf_128(k: &Scalar) -> [i8; WNAF_DIGITS] { let mut out = [0i8; WNAF_DIGITS]; let mut i = 0; - while (lo | hi) != 0 { + // Three-limb representation `(lo, hi, top)`: `top` is 0 or 1 and only becomes 1 when a + // negative digit adds past bit 127. The extra bit is absorbed back on the next right-shift. + // This is needed because GLV sub-scalars can legitimately reach magnitudes up to 2^128 − 1, + // and a width-W recentering can add up to 2^(W-1) − 1 to the value, so transient overflow + // past bit 127 must be preserved rather than silently wrapping `hi`. + let mut top: u64 = 0; + while (lo | hi | top) != 0 { debug_assert!(i < WNAF_DIGITS); if (lo & 1) == 1 { // d = k mod 2^W, recentered into [-2^(W-1) + 1, 2^(W-1) - 1] @@ -352,27 +358,37 @@ fn wnaf_128(k: &Scalar) -> [i8; WNAF_DIGITS] { } out[i] = d as i8; - // k -= d (128-bit signed update) + // k -= d (129-bit signed update, but the result is always >= 0 because the low W + // bits of k equalled d mod 2^W and the recentering chose the signed representative). 
if d < 0 { // k -= (negative d) == k += |d| let add = (-d) as u64; - let (new_lo, carry) = lo.overflowing_add(add); + let (new_lo, carry0) = lo.overflowing_add(add); lo = new_lo; - if carry { - hi = hi.wrapping_add(1); + if carry0 { + let (new_hi, carry1) = hi.overflowing_add(1); + hi = new_hi; + if carry1 { + top = top.wrapping_add(1); + } } } else { let sub = d as u64; - let (new_lo, borrow) = lo.overflowing_sub(sub); + let (new_lo, borrow0) = lo.overflowing_sub(sub); lo = new_lo; - if borrow { - hi = hi.wrapping_sub(1); + if borrow0 { + let (new_hi, borrow1) = hi.overflowing_sub(1); + hi = new_hi; + if borrow1 { + top = top.wrapping_sub(1); + } } } } - // Shift right by 1 across the 128-bit value. + // Shift right by 1 across the 129-bit value. lo = (lo >> 1) | (hi << 63); - hi >>= 1; + hi = (hi >> 1) | (top << 63); + top >>= 1; i += 1; } out @@ -427,6 +443,10 @@ impl WnafSlot { let d = self.digits[i]; if d != 0 { let idx = (d.unsigned_abs() >> 1) as usize; + // |d| ≤ 2^(W-1) − 1 = 15 for W=5, so idx ≤ 7 = WNAF_TABLE_SIZE − 1. Guard here so + // any future widening of WNAF_WIDTH that forgets to grow WNAF_TABLE_SIZE panics at + // test time rather than at a random position in the ladder under release. + debug_assert!(idx < WNAF_TABLE_SIZE); if d > 0 { *acc += &self.table[idx]; } else { @@ -662,6 +682,69 @@ mod tests { ); } + // Reconstructs a wNAF digit array as a signed integer and compares to the expected low-128-bit + // value of `k` (since wnaf_128 only reads bytes[16..32]). 
+ fn check_wnaf_reconstruction(k: &Scalar) { + let digits = wnaf_128(k); + let mut sum = num_bigint::BigInt::from(0); + for (i, &d) in digits.iter().enumerate() { + if d != 0 { + sum += num_bigint::BigInt::from(d) << i; + } + } + let bytes = k.to_bytes(); + let mut expected = num_bigint::BigInt::from(0); + for &b in bytes[16..32].iter() { + expected = (expected << 8) + b as u32; + } + assert_eq!( + sum, expected, + "wnaf_128 reconstructs wrong value for k.lo128 = {expected:x}" + ); + } + + /// End-to-end check on a scalar whose GLV halves land at or near the 2^128 boundary. + /// We don't know in advance which scalars produce such halves, so instead we hunt: for a + /// fixed base point, try scalars whose low bits are `0xFF..FF` and verify that the vartime + /// result matches the constant-time reference. If the 3-limb carry fix is ever reverted, + /// one of these will mismatch. + #[test] + fn test_mul_vartime_adversarial_scalars() { + let p = ProjectivePoint::GENERATOR; + // A scalar where the low 128 bits are all 1s forces wnaf_128's original 128-bit + // code path through its carry-out window. + let mut bytes = [0u8; 32]; + for b in bytes.iter_mut().skip(16) { + *b = 0xFF; + } + // Ensure it's a valid scalar (not >= n). Setting the high byte to 0 keeps it small. + let k = Scalar::from_bytes_unchecked(&bytes); + let reference = p * k; + let test = mul_vartime_impl(&p, &k); + assert_eq!( + reference, test, + "mul_vartime mismatch on adversarial scalar" + ); + } + + #[test] + fn test_wnaf_128_reconstruction_adversarial() { + // Pathological: all-ones low 128 bits (= 2^128 - 1). Triggers a carry past bit 127. + let mut bytes = [0u8; 32]; + for b in bytes.iter_mut().skip(16) { + *b = 0xFF; + } + check_wnaf_reconstruction(&Scalar::from_bytes_unchecked(&bytes)); + + // Just below: 2^128 - 2 (even → d=0 on iter 0, no carry issue). 
+ bytes[31] = 0xFE; + check_wnaf_reconstruction(&Scalar::from_bytes_unchecked(&bytes)); + + // Just below 2^128 but odd with high-bit set: 2^128 - 17. + bytes[31] = 0xEF; + check_wnaf_reconstruction(&Scalar::from_bytes_unchecked(&bytes)); + } + #[test] fn test_mul_vartime_edge_cases() { let p = ProjectivePoint::GENERATOR; From 25b43f94ea31b00046c5a09edd33b7ec886ba5e4 Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Thu, 14 May 2026 11:05:59 -0600 Subject: [PATCH 5/9] k256: use group crate WnafBase/WnafScalar for GLV vartime mul Replace custom wNAF implementation (wnaf_128, build_odd_multiples, WnafSlot, wnaf_ladder) with the group crate's WnafBase/WnafScalar types and WnafBase::multiscalar_mul_array. A new WnafScalar::from_le_bytes constructor accepts short (128-bit) GLV half-scalars, producing ~half the wNAF digits and ~half the doublings in the evaluation loop. multiscalar_mul_array avoids the two collect() heap allocations of the iterator-based multiscalar_mul. Depends on https://github.com/RustCrypto/group/pull/15 for the group crate changes (wnaf_table size fix, from_le_bytes, multiscalar_mul_array, pre-sized Vec allocations). Co-Authored-By: Claude Opus 4.6 (1M context) --- k256/src/arithmetic/mul.rs | 319 +++++++++++-------------------------- 1 file changed, 92 insertions(+), 227 deletions(-) diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index 5cc3ae2f5..492227c51 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -44,6 +44,12 @@ use elliptic_curve::{ subtle::ConditionallySelectable, }; +#[cfg(feature = "alloc")] +use elliptic_curve::{ + PrimeField, + group::{WnafBase, WnafScalar}, +}; + #[cfg(feature = "precomputed-tables")] use super::tables::BASEPOINT_TABLE; @@ -316,190 +322,67 @@ fn lincomb( acc } -/// Width of the wNAF window. Digits are odd values in `[-(2^(W-1) - 1), 2^(W-1) - 1]`. -const WNAF_WIDTH: usize = 5; - -/// Number of precomputed odd multiples per point: `[P, 3P, 5P, ..., 15P]`. 
-const WNAF_TABLE_SIZE: usize = 1 << (WNAF_WIDTH - 2); - -/// Output length for a signed-digit wNAF of a <= 129-bit value (128-bit GLV half plus a carry bit). -const WNAF_DIGITS: usize = 130; - -/// Compute a width-`WNAF_WIDTH` signed-digit non-adjacent form of `k`, where `k` is known to fit -/// in 128 bits (magnitude only — sign is tracked separately by the caller). The output array has -/// one entry per bit, with zero entries meaning "skip this step". Nonzero entries are odd and in -/// `[-(2^(W-1) - 1), 2^(W-1) - 1]`. -/// -/// Callers must only pass values whose magnitude is < 2^128, which is the GLV guarantee. -fn wnaf_128(k: &Scalar) -> [i8; WNAF_DIGITS] { - // Load the low 128 bits as little-endian u64 limbs. `to_bytes` is big-endian. - let bytes = k.to_bytes(); - let mut lo = u64::from_be_bytes(bytes[24..32].try_into().expect("8 bytes")); - let mut hi = u64::from_be_bytes(bytes[16..24].try_into().expect("8 bytes")); - - let width_mask: u64 = (1 << WNAF_WIDTH) - 1; - let half: u64 = 1 << (WNAF_WIDTH - 1); - - let mut out = [0i8; WNAF_DIGITS]; - let mut i = 0; - // Three-limb representation `(lo, hi, top)`: `top` is 0 or 1 and only becomes 1 when a - // negative digit adds past bit 127. The extra bit is absorbed back on the next right-shift. - // This is needed because GLV sub-scalars can legitimately reach magnitudes up to 2^128 − 1, - // and a width-W recentering can add up to 2^(W-1) − 1 to the value, so transient overflow - // past bit 127 must be preserved rather than silently wrapping `hi`. - let mut top: u64 = 0; - while (lo | hi | top) != 0 { - debug_assert!(i < WNAF_DIGITS); - if (lo & 1) == 1 { - // d = k mod 2^W, recentered into [-2^(W-1) + 1, 2^(W-1) - 1] - let mut d = (lo & width_mask) as i64; - if d >= half as i64 { - d -= 1 << WNAF_WIDTH; - } - out[i] = d as i8; - - // k -= d (129-bit signed update, but the result is always >= 0 because the low W - // bits of k equalled d mod 2^W and the recentering chose the signed representative). 
- if d < 0 { - // k -= (negative d) == k += |d| - let add = (-d) as u64; - let (new_lo, carry0) = lo.overflowing_add(add); - lo = new_lo; - if carry0 { - let (new_hi, carry1) = hi.overflowing_add(1); - hi = new_hi; - if carry1 { - top = top.wrapping_add(1); - } - } - } else { - let sub = d as u64; - let (new_lo, borrow0) = lo.overflowing_sub(sub); - lo = new_lo; - if borrow0 { - let (new_hi, borrow1) = hi.overflowing_sub(1); - hi = new_hi; - if borrow1 { - top = top.wrapping_sub(1); - } - } - } - } - // Shift right by 1 across the 129-bit value. - lo = (lo >> 1) | (hi << 63); - hi = (hi >> 1) | (top << 63); - top >>= 1; - i += 1; - } - out -} - -/// Build `[P, 3P, 5P, ..., (2*WNAF_TABLE_SIZE - 1)P]` in projective coordinates. -fn build_odd_multiples(p: &ProjectivePoint) -> [ProjectivePoint; WNAF_TABLE_SIZE] { - let mut out = [ProjectivePoint::IDENTITY; WNAF_TABLE_SIZE]; - let two_p = p.double(); - out[0] = *p; - for i in 1..WNAF_TABLE_SIZE { - out[i] = out[i - 1] + two_p; - } - out -} - -/// Everything needed to feed one scalar into the wNAF ladder: odd-multiples table for the -/// associated point and the signed digits. -struct WnafSlot { - table: [ProjectivePoint; WNAF_TABLE_SIZE], - digits: [i8; WNAF_DIGITS], -} - -impl WnafSlot { - /// Prepare one slot: GLV-decompose `k` into `(r1, r2)`, then produce two slots — one for - /// `r1 * p` and one for `r2 * endomorphism(p)`. Sign is folded into the precomputed points. 
- fn pair_from(p: &ProjectivePoint, k: &Scalar) -> [Self; 2] { - let (r1, r2) = decompose_scalar(k); - let r1_neg = bool::from(r1.is_high()); - let r2_neg = bool::from(r2.is_high()); - let r1 = if r1_neg { -r1 } else { r1 }; - let r2 = if r2_neg { -r2 } else { r2 }; - - let p1 = if r1_neg { -*p } else { *p }; - let p_beta = p.endomorphism(); - let p2 = if r2_neg { -p_beta } else { p_beta }; - - [ - Self { - table: build_odd_multiples(&p1), - digits: wnaf_128(&r1), - }, - Self { - table: build_odd_multiples(&p2), - digits: wnaf_128(&r2), - }, - ] - } - - #[inline] - fn apply(&self, acc: &mut ProjectivePoint, i: usize) { - let d = self.digits[i]; - if d != 0 { - let idx = (d.unsigned_abs() >> 1) as usize; - // |d| ≤ 2^(W-1) − 1 = 15 for W=5, so idx ≤ 7 = WNAF_TABLE_SIZE − 1. Guard here so - // any future widening of WNAF_WIDTH that forgets to grow WNAF_TABLE_SIZE panics at - // test time rather than at a random position in the ladder under release. - debug_assert!(idx < WNAF_TABLE_SIZE); - if d > 0 { - *acc += &self.table[idx]; - } else { - *acc += &(-self.table[idx]); - } - } - } -} - -/// Walk `slots` in a single left-to-right double-and-add loop, sharing doublings across all of -/// them. `top` is the number of wNAF digits to process. -fn wnaf_ladder(slots: &[&WnafSlot], top: usize) -> ProjectivePoint { - if top == 0 { - return ProjectivePoint::IDENTITY; - } - let mut acc = ProjectivePoint::IDENTITY; - for i in (0..top).rev() { - acc = acc.double(); - for slot in slots { - slot.apply(&mut acc, i); - } - } - acc -} - -/// Highest digit index that's nonzero in any of the given slots. -fn top_nonzero_digit(slots: &[&WnafSlot]) -> usize { - let mut top = WNAF_DIGITS; - while top > 0 && slots.iter().all(|s| s.digits[top - 1] == 0) { - top -= 1; - } - top +/// wNAF window width for GLV vartime multiplication. +#[cfg(feature = "alloc")] +const WNAF_WINDOW: usize = 5; + +/// Number of little-endian bytes to feed into `WnafScalar::from_le_bytes` for a GLV half-scalar. 
+/// GLV guarantees magnitude < 2^128 (16 bytes). We use 17 bytes (136 bits) to give `wnaf_form` +/// headroom for its carry bit without relying on the trailing-carry special case. +#[cfg(feature = "alloc")] +const GLV_LE_BYTES: usize = 17; + +/// GLV-decompose `k` for point `p`: returns two `(WnafBase, WnafScalar)` pairs representing +/// `r1 * p_signed` and `r2 * endomorphism(p_signed)`, with signs folded into the points. +#[cfg(feature = "alloc")] +fn glv_wnaf_pair( + p: &ProjectivePoint, + k: &Scalar, +) -> ( + [WnafBase; 2], + [WnafScalar; 2], +) { + let (r1, r2) = decompose_scalar(k); + let r1_neg = bool::from(r1.is_high()); + let r2_neg = bool::from(r2.is_high()); + let r1 = if r1_neg { -r1 } else { r1 }; + let r2 = if r2_neg { -r2 } else { r2 }; + + let p1 = if r1_neg { -*p } else { *p }; + let p_beta = p.endomorphism(); + let p2 = if r2_neg { -p_beta } else { p_beta }; + + let bases = [WnafBase::new(p1), WnafBase::new(p2)]; + let scalars = [ + WnafScalar::from_le_bytes(&r1.to_le_repr()[..GLV_LE_BYTES]), + WnafScalar::from_le_bytes(&r2.to_le_repr()[..GLV_LE_BYTES]), + ]; + (bases, scalars) } /// Variable-time `k * P` using GLV + width-5 wNAF. /// /// SECURITY: not constant time. Only call with non-secret scalars. +#[cfg(feature = "alloc")] fn mul_vartime_impl(p: &ProjectivePoint, k: &Scalar) -> ProjectivePoint { - let slots = WnafSlot::pair_from(p, k); - let refs = [&slots[0], &slots[1]]; - let top = top_nonzero_digit(&refs); - wnaf_ladder(&refs, top) + let (bases, scalars) = glv_wnaf_pair(p, k); + WnafBase::multiscalar_mul_array(&scalars, &bases) } /// Variable-time `a * G + b * P`, sharing doublings across all 4 GLV sub-scalars. /// /// SECURITY: not constant time. Only call with non-secret scalars. 
+#[cfg(feature = "alloc")] fn mul_and_mul_add_vartime_impl(a: &Scalar, b: &Scalar, p: &ProjectivePoint) -> ProjectivePoint { - let g_slots = WnafSlot::pair_from(&ProjectivePoint::GENERATOR, a); - let p_slots = WnafSlot::pair_from(p, b); - let refs = [&g_slots[0], &g_slots[1], &p_slots[0], &p_slots[1]]; - let top = top_nonzero_digit(&refs); - wnaf_ladder(&refs, top) + let (g_bases, g_scalars) = glv_wnaf_pair(&ProjectivePoint::GENERATOR, a); + let (p_bases, p_scalars) = glv_wnaf_pair(p, b); + + let [gb0, gb1] = g_bases; + let [gs0, gs1] = g_scalars; + let [pb0, pb1] = p_bases; + let [ps0, ps1] = p_scalars; + + WnafBase::multiscalar_mul_array(&[gs0, gs1, ps0, ps1], &[gb0, gb1, pb0, pb1]) } impl ProjectivePoint { @@ -560,35 +443,57 @@ impl Mul<&Scalar> for ProjectivePoint { impl MulVartime for ProjectivePoint { fn mul_vartime(self, other: Scalar) -> ProjectivePoint { - mul_vartime_impl(&self, &other) + #[cfg(feature = "alloc")] + { + mul_vartime_impl(&self, &other) + } + #[cfg(not(feature = "alloc"))] + { + self * other + } } } impl MulVartime<&Scalar> for &ProjectivePoint { fn mul_vartime(self, other: &Scalar) -> ProjectivePoint { - mul_vartime_impl(self, other) + #[cfg(feature = "alloc")] + { + mul_vartime_impl(self, other) + } + #[cfg(not(feature = "alloc"))] + { + self * other + } } } impl MulVartime<&Scalar> for ProjectivePoint { fn mul_vartime(self, other: &Scalar) -> ProjectivePoint { - mul_vartime_impl(&self, other) + #[cfg(feature = "alloc")] + { + mul_vartime_impl(&self, other) + } + #[cfg(not(feature = "alloc"))] + { + self * other + } } } impl MulByGeneratorVartime for ProjectivePoint { fn mul_by_generator_vartime(k: &Scalar) -> ProjectivePoint { - // The precomputed basepoint table is already constant-time fast; beating it with wNAF - // would require a much larger vartime-specific table. When tables are unavailable, - // fall back to the endomorphism-aware vartime mul on the generator. 
#[cfg(feature = "precomputed-tables")] { Self::mul_by_generator(k) } - #[cfg(not(feature = "precomputed-tables"))] + #[cfg(all(not(feature = "precomputed-tables"), feature = "alloc"))] { mul_vartime_impl(&Self::GENERATOR, k) } + #[cfg(not(any(feature = "precomputed-tables", feature = "alloc")))] + { + Self::mul_by_generator(k) + } } fn mul_by_generator_and_mul_add_vartime( @@ -596,7 +501,14 @@ impl MulByGeneratorVartime for ProjectivePoint { b_scalar: &Self::Scalar, b_point: &Self, ) -> Self { - mul_and_mul_add_vartime_impl(a, b_scalar, b_point) + #[cfg(feature = "alloc")] + { + mul_and_mul_add_vartime_impl(a, b_scalar, b_point) + } + #[cfg(not(feature = "alloc"))] + { + Self::lincomb(&[(Self::GENERATOR, *a), (*b_point, *b_scalar)]) + } } } @@ -682,42 +594,13 @@ mod tests { ); } - // Reconstructs a wNAF digit array as a signed integer and compares to the expected low-128-bit - // value of `k` (since wnaf_128 only reads bytes[16..32]). - fn check_wnaf_reconstruction(k: &Scalar) { - let digits = wnaf_128(k); - let mut sum = num_bigint::BigInt::from(0); - for (i, &d) in digits.iter().enumerate() { - if d != 0 { - sum += num_bigint::BigInt::from(d) << i; - } - } - let bytes = k.to_bytes(); - let mut expected = num_bigint::BigInt::from(0); - for &b in bytes[16..32].iter() { - expected = (expected << 8) + b as u32; - } - assert_eq!( - sum, expected, - "wnaf_128 reconstructs wrong value for k.lo128 = {expected:x}" - ); - } - - /// End-to-end check on a scalar whose GLV halves land at or near the 2^128 boundary. - /// We don't know in advance which scalars produce such halves, so instead we hunt: for a - /// fixed base point, try scalars whose low bits are `0xFF..FF` and verify that the vartime - /// result matches the constant-time reference. If the 3-limb carry fix is ever reverted, - /// one of these will mismatch. 
#[test] fn test_mul_vartime_adversarial_scalars() { let p = ProjectivePoint::GENERATOR; - // A scalar where the low 128 bits are all 1s forces wnaf_128's original 128-bit - // code path through its carry-out window. let mut bytes = [0u8; 32]; for b in bytes.iter_mut().skip(16) { *b = 0xFF; } - // Ensure it's a valid scalar (not >= n). Setting the high byte to 0 keeps it small. let k = Scalar::from_bytes_unchecked(&bytes); let reference = p * k; let test = mul_vartime_impl(&p, &k); @@ -727,24 +610,6 @@ mod tests { ); } - #[test] - fn test_wnaf_128_reconstruction_adversarial() { - // Pathological: all-ones low 128 bits (= 2^128 - 1). Triggers a carry past bit 127. - let mut bytes = [0u8; 32]; - for b in bytes.iter_mut().skip(16) { - *b = 0xFF; - } - check_wnaf_reconstruction(&Scalar::from_bytes_unchecked(&bytes)); - - // Just below: 2^128 - 2 (even → d=0 on iter 0, no carry issue). - bytes[31] = 0xFE; - check_wnaf_reconstruction(&Scalar::from_bytes_unchecked(&bytes)); - - // Just below 2^128 but odd with high-bit set: 2^128 - 17. 
- bytes[31] = 0xEF; - check_wnaf_reconstruction(&Scalar::from_bytes_unchecked(&bytes)); - } - #[test] fn test_mul_vartime_edge_cases() { let p = ProjectivePoint::GENERATOR; From a75f4be61f84bad660c55be69109574b06998456 Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Sat, 6 Jun 2026 15:56:42 -0600 Subject: [PATCH 6/9] k256: adapt GLV wNAF mul to the merged upstream API The WnafScalar::from_le_bytes (#2438) and WnafBase::multiscalar_mul_array (#2439) work this branch relied on has merged upstream, replacing the fork-only APIs used so far: - replace the fork-only PrimeField::to_le_repr with a local to_le_bytes helper (to_repr is big-endian; reverse for from_le_bytes) - handle the new fallible from_le_bytes signature; GLV half-scalars are < 2^128 so the canonical-range check cannot fail (.expect) Co-Authored-By: Claude Opus 4.8 (1M context) --- k256/src/arithmetic/mul.rs | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index 492227c51..d55353ee5 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -45,10 +45,9 @@ use elliptic_curve::{ }; #[cfg(feature = "alloc")] -use elliptic_curve::{ - PrimeField, - group::{WnafBase, WnafScalar}, -}; +use crate::FieldBytes; +#[cfg(feature = "alloc")] +use elliptic_curve::{PrimeField, WnafBase, WnafScalar}; #[cfg(feature = "precomputed-tables")] use super::tables::BASEPOINT_TABLE; @@ -332,6 +331,18 @@ const WNAF_WINDOW: usize = 5; #[cfg(feature = "alloc")] const GLV_LE_BYTES: usize = 17; +/// Little-endian byte encoding of a scalar. +/// +/// `Scalar::to_repr()` is big-endian; `WnafScalar::from_le_bytes` consumes little-endian bytes, so +/// reverse it. (This replaces the forked `PrimeField::to_le_repr` the wNAF work previously relied +/// on, which no longer exists upstream.) 
+#[cfg(feature = "alloc")] +fn to_le_bytes(scalar: &Scalar) -> FieldBytes { + let mut repr = scalar.to_repr(); + repr.reverse(); + repr +} + /// GLV-decompose `k` for point `p`: returns two `(WnafBase, WnafScalar)` pairs representing /// `r1 * p_signed` and `r2 * endomorphism(p_signed)`, with signs folded into the points. #[cfg(feature = "alloc")] @@ -353,9 +364,13 @@ fn glv_wnaf_pair( let p2 = if r2_neg { -p_beta } else { p_beta }; let bases = [WnafBase::new(p1), WnafBase::new(p2)]; + // GLV guarantees each half-scalar has magnitude < 2^128, far below the curve order, so the + // canonical-range check in `from_le_bytes` cannot fail for these inputs. let scalars = [ - WnafScalar::from_le_bytes(&r1.to_le_repr()[..GLV_LE_BYTES]), - WnafScalar::from_le_bytes(&r2.to_le_repr()[..GLV_LE_BYTES]), + WnafScalar::from_le_bytes(&to_le_bytes(&r1)[..GLV_LE_BYTES]) + .expect("GLV half-scalar is in range"), + WnafScalar::from_le_bytes(&to_le_bytes(&r2)[..GLV_LE_BYTES]) + .expect("GLV half-scalar is in range"), ]; (bases, scalars) } From b986ce7351a08097e3b89131a751c5f0d9c27ce7 Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Sat, 6 Jun 2026 16:33:55 -0600 Subject: [PATCH 7/9] k256: tidy GLV vartime mul structure Pure cleanup of the GLV+wNAF vartime path, no behavior change: - Fold the loose `glv_wnaf_pair` / `mul_vartime_impl` / `mul_and_mul_add_vartime_impl` free functions into an `#[cfg(feature = "alloc")] impl ProjectivePoint` as methods, alongside the existing `mul_by_generator`. - Replace the in-body `#[cfg]` ladders in the `MulVartime` / `MulByGeneratorVartime` impls with per-fn `#[cfg]` definitions, matching the `mul_by_generator` twin-definition idiom. Without `alloc`, `mul_vartime` is plain `self * rhs` and the trait's default `mul_by_generator_and_mul_add_vartime` applies (no override needed). - Drop the `to_le_bytes` helper and its redundant big-endian->little-endian reverse: read the scalar's little-endian bytes directly via `U256::to_le_byte_array`.
The wNAF path needs LE, and `to_repr` is BE, so the previous reverse was immediately undone inside `from_le_bytes`. - Replace the `.expect` on `from_le_bytes` with an `unwrap_or_else` fallback to the infallible full-width `WnafScalar::new`, removing the panic path from a crypto routine. Verified: builds across the feature matrix (arithmetic / schnorr, each with and without alloc; alloc without precomputed-tables); all k256 tests pass; schnorr verify still ~20% faster than master. Co-Authored-By: Claude Opus 4.8 (1M context) --- k256/src/arithmetic/mul.rs | 229 +++++++++++++++++-------------------- 1 file changed, 103 insertions(+), 126 deletions(-) diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index d55353ee5..89087f4d0 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -45,9 +45,7 @@ use elliptic_curve::{ }; #[cfg(feature = "alloc")] -use crate::FieldBytes; -#[cfg(feature = "alloc")] -use elliptic_curve::{PrimeField, WnafBase, WnafScalar}; +use elliptic_curve::{WnafBase, WnafScalar, bigint::ArrayEncoding}; #[cfg(feature = "precomputed-tables")] use super::tables::BASEPOINT_TABLE; @@ -331,73 +329,61 @@ const WNAF_WINDOW: usize = 5; #[cfg(feature = "alloc")] const GLV_LE_BYTES: usize = 17; -/// Little-endian byte encoding of a scalar. +/// GLV + wNAF variable-time scalar multiplication. /// -/// `Scalar::to_repr()` is big-endian; `WnafScalar::from_le_bytes` consumes little-endian bytes, so -/// reverse it. (This replaces the forked `PrimeField::to_le_repr` the wNAF work previously relied -/// on, which no longer exists upstream.) -#[cfg(feature = "alloc")] -fn to_le_bytes(scalar: &Scalar) -> FieldBytes { - let mut repr = scalar.to_repr(); - repr.reverse(); - repr -} - -/// GLV-decompose `k` for point `p`: returns two `(WnafBase, WnafScalar)` pairs representing -/// `r1 * p_signed` and `r2 * endomorphism(p_signed)`, with signs folded into the points. 
-#[cfg(feature = "alloc")] -fn glv_wnaf_pair( - p: &ProjectivePoint, - k: &Scalar, -) -> ( - [WnafBase; 2], - [WnafScalar; 2], -) { - let (r1, r2) = decompose_scalar(k); - let r1_neg = bool::from(r1.is_high()); - let r2_neg = bool::from(r2.is_high()); - let r1 = if r1_neg { -r1 } else { r1 }; - let r2 = if r2_neg { -r2 } else { r2 }; - - let p1 = if r1_neg { -*p } else { *p }; - let p_beta = p.endomorphism(); - let p2 = if r2_neg { -p_beta } else { p_beta }; - - let bases = [WnafBase::new(p1), WnafBase::new(p2)]; - // GLV guarantees each half-scalar has magnitude < 2^128, far below the curve order, so the - // canonical-range check in `from_le_bytes` cannot fail for these inputs. - let scalars = [ - WnafScalar::from_le_bytes(&to_le_bytes(&r1)[..GLV_LE_BYTES]) - .expect("GLV half-scalar is in range"), - WnafScalar::from_le_bytes(&to_le_bytes(&r2)[..GLV_LE_BYTES]) - .expect("GLV half-scalar is in range"), - ]; - (bases, scalars) -} - -/// Variable-time `k * P` using GLV + width-5 wNAF. +/// These require heap-allocated wNAF tables (via the `group` crate's `WnafBase`/`WnafScalar`), so +/// the whole block is gated on `alloc`. Without `alloc`, the `MulVartime`/`MulByGeneratorVartime` +/// impls fall back to constant-time multiplication and the trait-provided default combinators. /// -/// SECURITY: not constant time. Only call with non-secret scalars. +/// SECURITY: these are not constant time and must only be called with non-secret scalars. #[cfg(feature = "alloc")] -fn mul_vartime_impl(p: &ProjectivePoint, k: &Scalar) -> ProjectivePoint { - let (bases, scalars) = glv_wnaf_pair(p, k); - WnafBase::multiscalar_mul_array(&scalars, &bases) -} +impl ProjectivePoint { + /// GLV-decompose `k` for `self`: two `(WnafBase, WnafScalar)` pairs representing + /// `r1 * self_signed` and `r2 * endomorphism(self_signed)`, with signs folded into the points. 
+ fn glv_wnaf_pair( + &self, + k: &Scalar, + ) -> ( + [WnafBase; 2], + [WnafScalar; 2], + ) { + let (r1, r2) = decompose_scalar(k); + let r1_neg = bool::from(r1.is_high()); + let r2_neg = bool::from(r2.is_high()); + let r1 = if r1_neg { -r1 } else { r1 }; + let r2 = if r2_neg { -r2 } else { r2 }; + + let p1 = if r1_neg { -*self } else { *self }; + let p_beta = self.endomorphism(); + let p2 = if r2_neg { -p_beta } else { p_beta }; + + let bases = [WnafBase::new(p1), WnafBase::new(p2)]; + // GLV guarantees each half-scalar fits in `GLV_LE_BYTES`, so the truncated little-endian + // encoding round-trips and `from_le_bytes`'s canonical-range check always succeeds. Should + // that invariant ever fail to hold, fall back to the full-width `new` rather than panicking; + // it produces an identical (just slower) result for any in-range scalar. + let scalars = [ + WnafScalar::from_le_bytes(&r1.0.to_le_byte_array()[..GLV_LE_BYTES]) + .unwrap_or_else(|_| WnafScalar::new(&r1)), + WnafScalar::from_le_bytes(&r2.0.to_le_byte_array()[..GLV_LE_BYTES]) + .unwrap_or_else(|_| WnafScalar::new(&r2)), + ]; + (bases, scalars) + } -/// Variable-time `a * G + b * P`, sharing doublings across all 4 GLV sub-scalars. -/// -/// SECURITY: not constant time. Only call with non-secret scalars. -#[cfg(feature = "alloc")] -fn mul_and_mul_add_vartime_impl(a: &Scalar, b: &Scalar, p: &ProjectivePoint) -> ProjectivePoint { - let (g_bases, g_scalars) = glv_wnaf_pair(&ProjectivePoint::GENERATOR, a); - let (p_bases, p_scalars) = glv_wnaf_pair(p, b); + /// Variable-time `k * self` using GLV + width-5 wNAF. + fn mul_vartime_glv(&self, k: &Scalar) -> ProjectivePoint { + let (bases, scalars) = self.glv_wnaf_pair(k); + WnafBase::multiscalar_mul_array(&scalars, &bases) + } - let [gb0, gb1] = g_bases; - let [gs0, gs1] = g_scalars; - let [pb0, pb1] = p_bases; - let [ps0, ps1] = p_scalars; + /// Variable-time `a * G + b * self`, sharing doublings across all 4 GLV sub-scalars. 
+ fn mul_add_vartime_glv(&self, a: &Scalar, b: &Scalar) -> ProjectivePoint { + let ([gb0, gb1], [gs0, gs1]) = ProjectivePoint::GENERATOR.glv_wnaf_pair(a); + let ([pb0, pb1], [ps0, ps1]) = self.glv_wnaf_pair(b); - WnafBase::multiscalar_mul_array(&[gs0, gs1, ps0, ps1], &[gb0, gb1, pb0, pb1]) + WnafBase::multiscalar_mul_array(&[gs0, gs1, ps0, ps1], &[gb0, gb1, pb0, pb1]) + } } impl ProjectivePoint { @@ -457,73 +443,67 @@ impl Mul<&Scalar> for ProjectivePoint { } impl MulVartime for ProjectivePoint { + #[cfg(feature = "alloc")] fn mul_vartime(self, other: Scalar) -> ProjectivePoint { - #[cfg(feature = "alloc")] - { - mul_vartime_impl(&self, &other) - } - #[cfg(not(feature = "alloc"))] - { - self * other - } + self.mul_vartime_glv(&other) + } + + #[cfg(not(feature = "alloc"))] + fn mul_vartime(self, other: Scalar) -> ProjectivePoint { + self * other } } impl MulVartime<&Scalar> for &ProjectivePoint { + #[cfg(feature = "alloc")] fn mul_vartime(self, other: &Scalar) -> ProjectivePoint { - #[cfg(feature = "alloc")] - { - mul_vartime_impl(self, other) - } - #[cfg(not(feature = "alloc"))] - { - self * other - } + self.mul_vartime_glv(other) + } + + #[cfg(not(feature = "alloc"))] + fn mul_vartime(self, other: &Scalar) -> ProjectivePoint { + self * other } } impl MulVartime<&Scalar> for ProjectivePoint { + #[cfg(feature = "alloc")] fn mul_vartime(self, other: &Scalar) -> ProjectivePoint { - #[cfg(feature = "alloc")] - { - mul_vartime_impl(&self, other) - } - #[cfg(not(feature = "alloc"))] - { - self * other - } + self.mul_vartime_glv(other) + } + + #[cfg(not(feature = "alloc"))] + fn mul_vartime(self, other: &Scalar) -> ProjectivePoint { + self * other } } impl MulByGeneratorVartime for ProjectivePoint { + // With precomputed basepoint tables, fixed-base multiplication beats GLV+wNAF. Otherwise use + // the (alloc-only) GLV path, falling back to plain multiplication when neither is available. 
+ #[cfg(feature = "precomputed-tables")] fn mul_by_generator_vartime(k: &Scalar) -> ProjectivePoint { - #[cfg(feature = "precomputed-tables")] - { - Self::mul_by_generator(k) - } - #[cfg(all(not(feature = "precomputed-tables"), feature = "alloc"))] - { - mul_vartime_impl(&Self::GENERATOR, k) - } - #[cfg(not(any(feature = "precomputed-tables", feature = "alloc")))] - { - Self::mul_by_generator(k) - } + Self::mul_by_generator(k) } + #[cfg(all(not(feature = "precomputed-tables"), feature = "alloc"))] + fn mul_by_generator_vartime(k: &Scalar) -> ProjectivePoint { + Self::GENERATOR.mul_vartime_glv(k) + } + + #[cfg(not(any(feature = "precomputed-tables", feature = "alloc")))] + fn mul_by_generator_vartime(k: &Scalar) -> ProjectivePoint { + Self::mul_by_generator(k) + } + + // Without `alloc`, the trait's default (`aG + bP` via two separate `mul_vartime` calls) applies. + #[cfg(feature = "alloc")] fn mul_by_generator_and_mul_add_vartime( a: &Self::Scalar, b_scalar: &Self::Scalar, b_point: &Self, ) -> Self { - #[cfg(feature = "alloc")] - { - mul_and_mul_add_vartime_impl(a, b_scalar, b_point) - } - #[cfg(not(feature = "alloc"))] - { - Self::lincomb(&[(Self::GENERATOR, *a), (*b_point, *b_scalar)]) - } + b_point.mul_add_vartime_glv(a, b_scalar) } } @@ -568,48 +548,47 @@ mod tests { } #[test] - #[cfg(feature = "getrandom")] + #[cfg(all(feature = "alloc", feature = "getrandom"))] fn test_mul_vartime() { for _ in 0..32 { let p = ProjectivePoint::generate(); let k = Scalar::generate(); let reference = p * k; - let test = mul_vartime_impl(&p, &k); + let test = p.mul_vartime_glv(&k); assert_eq!(reference, test); } } #[test] - #[cfg(feature = "getrandom")] + #[cfg(all(feature = "alloc", feature = "getrandom"))] fn test_mul_and_mul_add_vartime() { for _ in 0..32 { let p = ProjectivePoint::generate(); let a = Scalar::generate(); let b = Scalar::generate(); let reference = ProjectivePoint::GENERATOR * a + p * b; - let test = mul_and_mul_add_vartime_impl(&a, &b, &p); + let test = 
p.mul_add_vartime_glv(&a, &b); assert_eq!(reference, test); } } #[test] + #[cfg(feature = "alloc")] fn test_mul_and_mul_add_vartime_edge_cases() { let p = ProjectivePoint::GENERATOR; assert_eq!( - mul_and_mul_add_vartime_impl(&Scalar::ZERO, &Scalar::ZERO, &p), + p.mul_add_vartime_glv(&Scalar::ZERO, &Scalar::ZERO), ProjectivePoint::IDENTITY ); assert_eq!( - mul_and_mul_add_vartime_impl(&Scalar::ONE, &Scalar::ZERO, &p), + p.mul_add_vartime_glv(&Scalar::ONE, &Scalar::ZERO), ProjectivePoint::GENERATOR ); - assert_eq!( - mul_and_mul_add_vartime_impl(&Scalar::ZERO, &Scalar::ONE, &p), - p - ); + assert_eq!(p.mul_add_vartime_glv(&Scalar::ZERO, &Scalar::ONE), p); } #[test] + #[cfg(feature = "alloc")] fn test_mul_vartime_adversarial_scalars() { let p = ProjectivePoint::GENERATOR; let mut bytes = [0u8; 32]; @@ -618,7 +597,7 @@ mod tests { } let k = Scalar::from_bytes_unchecked(&bytes); let reference = p * k; - let test = mul_vartime_impl(&p, &k); + let test = p.mul_vartime_glv(&k); assert_eq!( reference, test, "mul_vartime mismatch on adversarial scalar" @@ -626,16 +605,14 @@ mod tests { } #[test] + #[cfg(feature = "alloc")] fn test_mul_vartime_edge_cases() { let p = ProjectivePoint::GENERATOR; + assert_eq!(p.mul_vartime_glv(&Scalar::ZERO), ProjectivePoint::IDENTITY); + assert_eq!(p.mul_vartime_glv(&Scalar::ONE), p); + assert_eq!(p.mul_vartime_glv(&-Scalar::ONE), -p); assert_eq!( - mul_vartime_impl(&p, &Scalar::ZERO), - ProjectivePoint::IDENTITY - ); - assert_eq!(mul_vartime_impl(&p, &Scalar::ONE), p); - assert_eq!(mul_vartime_impl(&p, &-Scalar::ONE), -p); - assert_eq!( - mul_vartime_impl(&ProjectivePoint::IDENTITY, &Scalar::ONE), + ProjectivePoint::IDENTITY.mul_vartime_glv(&Scalar::ONE), ProjectivePoint::IDENTITY ); } From 39e2ae23b2e936eec8bbd750e1aec4dfe11b54ac Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Sat, 6 Jun 2026 17:37:42 -0600 Subject: [PATCH 8/9] k256: restore lincomb for no-alloc mul_by_generator_and_mul_add_vartime The GLV rework only defined 
mul_by_generator_and_mul_add_vartime under `#[cfg(feature = "alloc")]`, so without `alloc` the impl fell through to the trait default, which computes `aG + bP` as two independent variable-time scalar multiplications (`mul_by_generator_vartime(a) + p.mul_vartime(b)`). That doubles the point doublings versus the pre-GLV behavior, where this was a single linear combination sharing doublings across both terms. Add a `#[cfg(not(feature = "alloc"))]` arm that restores the original `Self::lincomb(&[(G, a), (b_point, b)])`. The array-based LinearCombination impl uses stack tables and is not gated on `alloc`, so it works in no_std verifiers (the main consumers of the no-alloc path). Verified the fallback matches `aG + bP` and the identity/zero edge cases in both no-alloc configs (with and without precomputed-tables + critical-section). Co-Authored-By: Claude Opus 4.8 (1M context) --- k256/src/arithmetic/mul.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index 89087f4d0..054f10617 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -496,7 +496,6 @@ impl MulByGeneratorVartime for ProjectivePoint { Self::mul_by_generator(k) } - // Without `alloc`, the trait's default (`aG + bP` via two separate `mul_vartime` calls) applies. #[cfg(feature = "alloc")] fn mul_by_generator_and_mul_add_vartime( a: &Self::Scalar, @@ -505,6 +504,18 @@ impl MulByGeneratorVartime for ProjectivePoint { ) -> Self { b_point.mul_add_vartime_glv(a, b_scalar) } + + // Without `alloc` there is no wNAF table, so fall back to a linear combination, which shares + // doublings across both terms. This avoids the trait default's two independent scalar mults + // (`aG + bP` via separate `mul_vartime` calls), matching the pre-GLV behavior. 
+ #[cfg(not(feature = "alloc"))] + fn mul_by_generator_and_mul_add_vartime( + a: &Self::Scalar, + b_scalar: &Self::Scalar, + b_point: &Self, + ) -> Self { + Self::lincomb(&[(Self::GENERATOR, *a), (*b_point, *b_scalar)]) + } } impl MulAssign for ProjectivePoint { From db2db3ad1f1a1c2d3687e2b4bdea94ad0493f651 Mon Sep 17 00:00:00 2001 From: 42pupusas Date: Tue, 9 Jun 2026 17:38:47 -0600 Subject: [PATCH 9/9] k256: import wNAF types from the in-repo wnaf crate As of #1779 the forked WnafBase/WnafScalar implementation lives in the wnaf crate in this repository, re-exported by elliptic-curve behind its `wnaf` feature; the root re-exports are gone from traits master. Enable elliptic-curve/wnaf from the k256 alloc feature and import via elliptic_curve::wnaf, matching primeorder. from_le_bytes returns Option here rather than Result; adjust the fallback closures to match. Co-Authored-By: Claude Fable 5 --- k256/Cargo.toml | 2 +- k256/src/arithmetic/mul.rs | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/k256/Cargo.toml b/k256/Cargo.toml index 78d27cf97..5f71fd02b 100644 --- a/k256/Cargo.toml +++ b/k256/Cargo.toml @@ -43,7 +43,7 @@ sha3 = { version = "0.12", default-features = false } [features] default = ["arithmetic", "ecdsa", "pkcs8", "precomputed-tables", "schnorr", "std"] -alloc = ["ecdsa-core?/alloc", "elliptic-curve/alloc"] +alloc = ["ecdsa-core?/alloc", "elliptic-curve/alloc", "elliptic-curve/wnaf"] std = ["alloc", "ecdsa-core?/std", "elliptic-curve/std", "getrandom"] arithmetic = ["elliptic-curve/arithmetic"] diff --git a/k256/src/arithmetic/mul.rs b/k256/src/arithmetic/mul.rs index 054f10617..15293ca31 100644 --- a/k256/src/arithmetic/mul.rs +++ b/k256/src/arithmetic/mul.rs @@ -45,7 +45,10 @@ use elliptic_curve::{ }; #[cfg(feature = "alloc")] -use elliptic_curve::{WnafBase, WnafScalar, bigint::ArrayEncoding}; +use elliptic_curve::{ + bigint::ArrayEncoding, + wnaf::{WnafBase, WnafScalar}, +}; #[cfg(feature = "precomputed-tables")] use 
super::tables::BASEPOINT_TABLE; @@ -331,7 +334,7 @@ const GLV_LE_BYTES: usize = 17; /// GLV + wNAF variable-time scalar multiplication. /// -/// These require heap-allocated wNAF tables (via the `group` crate's `WnafBase`/`WnafScalar`), so +/// These require heap-allocated wNAF tables (via the `wnaf` crate's `WnafBase`/`WnafScalar`), so /// the whole block is gated on `alloc`. Without `alloc`, the `MulVartime`/`MulByGeneratorVartime` /// impls fall back to constant-time multiplication and the trait-provided default combinators. /// @@ -364,9 +367,9 @@ impl ProjectivePoint { // it produces an identical (just slower) result for any in-range scalar. let scalars = [ WnafScalar::from_le_bytes(&r1.0.to_le_byte_array()[..GLV_LE_BYTES]) - .unwrap_or_else(|_| WnafScalar::new(&r1)), + .unwrap_or_else(|| WnafScalar::new(&r1)), WnafScalar::from_le_bytes(&r2.0.to_le_byte_array()[..GLV_LE_BYTES]) - .unwrap_or_else(|_| WnafScalar::new(&r2)), + .unwrap_or_else(|| WnafScalar::new(&r2)), ]; (bases, scalars) }