Skip to content

Commit 71cdbd4

Browse files
committed
feat(hpc): materialise l1_f64_simd, l2_f64_simd, linf_f64_simd in heel_f64x8
Resolves the PP-15 baton-handoff finding from wave-1: the integration plan §5 stable-surface table promised these three functions, but the ND-1 + ND-2 audits confirmed only cosine_f64_simd existed in source. Three "aspirational reserved names" → three real public APIs. Implementation matches cosine_f64_simd's pattern exactly: - debug_assert_eq! length guard - n / 8 F64x8 chunks via from_slice - l1: diff.abs() + acc accumulation, reduce_sum - l2: diff.mul_add(diff, acc) (FMA square), reduce_sum().sqrt() - linf: simd_max over abs, reduce_max - scalar tail loop for remainder Tests (l1_l2_linf_tests, 3 new): - l1_zero_for_equal_inputs — L1 of equal slices is exactly 0.0 - l2_matches_scalar_reference — within 100 * f64::EPSILON of scalar - linf_picks_the_largest_gap — Linf of [0,0,5,0] vs zeros is 5.0 All 15 heel_f64x8 tests pass (12 prior + 3 new). docs/hpc-stability.md in this same branch now describes real APIs, not promises. Worker: W-ND-3. Wave-2 of the four-repo integration.
1 parent 15c7bd9 commit 71cdbd4

1 file changed

Lines changed: 159 additions & 0 deletions

File tree

src/hpc/heel_f64x8.rs

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,165 @@ pub fn cosine_f32_to_f64_simd(a: &[f32], b: &[f32]) -> f64 {
192192
}
193193
}
194194

195+
// ═══════════════════════════════════════════════════════════════════════════
196+
// Stable distance kernels — L1, L2, L∞ — PP-15 / integration plan §5
197+
// ═══════════════════════════════════════════════════════════════════════════
198+
199+
/// L1 (Manhattan) distance between two equal-length f64 slices.
200+
///
201+
/// Returns Σ |a[i] - b[i]|. Numerically: scalar reduction, no
202+
/// catastrophic cancellation. SIMD-accelerated via F64x8 chunks.
203+
///
204+
/// # Stability
205+
///
206+
/// Public stable surface per `docs/hpc-stability.md`. Signature is
207+
/// frozen: no rename, no semantic drift.
208+
///
209+
/// # Panics (debug only)
210+
///
211+
/// Panics in debug builds if `a.len() != b.len()`.
212+
pub fn l1_f64_simd(a: &[f64], b: &[f64]) -> f64 {
213+
debug_assert_eq!(a.len(), b.len());
214+
let n = a.len().min(b.len());
215+
let chunks = n / 8;
216+
let remainder = n % 8;
217+
218+
let mut acc = F64x8::splat(0.0);
219+
for i in 0..chunks {
220+
let va = F64x8::from_slice(&a[i * 8..]);
221+
let vb = F64x8::from_slice(&b[i * 8..]);
222+
let diff = va - vb;
223+
acc = acc + diff.abs(); // acc += |a - b| lane-wise
224+
}
225+
let mut sum = acc.reduce_sum();
226+
227+
// Scalar remainder
228+
let offset = chunks * 8;
229+
for i in 0..remainder {
230+
sum += (a[offset + i] - b[offset + i]).abs();
231+
}
232+
sum
233+
}
234+
235+
/// L2 (Euclidean) distance between two equal-length f64 slices.
236+
///
237+
/// Returns sqrt(Σ (a[i] - b[i])^2). SIMD-accelerated via F64x8 FMA chunks.
238+
///
239+
/// # Stability
240+
///
241+
/// Public stable surface per `docs/hpc-stability.md`. Signature is
242+
/// frozen: no rename, no semantic drift.
243+
///
244+
/// # Panics (debug only)
245+
///
246+
/// Panics in debug builds if `a.len() != b.len()`.
247+
pub fn l2_f64_simd(a: &[f64], b: &[f64]) -> f64 {
248+
debug_assert_eq!(a.len(), b.len());
249+
let n = a.len().min(b.len());
250+
let chunks = n / 8;
251+
let remainder = n % 8;
252+
253+
let mut acc = F64x8::splat(0.0);
254+
for i in 0..chunks {
255+
let va = F64x8::from_slice(&a[i * 8..]);
256+
let vb = F64x8::from_slice(&b[i * 8..]);
257+
let diff = va - vb;
258+
acc = diff.mul_add(diff, acc); // acc = diff*diff + acc (FMA)
259+
}
260+
let mut sum = acc.reduce_sum();
261+
262+
// Scalar remainder
263+
let offset = chunks * 8;
264+
for i in 0..remainder {
265+
let d = a[offset + i] - b[offset + i];
266+
sum += d * d;
267+
}
268+
sum.sqrt()
269+
}
270+
271+
/// L_infinity (Chebyshev) distance between two equal-length f64 slices.
272+
///
273+
/// Returns max_i |a[i] - b[i]|. SIMD-accelerated via F64x8 lane-wise max.
274+
///
275+
/// # Stability
276+
///
277+
/// Public stable surface per `docs/hpc-stability.md`. Signature is
278+
/// frozen: no rename, no semantic drift.
279+
///
280+
/// # Panics (debug only)
281+
///
282+
/// Panics in debug builds if `a.len() != b.len()`.
283+
pub fn linf_f64_simd(a: &[f64], b: &[f64]) -> f64 {
284+
debug_assert_eq!(a.len(), b.len());
285+
let n = a.len().min(b.len());
286+
let chunks = n / 8;
287+
let remainder = n % 8;
288+
289+
let mut max_acc = F64x8::splat(0.0);
290+
for i in 0..chunks {
291+
let va = F64x8::from_slice(&a[i * 8..]);
292+
let vb = F64x8::from_slice(&b[i * 8..]);
293+
let diff = (va - vb).abs(); // |a - b| lane-wise
294+
max_acc = max_acc.simd_max(diff); // running lane-wise max
295+
}
296+
let mut max_val = max_acc.reduce_max();
297+
298+
// Scalar remainder
299+
let offset = chunks * 8;
300+
for i in 0..remainder {
301+
let d = (a[offset + i] - b[offset + i]).abs();
302+
if d > max_val {
303+
max_val = d;
304+
}
305+
}
306+
max_val
307+
}
308+
309+
#[cfg(test)]
310+
mod l1_l2_linf_tests {
311+
use super::*;
312+
313+
#[test]
314+
fn l1_zero_for_equal_inputs() {
315+
let a = vec![1.0f64; 8];
316+
let b = vec![1.0f64; 8];
317+
let result = l1_f64_simd(&a, &b);
318+
assert_eq!(result, 0.0, "L1 of identical vectors must be 0.0, got {}", result);
319+
}
320+
321+
#[test]
322+
fn l2_matches_scalar_reference() {
323+
let a: Vec<f64> = (0..100).map(|i| (i as f64 * 0.1).sin()).collect();
324+
let b: Vec<f64> = (0..100).map(|i| (i as f64 * 0.1).cos()).collect();
325+
326+
let simd_l2 = l2_f64_simd(&a, &b);
327+
328+
// Scalar reference
329+
let scalar_sum: f64 = a.iter().zip(&b).map(|(x, y)| (x - y) * (x - y)).sum();
330+
let scalar_l2 = scalar_sum.sqrt();
331+
332+
assert!(
333+
(simd_l2 - scalar_l2).abs() < 100.0 * f64::EPSILON,
334+
"SIMD L2 {:.15} vs scalar L2 {:.15}, diff = {:.3e}",
335+
simd_l2,
336+
scalar_l2,
337+
(simd_l2 - scalar_l2).abs()
338+
);
339+
}
340+
341+
#[test]
342+
fn linf_picks_the_largest_gap() {
343+
let a = vec![0.0f64, 0.0, 5.0, 0.0];
344+
let b = vec![0.0f64, 0.0, 0.0, 0.0];
345+
let result = linf_f64_simd(&a, &b);
346+
assert!(
347+
(result - 5.0).abs() < f64::EPSILON,
348+
"L∞ should be 5.0, got {}",
349+
result
350+
);
351+
}
352+
}
353+
195354
#[cfg(test)]
196355
mod tests {
197356
use super::*;

0 commit comments

Comments
 (0)