From 8d98351cc74301caa3791008ee187a8f0c1faffe Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 4 Jul 2026 13:50:35 +0000 Subject: [PATCH] onebrc/lane-j: wire GridBatch through ndarray MultiLaneColumn (D-DNV-1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The proven 64×64 gridlake batch table (E-1BRC-GRIDLAKE-SWEETSPOT-1) now renders its four accumulator columns as ndarray::simd::MultiLaneColumn gridlake carriers — min/max on the i32x16 lane, sum on i64x8, count (widened, non-negative) on u64x8, the integer lanes ndarray added for exactly this. GridBatch::as_gridlake_columns is a little-endian reading (zero re-layout), returning Err(()) on a mis-aligned grid to mirror MultiLaneColumn::new's own 64-byte contract. This is D-DNV-1 of the DeepNSM→V3 convergence plan: the batch table is not a bespoke struct, it is typed lanes over one carrier — the same MultiLaneColumn the COCA cognitive Cell (helix48/campq48/count/truth) composes from. Wire, don't invent. - Cargo.toml: lane-j feature pulls ndarray (default-features=false, std). - lane_j.rs: GridlakeColumns + as_gridlake_columns + 2 tests (LE roundtrip cell-for-cell against the source accumulators, incl. lane-boundary and tile-edge cells; unaligned-grid reject). Typed lanes exercised via len_i32x16/iter_i32x16 == grid/16, i64x8/u64x8 == grid/8. - STATUS_BOARD: deepnsm-v3-convergence-v1 section, D-DNV-1 In PR. Verified: cargo test --features lane-j (2/2 green); lane_j.rs fmt + clippy clean. Pre-existing fmt/clippy debt in sibling lanes (lane_s/lane_t) left untouched. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01MLBnPuScZy6w9di2QEjsXM --- .claude/board/STATUS_BOARD.md | 11 +++ crates/onebrc-probe/Cargo.toml | 6 +- crates/onebrc-probe/src/lane_j.rs | 135 +++++++++++++++++++++++++++++- 3 files changed, 149 insertions(+), 3 deletions(-) diff --git a/.claude/board/STATUS_BOARD.md b/.claude/board/STATUS_BOARD.md index 32b68225..4905a648 100644 --- a/.claude/board/STATUS_BOARD.md +++ b/.claude/board/STATUS_BOARD.md @@ -1,3 +1,14 @@ +## deepnsm-v3-convergence-v1 — DeepNSM is the encoder that fills reserved tenants + +Plan: `.claude/plans/deepnsm-v3-convergence-v1.md` (`E-V3-DEEPNSM-IS-THE-ENCODER-NOT-A-MIGRATION-1`). Static convergence PROVEN by #624 P0–P5; the memory layer is the genuinely-unbuilt seam. Extends `v3-convergence-wiring-v1` (wire-don't-invent). + +| D-id | Title | Crate(s) | Status | Evidence | +|---|---|---|---|---| +| D-DNV-1 | Gridlake carrier: `GridBatch::as_gridlake_columns` → `ndarray::simd::MultiLaneColumn` (i32 min/max, i64 sum, u64 count); the carrier the COCA `Cell` also rides | onebrc-probe (+ndarray) | In PR | lane-j feature pulls ndarray; 2 tests green (LE roundtrip cell-for-cell + unaligned-grid reject); lane_j.rs clippy-clean | +| D-DNV-2 | deepnsm `SpoTriple` → `CausalEdge64` S/P/O+freq/conf → `MaterializedEdges`; run `nars_engine.all_projections()` (2³) over the COCA distance matrix | deepnsm + planner | Queued | buildable; extends #624 P3b | +| D-DNV-3 | arm-discovery as the 2nd proposer leg into one SpoStore (shares palette256 oracle) | arm-discovery + deepnsm | Blocked (ARM-JIRAK-FLOOR) | D-ARM-7 Jirak noise floor is the hard prereq | +| D-DNV-4 | Episodic-witness tenant + `basin=family` wake (`witness_tombstone` calcify chain) | contract + arigraph | Blocked (own wave + probe) | no episodic-witness ValueTenant; calcify chain is `todo!()`; basin=family doc-only | + ## v3-substrate-integration-v1 — the .claude/v3/ consolidation (W0–W6) Plan: 
`.claude/v3/INTEGRATION-PLAN.md` (stub: `.claude/plans/v3-substrate-integration-v1.md`). Adopts (does not re-mint) D-MBX-A6, D-PERT-1, D-CC-*, D-VCW-3/5/7, D-CCF-4. diff --git a/crates/onebrc-probe/Cargo.toml b/crates/onebrc-probe/Cargo.toml index f34dc3c3..6d4e5e24 100644 --- a/crates/onebrc-probe/Cargo.toml +++ b/crates/onebrc-probe/Cargo.toml @@ -43,8 +43,10 @@ lane-h = ["lane-g"] # flush-cache interleaving - same dep set as lane-g. lane-i = ["dep:lance-graph-contract", "dep:ractor", "dep:tokio"] # Lane J (parameterized batch pipeline): lane I's shape with grid / -# sink-lanes / registry knobs — needs lane-i's RowOwner. -lane-j = ["lane-i"] +# sink-lanes / registry knobs — needs lane-i's RowOwner. Also pulls +# ndarray for the gridlake carrier (`GridBatch::as_gridlake_columns` → +# `ndarray::simd::MultiLaneColumn`; DeepNSM→V3 D-DNV-1). +lane-j = ["lane-i", "dep:ndarray"] # All 8 batching-method presets (src/presets.rs) — the lab-sweep surface; # see FINDINGS.md (agnostic record) + COMMENTARY.md (interpretation). 
presets = ["lane-g", "lane-h", "lane-i", "lane-j"] diff --git a/crates/onebrc-probe/src/lane_j.rs b/crates/onebrc-probe/src/lane_j.rs index da4e418e..147de6cf 100644 --- a/crates/onebrc-probe/src/lane_j.rs +++ b/crates/onebrc-probe/src/lane_j.rs @@ -43,6 +43,7 @@ use crate::lane_f::{fnv1a64, morton_slot}; use crate::lane_i::RowOwner; use crate::{chunk_bounds, merge_maps, parse_temp_tenths, Stats}; use lance_graph_contract::kanban::{ExecTarget, KanbanColumn, KanbanMove}; +use ndarray::simd::MultiLaneColumn; use ractor::{Actor, ActorProcessingErr, ActorRef, RpcReplyPort}; use std::collections::{BTreeMap, VecDeque}; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -133,7 +134,7 @@ impl GridMemo { // ─── Grid batch table (the gridlake SoA unit at grid=4096: ~80 KB) ────── -pub(crate) struct GridBatch { +pub struct GridBatch { mins: Vec<i32>, maxs: Vec<i32>, sums: Vec<i64>, @@ -180,6 +181,70 @@ impl GridBatch { } } +// ─── Gridlake carrier: the batch table AS ndarray MultiLaneColumns ────── + +/// The lane-J `GridBatch` accumulators rendered as `ndarray::simd` +/// [`MultiLaneColumn`] gridlake carriers — the SoA-contract carrier the +/// proven 64×64 gridlake tile rides (`E-1BRC-GRIDLAKE-SWEETSPOT-1`), and the +/// **same** `MultiLaneColumn` the COCA cognitive `Cell` +/// (helix48/campq48/count/truth, `crates/deepnsm/examples/gridlake_coca_wire.rs`) +/// composes from. This is the DeepNSM→V3 D-DNV-1 recognition +/// (`.claude/plans/deepnsm-v3-convergence-v1.md`, +/// `E-V3-DEEPNSM-IS-THE-ENCODER-NOT-A-MIGRATION-1`): the batch table is not a +/// bespoke struct, it is typed lanes over one carrier — "wire, don't invent." +/// +/// Lane widths follow the integer lanes ndarray added for exactly this +/// (`iter_i32x16` "min/max tile columns", `iter_i64x8` "running sums"): +/// min/max ride `I32x16`, sum rides `I64x8`, count (a non-negative +/// accumulator) rides `U64x8`.
Each column's backing buffer is a 64-byte +/// multiple whenever `grid` is a multiple of 16 (i32·16 = i64·8 = u64·8 = +/// 64 B), which the gridlake `grid = 4096` satisfies. +pub struct GridlakeColumns { + pub mins: MultiLaneColumn, + pub maxs: MultiLaneColumn, + pub sums: MultiLaneColumn, + pub counts: MultiLaneColumn, +} + +impl GridBatch { + /// Render the four accumulator columns as [`MultiLaneColumn`] gridlake + /// carriers (little-endian bytes, zero semantic change — a *reading*, not + /// a re-layout). `count` is widened `u32 → u64` to ride the unsigned + /// 64-bit accumulator lane. Returns `Err(())` if a column buffer is not + /// 64-byte aligned (i.e. `grid % 16 != 0`), mirroring + /// `MultiLaneColumn::new`'s own contract. + #[allow(clippy::result_unit_err)] // pass-through of MultiLaneColumn::new's Result<_, ()> alignment contract + pub fn as_gridlake_columns(&self) -> Result<GridlakeColumns, ()> { + fn col_i32(v: &[i32]) -> Result<MultiLaneColumn, ()> { + let mut b = Vec::with_capacity(v.len() * 4); + for &x in v { + b.extend_from_slice(&x.to_le_bytes()); + } + MultiLaneColumn::new(Arc::from(b)) + } + fn col_i64(v: &[i64]) -> Result<MultiLaneColumn, ()> { + let mut b = Vec::with_capacity(v.len() * 8); + for &x in v { + b.extend_from_slice(&x.to_le_bytes()); + } + MultiLaneColumn::new(Arc::from(b)) + } + fn col_u64_from_u32(v: &[u32]) -> Result<MultiLaneColumn, ()> { + let mut b = Vec::with_capacity(v.len() * 8); + for &x in v { + b.extend_from_slice(&(x as u64).to_le_bytes()); + } + MultiLaneColumn::new(Arc::from(b)) + } + Ok(GridlakeColumns { + mins: col_i32(&self.mins)?, + maxs: col_i32(&self.maxs)?, + sums: col_i64(&self.sums)?, + counts: col_u64_from_u32(&self.counts)?, + }) + } +} + // ─── Laned sinks: each lane owns a contiguous row-range slice ─────────── enum LaneMsg { @@ -594,6 +659,74 @@ pub fn lane_j_grid_pipeline(data: &[u8], workers: usize) -> BTreeMap Vec { + c.as_bytes() + .chunks_exact(4) + .map(|b| i32::from_le_bytes(b.try_into().expect("4-byte i32 chunk"))) + .collect() + }; + let dec_i64 = |c: &MultiLaneColumn| ->
Vec<i64> { + c.as_bytes() + .chunks_exact(8) + .map(|b| i64::from_le_bytes(b.try_into().expect("8-byte i64 chunk"))) + .collect() + }; + let dec_u64 = |c: &MultiLaneColumn| -> Vec<u64> { + c.as_bytes() + .chunks_exact(8) + .map(|b| u64::from_le_bytes(b.try_into().expect("8-byte u64 chunk"))) + .collect() + }; + assert_eq!(dec_i32(&cols.mins), batch.mins); + assert_eq!(dec_i32(&cols.maxs), batch.maxs); + assert_eq!(dec_i64(&cols.sums), batch.sums); + let counts_u64: Vec<u64> = batch.counts.iter().map(|&c| c as u64).collect(); + assert_eq!(dec_u64(&cols.counts), counts_u64); + } + + /// The carrier refuses a mis-aligned grid (not a multiple of 16) rather + /// than silently producing a non-64-byte column — the `MultiLaneColumn` + /// contract surfaced at the batch boundary. + #[test] + fn gridlake_carrier_rejects_unaligned_grid() { + let batch = GridBatch::new(72); // 72 % 16 != 0 → i32 col = 288 B, not 64-mult + assert!(batch.as_gridlake_columns().is_err()); + } + /// Parity across the knob matrix corners: gridlake (4096) and full /// (65536) grids × 1 and 8 sink lanes × registry on/off, all with a /// small batch to force multi-batch flush-cache recycling.