From e849f512f15ac303b4163357c638d777989a907b Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 19 May 2026 11:32:48 +0300 Subject: [PATCH 1/3] test: round1-2 RFL coverage push (10 new files) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds happy-path RFL coverage for recently-introduced operators and public-API surfaces that had 0%/low coverage on upstream master. Round 1: - rfl/agg/rowform_topk.rfl — OP_GROUP_TOPK_ROWFORM / BOTK - rfl/agg/rowform_maxmin.rfl — OP_GROUP_MAXMIN_ROWFORM - rfl/agg/rowform_sum_count.rfl — OP_GROUP_SUM_COUNT_ROWFORM (3..8 keys) - rfl/sort/fused_topn.rfl — top/bot over filtered vectors - rfl/query/per_group_buf.rfl — nonagg_eval_per_group(_buf), const_str_expr_copy - rfl/query/parallel_probe.rfl — idxbuf_hist_fn / idxbuf_scat_fn parallel row->gid probe path Round 2: - rfl/temporal/extract.rfl — yyyy/mm/dd/hh/ss/minute/dow/doy - rfl/agg/variance.rfl — var / var_pop / stddev / stddev_pop / dev - rfl/io/csv_splayed.rfl — csv_splayed_writer_*, GUID writer, col_copy_str_pool roundtrip - rfl/hof/wrappers.rfl — pmap / fold-left / scan-left dispatchers All tests happy-path only (correct types / shapes); error/null/wrong-type branches deferred to a future round. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/rfl/agg/rowform_maxmin.rfl | 103 ++++++++++++++++ test/rfl/agg/rowform_sum_count.rfl | 132 +++++++++++++++++++++ test/rfl/agg/rowform_topk.rfl | 125 ++++++++++++++++++++ test/rfl/agg/variance.rfl | 114 ++++++++++++++++++ test/rfl/hof/wrappers.rfl | 129 ++++++++++++++++++++ test/rfl/io/csv_splayed.rfl | 181 +++++++++++++++++++++++++++++ test/rfl/query/parallel_probe.rfl | 107 +++++++++++++++++ test/rfl/query/per_group_buf.rfl | 136 ++++++++++++++++++++++ test/rfl/sort/fused_topn.rfl | 94 +++++++++++++++ test/rfl/temporal/extract.rfl | 142 ++++++++++++++++++++++ 10 files changed, 1263 insertions(+) create mode 100644 test/rfl/agg/rowform_maxmin.rfl create mode 100644 test/rfl/agg/rowform_sum_count.rfl create mode 100644 test/rfl/agg/rowform_topk.rfl create mode 100644 test/rfl/agg/variance.rfl create mode 100644 test/rfl/hof/wrappers.rfl create mode 100644 test/rfl/io/csv_splayed.rfl create mode 100644 test/rfl/query/parallel_probe.rfl create mode 100644 test/rfl/query/per_group_buf.rfl create mode 100644 test/rfl/sort/fused_topn.rfl create mode 100644 test/rfl/temporal/extract.rfl diff --git a/test/rfl/agg/rowform_maxmin.rfl b/test/rfl/agg/rowform_maxmin.rfl new file mode 100644 index 00000000..56cacb21 --- /dev/null +++ b/test/rfl/agg/rowform_maxmin.rfl @@ -0,0 +1,103 @@ +;; ════════════════════════════════════════════════════════════════════ +;; ROWFORM per-group max(x) + min(y) (src/ops/group.c: exec_group_maxmin_rowform) +;; +;; Planner gate (src/ops/query.c:5985) routes +;; (select {a: (max x) b: (min y) by: from: T}) +;; to OP_GROUP_MAXMIN_ROWFORM when: +;; - exactly 2 aggs, the first OP_MAX and second OP_MIN +;; - 1 key, no where, no non-agg +;; - key, x, y all simple OP_SCAN +;; - key type in {I64,I32,I16,U8,BOOL,DATE,TIME,TIMESTAMP,SYM} +;; - x, y types in {I64,I32,I16,U8,BOOL} (integer only — F64 falls back) +;; +;; ROWFORM emits one row per group with columns [key, x, y] where x +;; holds per-group max 
and y per-group min. Group order is partition- +;; induced, so tests use sum / membership rather than positional checks. +;; +;; Parallel threshold: nrows >= 16384 (src/ops/group.c:10482). +;; ════════════════════════════════════════════════════════════════════ + +;; ─── basic shape: I64 key, I64 x, I64 y ───────────────────────────── +(set T (table [k x y] (list (as 'I64 [0 0 0 1 1 1]) (as 'I64 [3 1 5 2 7 4]) (as 'I64 [50 30 70 20 60 10])))) +;; g=0: x={3,1,5} max=5; y={50,30,70} min=30 +;; g=1: x={2,7,4} max=7; y={20,60,10} min=10 +(count (select {mx: (max x) mn: (min y) by: k from: T})) -- 2 +(sum (at (select {mx: (max x) mn: (min y) by: k from: T}) 'mx)) -- 12 +(sum (at (select {mx: (max x) mn: (min y) by: k from: T}) 'mn)) -- 40 +;; Output column types match source: I64 throughout. +(type (at (select {mx: (max x) mn: (min y) by: k from: T}) 'mx)) -- 'I64 +(type (at (select {mx: (max x) mn: (min y) by: k from: T}) 'mn)) -- 'I64 +(type (at (select {mx: (max x) mn: (min y) by: k from: T}) 'k)) -- 'I64 + +;; ─── SYM key (gate allows it) ─────────────────────────────────────── +(set Ts (table [k x y] (list [A A A B B] (as 'I64 [1 5 3 2 4]) (as 'I64 [9 2 8 7 6])))) +;; g=A: max x=5, min y=2; g=B: max x=4, min y=6 +(count (select {mx: (max x) mn: (min y) by: k from: Ts})) -- 2 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Ts}) 'mx)) -- 9 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Ts}) 'mn)) -- 8 +(type (at (select {mx: (max x) mn: (min y) by: k from: Ts}) 'k)) -- 'SYM + +;; ─── narrow integer key + narrow integer values ───────────────────── +(set Ti32 (table [k x y] (list (as 'I32 [0 0 1 1]) (as 'I32 [5 7 11 13]) (as 'I32 [20 10 50 40])))) +;; g=0: max=7, min=10; g=1: max=13, min=40 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Ti32}) 'mx)) -- 20 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Ti32}) 'mn)) -- 50 +(type (at (select {mx: (max x) mn: (min y) by: k from: Ti32}) 'mx)) -- 'I32 +(type (at (select {mx: (max x) mn: (min 
y) by: k from: Ti32}) 'mn)) -- 'I32 + +(set Ti16 (table [k x y] (list (as 'I16 [0 0 1 1]) (as 'I16 [10 20 30 40]) (as 'I16 [-1 -2 3 -4])))) +;; g=0: max=20, min=-2; g=1: max=40, min=-4. Sums: 60, -6. +(sum (at (select {mx: (max x) mn: (min y) by: k from: Ti16}) 'mx)) -- 60 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Ti16}) 'mn)) -- -6 + +(set Tu8 (table [k x y] (list (as 'U8 [0 0 1 1]) (as 'U8 [10 30 5 7]) (as 'U8 [40 60 20 80])))) +;; g=0: max=30, min=40; g=1: max=7, min=20. Sums: 37, 60. +(sum (at (select {mx: (max x) mn: (min y) by: k from: Tu8}) 'mx)) -- 37 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Tu8}) 'mn)) -- 60 + +;; ─── BOOL x, BOOL y (degenerate but supported) ────────────────────── +(set Tb (table [k x y] (list [A A B B] [false true true false] [true false true true]))) +;; g=A: max x = true; min y = false. g=B: max x = true; min y = true. +(count (select {mx: (max x) mn: (min y) by: k from: Tb})) -- 2 +;; sum of BOOL coerces to I64: true=1, false=0. mx: 1+1=2, mn: 0+1=1. +(sum (as 'I64 (at (select {mx: (max x) mn: (min y) by: k from: Tb}) 'mx))) -- 2 +(sum (as 'I64 (at (select {mx: (max x) mn: (min y) by: k from: Tb}) 'mn))) -- 1 + +;; ─── single group ─────────────────────────────────────────────────── +(set T1 (table [k x y] (list (as 'I64 [0 0 0 0 0]) (as 'I64 [3 1 5 2 7]) (as 'I64 [50 30 70 20 60])))) +(count (select {mx: (max x) mn: (min y) by: k from: T1})) -- 1 +;; max x = 7, min y = 20 +(at (at (select {mx: (max x) mn: (min y) by: k from: T1}) 'mx) 0) -- 7 +(at (at (select {mx: (max x) mn: (min y) by: k from: T1}) 'mn) 0) -- 20 + +;; ─── many small groups ────────────────────────────────────────────── +(set Tm (table [k x y] (list (as 'I64 [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]) (as 'I64 [10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29]) (as 'I64 [50 51 52 53 54 55 56 57 58 59 40 41 42 43 44 45 46 47 48 49])))) +;; group j has x={j+10, j+20} -> max = j+20; y={j+50, j+40} -> min = j+40. 
+;; sum of maxes: (0+20)+(1+20)+...+(9+20) = 45+200 = 245. +;; sum of mins: (0+40)+(1+40)+...+(9+40) = 45+400 = 445. +(count (select {mx: (max x) mn: (min y) by: k from: Tm})) -- 10 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Tm}) 'mx)) -- 245 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Tm}) 'mn)) -- 445 + +;; ─── parallel path: nrows >= 16384 ────────────────────────────────── +;; 20000 rows. x = i, y = 2N-i. 10 groups (mod 10). +(set N 20000) +(set Tbig (table [k x y] (list (% (til N) 10) (til N) (- (* 2 N) (til N))))) +;; Group j: x = {j, j+10, ..., j+19990}; max x = j+19990. +;; y = {2N-j, 2N-j-10, ..., 2N-j-19990}; min y = 2N-j-19990 = 20010-j. +;; sum of max x: 10*19990 + 45 = 199945 +;; sum of min y: 10*20010 - 45 = 200055 +(count (select {mx: (max x) mn: (min y) by: k from: Tbig})) -- 10 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Tbig}) 'mx)) -- 199945 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Tbig}) 'mn)) -- 200055 + +;; ─── parallel with SYM key (high-cardinality H2O id3 shape) ───────── +(set Tsbig (table [k x y] (list (as 'SYMBOL (% (til N) 100)) (til N) (- (* 2 N) (til N))))) +;; 100 groups. Group of key idj contains rows i where i % 100 = j. +;; x values: {j, j+100, ..., j+19900}; max x = j+19900. +;; y = 2N - i where N=20000: y values: {40000-j, ..., 40000-j-19900}; min y = 20100-j. +;; sum of max x: 100*19900 + (0+..+99) = 1990000 + 4950 = 1994950. +;; sum of min y: 100*20100 - (0+..+99) = 2010000 - 4950 = 2005050. 
+(count (select {mx: (max x) mn: (min y) by: k from: Tsbig})) -- 100 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Tsbig}) 'mx)) -- 1994950 +(sum (at (select {mx: (max x) mn: (min y) by: k from: Tsbig}) 'mn)) -- 2005050 diff --git a/test/rfl/agg/rowform_sum_count.rfl b/test/rfl/agg/rowform_sum_count.rfl new file mode 100644 index 00000000..48484a97 --- /dev/null +++ b/test/rfl/agg/rowform_sum_count.rfl @@ -0,0 +1,132 @@ +;; ════════════════════════════════════════════════════════════════════ +;; ROWFORM multi-key per-group sum(v) + count(v) +;; (src/ops/group.c: exec_group_sum_count_rowform) +;; +;; Planner gate (src/ops/query.c:6082) routes +;; (select {tot: (sum v) cnt: (count v) by: [k1 k2 ... kN] from: T}) +;; to OP_GROUP_SUM_COUNT_ROWFORM when: +;; - N keys with 3 <= N <= 8 (all simple OP_SCAN) +;; - exactly 2 aggs: (sum v) then (count v), same value column +;; - no where, no non-agg expressions +;; - all keys non-nullable, types in +;; {I64,I32,I16,U8,BOOL,DATE,TIME,TIMESTAMP,SYM} +;; - v non-nullable, type in {I64,I32,I16,U8,BOOL,F64} +;; +;; ROWFORM emits one row per distinct key tuple with columns +;; [k1..kN, sum, count]. Sum is always F64 (executor casts integer +;; v -> double); count is I64. Group order is partition-induced; tests +;; verify via aggregate sums / counts, not positional checks. +;; +;; Aliases avoid colliding with key names (the result schema is +;; [keys..., tot, cnt] and a name collision lets `at` pick the wrong +;; column). We use `tot` and `cnt` throughout. +;; +;; Parallel threshold: nrows >= 16384 (src/ops/group.c:11656). +;; Closes canonical H2O q10. +;; ════════════════════════════════════════════════════════════════════ + +;; ─── basic 3-key shape: I64 keys, I64 v ───────────────────────────── +(set T (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [10 20 30 40])))) +;; All 4 rows have unique (k1,k2,k3) tuples, so 4 groups each of size 1. 
+(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T})) -- 4 +;; Sum across all groups = sum of v = 100; count total = 4. +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T}) 'tot)) -- 100.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T}) 'cnt)) -- 4 +;; Sum column is F64 (executor always emits F64 sum); count is I64. +(type (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T}) 'tot)) -- 'F64 +(type (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: T}) 'cnt)) -- 'I64 + +;; ─── 3 keys with collapses: distinct group count < n_rows ─────────── +(set Tg (table [k1 k2 k3 v] (list (as 'I64 [0 0 0 1 1]) (as 'I64 [0 0 1 0 0]) (as 'I64 [0 0 0 0 0]) (as 'I64 [10 20 30 40 50])))) +;; Distinct (k1,k2,k3): (0,0,0)->v={10,20}=30, (0,1,0)->v=30, (1,0,0)->v={40,50}=90. +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg})) -- 3 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'tot)) -- 150.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'cnt)) -- 5 +;; Verify max per-group sum is 90 (from (1,0,0)) and min is 30 (shared by the pair (0,0,0) and the singleton (0,1,0)). 
+(max (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'tot)) -- 90.0 +(min (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'tot)) -- 30.0 +(max (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'cnt)) -- 2 +(min (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tg}) 'cnt)) -- 1 + +;; ─── 4 keys ───────────────────────────────────────────────────────── +(set T4 (table [k1 k2 k3 k4 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [1 2 3 4])))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4] from: T4})) -- 4 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4] from: T4}) 'tot)) -- 10.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4] from: T4}) 'cnt)) -- 4 + +;; ─── 5 keys ───────────────────────────────────────────────────────── +(set T5 (table [k1 k2 k3 k4 k5 v] (list (as 'I64 [0 0 1 1 0]) (as 'I64 [0 1 0 1 0]) (as 'I64 [0 0 0 0 0]) (as 'I64 [0 0 0 0 0]) (as 'I64 [0 0 0 0 0]) (as 'I64 [10 20 30 40 100])))) +;; (0,0,0,0,0) seen twice (rows 0 and 4) -> sum 110, count 2. Three other singletons. 
+(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5] from: T5})) -- 4 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5] from: T5}) 'tot)) -- 200.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5] from: T5}) 'cnt)) -- 5 +(max (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5] from: T5}) 'cnt)) -- 2 + +;; ─── 6 keys ───────────────────────────────────────────────────────── +(set T6 (table [k1 k2 k3 k4 k5 k6 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [11 22 33 44])))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: T6})) -- 4 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: T6}) 'tot)) -- 110.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: T6}) 'cnt)) -- 4 + +;; ─── 7 keys ───────────────────────────────────────────────────────── +(set T7 (table [k1 k2 k3 k4 k5 k6 k7 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [5 6 7 8])))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7] from: T7})) -- 4 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7] from: T7}) 'tot)) -- 26.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7] from: T7}) 'cnt)) -- 4 + +;; ─── 8 keys (gate upper bound) ────────────────────────────────────── +(set T8 (table [k1 k2 k3 k4 k5 k6 k7 k8 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [0 0 0 0]) (as 'I64 [10 20 30 40])))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7 k8] from: T8})) -- 4 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7 k8] from: T8}) 'tot)) -- 100.0 +(sum (at 
(select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6 k7 k8] from: T8}) 'cnt)) -- 4 + +;; ─── F64 v column ─────────────────────────────────────────────────── +(set Tf (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'F64 [1.5 2.5 3.5 4.5])))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tf})) -- 4 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tf}) 'tot)) -- 12.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tf}) 'cnt)) -- 4 + +;; ─── narrow integer v (I32 / I16 / U8) ────────────────────────────── +(set Ti32 (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'I32 [10 20 30 40])))) +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Ti32}) 'tot)) -- 100.0 +(set Tu8 (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) (as 'I64 [0 1 0 1]) (as 'I64 [0 0 0 0]) (as 'U8 [1 2 3 4])))) +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tu8}) 'tot)) -- 10.0 + +;; ─── SYM keys (canonical H2O q10 shape) ───────────────────────────── +(set Ts (table [k1 k2 k3 v] (list [A A B B] [X Y X Y] [P P P P] (as 'I64 [10 20 30 40])))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Ts})) -- 4 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Ts}) 'tot)) -- 100.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Ts}) 'cnt)) -- 4 + +;; SYM keys with collapses +(set Tsc (table [k1 k2 k3 v] (list [A A A B] [X X Y Y] [P P P P] (as 'I64 [10 20 30 40])))) +;; Distinct: (A,X,P) -> {10,20}=30, (A,Y,P) -> 30, (B,Y,P) -> 40. 
+(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsc})) -- 3 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsc}) 'tot)) -- 100.0 +(max (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsc}) 'cnt)) -- 2 + +;; ─── mixed key types: I64 + SYM + I32 ─────────────────────────────── +(set Tmix (table [k1 k2 k3 v] (list (as 'I64 [0 0 1 1]) [A B A B] (as 'I32 [0 0 0 0]) (as 'I64 [10 20 30 40])))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tmix})) -- 4 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tmix}) 'tot)) -- 100.0 + +;; ─── parallel path: nrows >= 16384 ────────────────────────────────── +;; 20000 rows; 3 keys whose product cardinalities are 5 * 4 * 3 = 60 groups. +(set N 20000) +(set Tbig (table [k1 k2 k3 v] (list (% (til N) 5) (% (til N) 4) (% (til N) 3) (til N)))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tbig})) -- 60 +;; Total sum of v across all groups = N*(N-1)/2 = 199990000. +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tbig}) 'tot)) -- 199990000.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tbig}) 'cnt)) -- 20000 + +;; ─── parallel + SYM key ───────────────────────────────────────────── +(set Tsbig (table [k1 k2 k3 v] (list (as 'SYMBOL (% (til N) 5)) (as 'SYMBOL (% (til N) 4)) (% (til N) 3) (til N)))) +(count (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsbig})) -- 60 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsbig}) 'tot)) -- 199990000.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3] from: Tsbig}) 'cnt)) -- 20000 + +;; ─── parallel + 6 keys + F64 v ────────────────────────────────────── +(set Tf6 (table [k1 k2 k3 k4 k5 k6 v] (list (% (til N) 5) (% (til N) 4) (% (til N) 3) (% (til N) 2) (% (til N) 2) (% (til N) 2) (as 'F64 (til N))))) +;; k4..k6 are all i % 2, which k2 = i % 4 already determines, so the 60 distinct tuples of the 3-key case remain; only the total sum/count are asserted here. 
+(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: Tf6}) 'tot)) -- 199990000.0 +(sum (at (select {tot: (sum v) cnt: (count v) by: [k1 k2 k3 k4 k5 k6] from: Tf6}) 'cnt)) -- 20000 diff --git a/test/rfl/agg/rowform_topk.rfl b/test/rfl/agg/rowform_topk.rfl new file mode 100644 index 00000000..288939ee --- /dev/null +++ b/test/rfl/agg/rowform_topk.rfl @@ -0,0 +1,125 @@ +;; ════════════════════════════════════════════════════════════════════ +;; ROWFORM per-group top-K / bot-K (src/ops/group.c: exec_group_topk_rowform) +;; +;; Planner gate (src/ops/query.c:5885) routes +;; (select {alias: (top|bot col K) by: key from: T}) +;; to OP_GROUP_TOPK_ROWFORM / OP_GROUP_BOTK_ROWFORM when: +;; - single key, single agg, no where, no non-agg +;; - K in [1, 255] +;; - key & val columns are simple OP_SCAN, types in +;; {I64, I32, I16, U8, BOOL, DATE, TIME, TIMESTAMP, F64} +;; (SYM keys fall through to the LIST-cell OP_TOP_N path) +;; +;; ROWFORM emits one row per kept value: result has columns +;; [key_col, val_col] with K * n_groups rows (or fewer if a group has +;; < K elements). Within each group, top-K is descending; bot-K is +;; ascending. Order across groups is partition-induced (8-bit radix) +;; so tests use sum / membership rather than positional assertions. +;; +;; Parallel threshold: nrows >= 16384 (src/ops/group.c:9436). +;; ════════════════════════════════════════════════════════════════════ + +;; ─── basic shape: I64 key, I64 val, K=2, 2 groups ─────────────────── +(set T (table [k v] (list (as 'I64 [0 0 0 1 1 1]) (as 'I64 [10 20 30 40 50 60])))) +;; Top-2 per group: g=0 -> {30,20}, g=1 -> {60,50}; total 4 rows. +(count (select {t: (top v 2) by: k from: T})) -- 4 +;; Sum of top-2 values: 30+20+60+50 = 160. +(sum (at (select {t: (top v 2) by: k from: T}) 't)) -- 160 +;; ROWFORM keeps the source vector type (not LIST). 
+(type (at (select {t: (top v 2) by: k from: T}) 't)) -- 'I64 +;; Bot-2 per group: g=0 -> {10,20}, g=1 -> {40,50}; total 4 rows, sum 120. +(count (select {b: (bot v 2) by: k from: T})) -- 4 +(sum (at (select {b: (bot v 2) by: k from: T}) 'b)) -- 120 + +;; ─── K=1: degenerate to per-group max / min ───────────────────────── +;; K=1 top = max per group; total rows = n_groups. +(count (select {t: (top v 1) by: k from: T})) -- 2 +(sum (at (select {t: (top v 1) by: k from: T}) 't)) -- 90 +(sum (at (select {b: (bot v 1) by: k from: T}) 'b)) -- 50 + +;; ─── K equals group size: full group emitted ──────────────────────── +(count (select {t: (top v 3) by: k from: T})) -- 6 +;; Sum equals total of all source v's (10..60) = 210. +(sum (at (select {t: (top v 3) by: k from: T}) 't)) -- 210 + +;; ─── K > group size: capped at group size (no padding) ────────────── +(count (select {t: (top v 5) by: k from: T})) -- 6 +(sum (at (select {t: (top v 5) by: k from: T}) 't)) -- 210 + +;; ─── narrow integer key types (I32 / I16 / U8 / BOOL) ────────────── +(set Ti32 (table [k v] (list (as 'I32 [0 0 1 1]) (as 'I64 [5 7 11 13])))) +(sum (at (select {t: (top v 1) by: k from: Ti32}) 't)) -- 20 +(type (at (select {t: (top v 1) by: k from: Ti32}) 'k)) -- 'I32 +(set Ti16 (table [k v] (list (as 'I16 [0 0 1 1]) (as 'I64 [100 200 300 400])))) +(sum (at (select {t: (top v 1) by: k from: Ti16}) 't)) -- 600 +(set Tu8 (table [k v] (list (as 'U8 [0 0 1 1]) (as 'I64 [1 2 3 4])))) +(sum (at (select {b: (bot v 1) by: k from: Tu8}) 'b)) -- 4 +(set Tbool (table [k v] (list [false false true true] (as 'I64 [9 8 7 6])))) +(sum (at (select {t: (top v 1) by: k from: Tbool}) 't)) -- 16 + +;; ─── F64 key and F64 value ────────────────────────────────────────── +(set Tf (table [k v] (list (as 'F64 [0.0 0.0 0.0 1.0 1.0 1.0]) (as 'F64 [1.5 2.5 0.5 4.5 3.5 5.5])))) +(count (select {t: (top v 2) by: k from: Tf})) -- 4 +;; Top-2 per group: g=0 -> {2.5,1.5}=4.0; g=1 -> {5.5,4.5}=10.0; sum 14.0. 
+(sum (at (select {t: (top v 2) by: k from: Tf}) 't)) -- 14.0 +(type (at (select {t: (top v 2) by: k from: Tf}) 't)) -- 'F64 +;; Bot-1 per group: g=0 -> 0.5; g=1 -> 3.5; sum 4.0. +(sum (at (select {b: (bot v 1) by: k from: Tf}) 'b)) -- 4.0 + +;; ─── F64 key with I64 value (mixed) ───────────────────────────────── +(set Tfi (table [k v] (list (as 'F64 [0.0 0.0 1.0 1.0]) (as 'I64 [10 20 30 40])))) +(sum (at (select {t: (top v 1) by: k from: Tfi}) 't)) -- 60 +(type (at (select {t: (top v 1) by: k from: Tfi}) 'k)) -- 'F64 +(type (at (select {t: (top v 1) by: k from: Tfi}) 't)) -- 'I64 + +;; ─── single group: all rows share one key ─────────────────────────── +(set T1 (table [k v] (list (as 'I64 [0 0 0 0 0 0]) (as 'I64 [3 1 5 2 7 4])))) +(count (select {t: (top v 3) by: k from: T1})) -- 3 +;; Top-3 of {3,1,5,2,7,4} = {7,5,4}; sum 16. +(sum (at (select {t: (top v 3) by: k from: T1}) 't)) -- 16 +(sum (at (select {b: (bot v 3) by: k from: T1}) 'b)) -- 6 + +;; ─── ties: top-3 of [5 5 5 1] = [5 5 5] regardless of which 5 wins ── +(set Tt (table [k v] (list (as 'I64 [0 0 0 0]) (as 'I64 [5 5 5 1])))) +(sum (at (select {t: (top v 3) by: k from: Tt}) 't)) -- 15 + +;; ─── many groups, small per group; total kept = K * n_groups ──────── +(set Tm (table [k v] (list (as 'I64 [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]) (as 'I64 [10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29])))) +;; 10 groups × K=1 = 10 rows; max per group is (k+20). +(count (select {t: (top v 1) by: k from: Tm})) -- 10 +;; Sum of maxes: 20+21+...+29 = 245. +(sum (at (select {t: (top v 1) by: k from: Tm}) 't)) -- 245 +;; Bot per group: k+10; sum 10+11+...+19 = 145. +(sum (at (select {b: (bot v 1) by: k from: Tm}) 'b)) -- 145 +;; K=2 -> 20 rows; each group keeps {k+10, k+20}; total sum = 245+145 = 390. 
+(count (select {t: (top v 2) by: k from: Tm})) -- 20 +(sum (at (select {t: (top v 2) by: k from: Tm}) 't)) -- 390 + +;; ─── parallel path: nrows >= 16384 (radix dispatch) ───────────────── +;; 20000 rows, 10 groups (mod 10). Each group has 2000 elements equal +;; to {j, j+10, j+20, ..., j+19990}. Top-1 = j+19990 -> sum 10*19990+45. +(set N 20000) +(set Tbig (table [k v] (list (% (til N) 10) (til N)))) +(count (select {t: (top v 1) by: k from: Tbig})) -- 10 +(sum (at (select {t: (top v 1) by: k from: Tbig}) 't)) -- 199945 +;; Bot-1 sum = sum of j over j=0..9 = 45. +(sum (at (select {b: (bot v 1) by: k from: Tbig}) 'b)) -- 45 +;; Top-2: each group keeps {j+19990, j+19980}; sum = 10*(19990+19980) + 2*45 = 399790. +(count (select {t: (top v 2) by: k from: Tbig})) -- 20 +(sum (at (select {t: (top v 2) by: k from: Tbig}) 't)) -- 399790 +;; Bot-2: {j, j+10}; sum = 10*10 + 2*45 = 190. +(sum (at (select {b: (bot v 2) by: k from: Tbig}) 'b)) -- 190 + +;; Parallel path, 1000 distinct keys (high cardinality stress). +(set Mbig (table [k v] (list (% (til N) 1000) (til N)))) +;; 1000 groups × K=1 = 1000 rows. +(count (select {t: (top v 1) by: k from: Mbig})) -- 1000 +;; Each group j has 20 values: j, j+1000, ..., j+19000. Top-1 = j+19000. +;; Sum = 1000*19000 + (0+1+..+999) = 19000000 + 499500 = 19499500. +(sum (at (select {t: (top v 1) by: k from: Mbig}) 't)) -- 19499500 +(sum (at (select {b: (bot v 1) by: k from: Mbig}) 'b)) -- 499500 + +;; ─── F64 value parallel ───────────────────────────────────────────── +(set Mf (table [k v] (list (% (til N) 10) (as 'F64 (til N))))) +(sum (at (select {t: (top v 1) by: k from: Mf}) 't)) -- 199945.0 +(sum (at (select {b: (bot v 1) by: k from: Mf}) 'b)) -- 45.0 diff --git a/test/rfl/agg/variance.rfl b/test/rfl/agg/variance.rfl new file mode 100644 index 00000000..47d37199 --- /dev/null +++ b/test/rfl/agg/variance.rfl @@ -0,0 +1,114 @@ +;; Happy-path invariants for the variance / stddev / dev wrappers in +;; src/ops/agg.c. 
All five wrappers route through the shared +;; var_stddev_core(sample, take_sqrt): +;; +;; var -> sample=1, take_sqrt=0 +;; var_pop -> sample=0, take_sqrt=0 +;; stddev -> sample=1, take_sqrt=1 +;; stddev_pop -> sample=0, take_sqrt=1 +;; dev -> sample=0, take_sqrt=1 (alias of stddev_pop) +;; +;; Canonical Wikipedia fixture: [2 4 4 4 5 5 7 9] +;; mean = 5 +;; sum sq diff = 32 +;; pop_var = 32/8 = 4.0 +;; pop_stddev = 2.0 +;; sample_var = 32/7 ≈ 4.5714285714… +;; sample_stddev = √(32/7) ≈ 2.1380899352… + +;; ─── canonical fixture: exact integer answers ───────────────────── +(var_pop [2 4 4 4 5 5 7 9]) -- 4.0 +(stddev_pop [2 4 4 4 5 5 7 9]) -- 2.0 +(dev [2 4 4 4 5 5 7 9]) -- 2.0 + +;; sample_var = 32/7 — compare with tolerance. +(< (abs (- (var [2 4 4 4 5 5 7 9]) 4.571428571428571)) 0.000001) -- true +;; sample_stddev = sqrt(32/7) — compare with tolerance. +(< (abs (- (stddev [2 4 4 4 5 5 7 9]) 2.138089935299395)) 0.000001) -- true + +;; ─── F64 input gives the same answers as I64 input ──────────────── +(var_pop [2.0 4.0 4.0 4.0 5.0 5.0 7.0 9.0]) -- 4.0 +(stddev_pop [2.0 4.0 4.0 4.0 5.0 5.0 7.0 9.0]) -- 2.0 +(dev [2.0 4.0 4.0 4.0 5.0 5.0 7.0 9.0]) -- 2.0 +(< (abs (- (var [2.0 4.0 4.0 4.0 5.0 5.0 7.0 9.0]) 4.571428571428571)) 0.000001) -- true +(< (abs (- (stddev [2.0 4.0 4.0 4.0 5.0 5.0 7.0 9.0]) 2.138089935299395)) 0.000001) -- true + +;; ─── return type is F64 for every wrapper ───────────────────────── +(type (var [1 2 3])) -- 'f64 +(type (var_pop [1 2 3])) -- 'f64 +(type (stddev [1 2 3])) -- 'f64 +(type (stddev_pop [1 2 3])) -- 'f64 +(type (dev [1 2 3])) -- 'f64 + +;; ─── constant column → zero variance / dispersion ───────────────── +(var [3 3 3 3]) -- 0.0 +(var_pop [3 3 3 3]) -- 0.0 +(stddev [3 3 3 3]) -- 0.0 +(stddev_pop [3 3 3 3]) -- 0.0 +(dev [3 3 3 3]) -- 0.0 + +;; constant F64 column — same story. 
+(var_pop [7.5 7.5 7.5 7.5 7.5]) -- 0.0 +(stddev_pop [7.5 7.5 7.5 7.5 7.5]) -- 0.0 + +;; ─── two-element diff [1 5]: mean=3, Σ(x-mean)² = 4+4 = 8 ───────── +;; pop_var = 8/2 = 4, sample_var = 8/1 = 8 +;; pop_stddev = 2, sample_stddev = √8 ≈ 2.828427… +(var_pop [1 5]) -- 4.0 +(var [1 5]) -- 8.0 +(stddev_pop [1 5]) -- 2.0 +(dev [1 5]) -- 2.0 +(< (abs (- (stddev [1 5]) 2.828427124746190)) 0.000001) -- true + +;; ─── narrow-int coercion to F64 (I32 / I16 / U8 paths) ──────────── +(var_pop (as 'I32 [2 4 4 4 5 5 7 9])) -- 4.0 +(stddev_pop (as 'I32 [2 4 4 4 5 5 7 9])) -- 2.0 +(dev (as 'I32 [2 4 4 4 5 5 7 9])) -- 2.0 +(type (var_pop (as 'I32 [2 4 4 4 5 5 7 9]))) -- 'f64 + +(var_pop (as 'I16 [2 4 4 4 5 5 7 9])) -- 4.0 +(stddev_pop (as 'I16 [2 4 4 4 5 5 7 9])) -- 2.0 +(type (stddev_pop (as 'I16 [2 4 4 4 5 5 7 9]))) -- 'f64 + +(var_pop (as 'U8 [2 4 4 4 5 5 7 9])) -- 4.0 +(dev (as 'U8 [2 4 4 4 5 5 7 9])) -- 2.0 +(type (dev (as 'U8 [2 4 4 4 5 5 7 9]))) -- 'f64 + +;; sample stats over narrow-int input — tolerance-checked, type F64. +(< (abs (- (var (as 'I32 [2 4 4 4 5 5 7 9])) 4.571428571428571)) 0.000001) -- true +(< (abs (- (stddev (as 'I16 [2 4 4 4 5 5 7 9])) 2.138089935299395)) 0.000001) -- true + +;; ─── length-1 input: pop_* = 0, sample_* = typed F64 null (0Nf) ── +;; var_stddev_core(sample=0) → make_f64(0.0) +;; var_stddev_core(sample=1, n<=1) → ray_typed_null(-RAY_F64) == 0Nf +(var_pop [42]) -- 0.0 +(stddev_pop [42]) -- 0.0 +(dev [42]) -- 0.0 +(var [42]) -- 0Nf +(stddev [42]) -- 0Nf +(var [3.14]) -- 0Nf +(stddev [3.14]) -- 0Nf + +;; F64 length-1 pop side. +(var_pop [3.14]) -- 0.0 +(stddev_pop [3.14]) -- 0.0 + +;; ─── algebraic invariant: stddev² == var (within fp tolerance) ──── +(set V [2 4 4 4 5 5 7 9]) +(< (abs (- (* (stddev V) (stddev V)) (var V))) 0.000001) -- true +(< (abs (- (* (stddev_pop V) (stddev_pop V)) (var_pop V))) 0.000001) -- true +(< (abs (- (* (dev V) (dev V)) (var_pop V))) 0.000001) -- true + +;; dev is an alias for stddev_pop — must be bit-equal. 
+(== (dev V) (stddev_pop V)) -- true + +;; population variance is bounded above by sample variance for n>=2. +(<= (var_pop V) (var V)) -- true + +;; non-negativity over a random vector — drives the general path. +(set R (rand 200 1000)) +(>= (var R) 0.0) -- true +(>= (var_pop R) 0.0) -- true +(>= (stddev R) 0.0) -- true +(>= (stddev_pop R) 0.0) -- true +(>= (dev R) 0.0) -- true diff --git a/test/rfl/hof/wrappers.rfl b/test/rfl/hof/wrappers.rfl new file mode 100644 index 00000000..f199ef9d --- /dev/null +++ b/test/rfl/hof/wrappers.rfl @@ -0,0 +1,129 @@ +;; Happy-path coverage for the HOF wrapper dispatchers in +;; src/ops/collection.c: +;; ray_pmap_fn → forwards to ray_map_fn +;; ray_fold_left_fn → forwards to ray_fold_fn +;; ray_scan_left_fn → forwards to ray_scan_fn +;; +;; Existing test/rfl/hof/{pmap,fold,scan,right,map,filter,apply,lambda}.rfl +;; exercise the inner ops directly. This file specifically drives each +;; *public-API wrapper* across I64 / F64 / mixed-type inputs and verifies +;; result values, lengths, and the wrapper-equals-inner-op invariant. 
+ +;; ───────────────────────────────────────────────────────────────── +;; pmap — parallel map wrapper (ray_pmap_fn) +;; ───────────────────────────────────────────────────────────────── + +;; I64 happy path — small list, lambda +(pmap (fn [x] (* x 2)) [1 2 3 4 5]) -- [2 4 6 8 10] +(pmap (fn [x] (+ x 1)) [10 20 30]) -- [11 21 31] + +;; I64 happy path — builtin verbs as fn +(pmap neg [1 2 3 4]) -- [-1 -2 -3 -4] +(pmap abs [-3 -2 -1 0 1 2 3]) -- [3 2 1 0 1 2 3] + +;; F64 happy path +(pmap (fn [x] (* x 2.0)) [1.5 2.5 3.5]) -- [3.0 5.0 7.0] +(pmap (fn [x] (+ x 0.5)) [1.0 2.0 3.0]) -- [1.5 2.5 3.5] + +;; Mixed-type coercion — I64 list with F64 lambda promotes result +(pmap (fn [x] (* x 1.5)) [2 4 6]) -- [3.0 6.0 9.0] + +;; Large list — force the parallel-dispatch path (if thresholded) +(set BIG (til 1000)) +(count (pmap (fn [x] (+ x 1)) BIG)) -- 1000 +(sum (pmap (fn [x] (+ x 1)) BIG)) -- (+ 1000 (sum BIG)) + +;; Wrapper equivalence: pmap == map on the same input +(set L (rand 200 500)) +(pmap (fn [x] (* x 2)) L) -- (map (fn [x] (* x 2)) L) +(pmap neg L) -- (map neg L) + +;; Result type/count invariants +(count (pmap (fn [x] x) [1 2 3 4 5])) -- 5 +(count (pmap (fn [x] (* x x)) (til 50))) -- 50 + +;; ───────────────────────────────────────────────────────────────── +;; fold-left — left-associative fold wrapper (ray_fold_left_fn) +;; ───────────────────────────────────────────────────────────────── + +;; I64 happy path — + with seed 0 (sum) +(fold-left + 0 [1 2 3 4 5]) -- 15 +(fold-left + 0 [10 20 30 40]) -- 100 + +;; I64 happy path — * with seed 1 (product) +(fold-left * 1 [1 2 3 4]) -- 24 +(fold-left * 1 [2 3 5]) -- 30 + +;; F64 happy path — sum and product +(fold-left + 0.0 [1.5 2.5 3.0]) -- 7.0 +(fold-left * 1.0 [2.0 0.5 4.0]) -- 4.0 + +;; Mixed-type coercion — I64 seed + F64 list +(fold-left + 0 [1.5 2.5 3.0]) -- 7.0 + +;; Lambda fn — explicit binary lambda +(fold-left (fn [a b] (+ a b)) 0 [1 2 3 4]) -- 10 +(fold-left (fn [a b] (* a b)) 1 [2 3 4]) -- 24 + +;; 
Non-associative op (subtraction) — left-fold is well-defined: +;; (((0-1)-2)-3)-4 = -10 +(fold-left - 0 [1 2 3 4]) -- -10 + +;; Empty list + seed returns seed unchanged +(fold-left + 42 []) -- 42 +(fold-left + 0.0 []) -- 0.0 + +;; 2-arg form (no seed) uses first element as initial accumulator +(fold-left + [1 2 3 4 5]) -- 15 +(fold-left * [1 2 3 4]) -- 24 +(fold-left + [10]) -- 10 + +;; Wrapper equivalence: fold-left == fold on same inputs +(set V (rand 50 100)) +(fold-left + 0 V) -- (fold + 0 V) +(fold-left + V) -- (fold + V) +(fold-left * 1 [2 3 4 5]) -- (fold * 1 [2 3 4 5]) + +;; fold-left + 0 V equals (sum V) +(fold-left + 0 V) -- (sum V) + +;; ───────────────────────────────────────────────────────────────── +;; scan-left — left-prefix scan wrapper (ray_scan_left_fn) +;; ───────────────────────────────────────────────────────────────── + +;; I64 happy path — prefix sums +(scan-left + [1 2 3 4]) -- [1 3 6 10] +(scan-left + [1 2 3 4 5]) -- [1 3 6 10 15] + +;; I64 happy path — prefix products +(scan-left * [1 2 3 4]) -- [1 2 6 24] +(scan-left * [2 3 4]) -- [2 6 24] + +;; F64 happy path +(scan-left + [1.0 2.0 3.0]) -- [1.0 3.0 6.0] +(scan-left * [1.0 2.0 3.0 4.0]) -- [1.0 2.0 6.0 24.0] + +;; Singleton — scan of one element is itself +(scan-left + [42]) -- [42] +(scan-left * [7]) -- [7] + +;; Lambda fn +(scan-left (fn [a b] (+ a b)) [1 2 3 4]) -- [1 3 6 10] + +;; Length preserved (count invariant) +(set W (rand 50 100)) +(count W) -- (count (scan-left + W)) +(count (scan-left * [1 2 3 4 5])) -- 5 + +;; last(scan-left + v) equals (fold + 0 v) when v non-empty +(last (scan-left + [1 2 3 4 5])) -- 15 +(last (scan-left + W)) -- (fold + 0 W) + +;; first(scan-left f v) equals v[0] +(first (scan-left + [7 1 2 3])) -- 7 +(first (scan-left * [9 2 3])) -- 9 + +;; Wrapper equivalence: scan-left == scan on same inputs +(scan-left + [1 2 3 4 5]) -- (scan + [1 2 3 4 5]) +(scan-left * [1 2 3 4]) -- (scan * [1 2 3 4]) +(scan-left + W) -- (scan + W) diff --git 
a/test/rfl/io/csv_splayed.rfl b/test/rfl/io/csv_splayed.rfl new file mode 100644 index 00000000..f91b220d --- /dev/null +++ b/test/rfl/io/csv_splayed.rfl @@ -0,0 +1,181 @@ +;; Coverage for src/io/csv.c splayed-writer paths, src/store/col.c str-pool +;; copy on load, and the GUID column writer. +;; +;; Reachability map (per llvm-cov, 0% on master): +;; +;; csv_splayed_writer_open — opens per-column tmp file +;; csv_splayed_writer_append — per-chunk col append +;; csv_splayed_writer_close — finalizes header + nullmap, atomic rename +;; csv_splayed_writer_null_bit — per-row null-bit accumulator +;; csv_splayed_writer_zero_nulls — backfill / pad helpers (count=0 fast path +;; reached here; the inner loop requires +;; multi-chunk input which needs >1M rows — +;; see "Reachability notes" at end) +;; +;; All paths above are exercised through the `.csv.splayed` builtin +;; (src/ops/builtins.c::ray_read_csv_splayed_fn), which writes a CSV into +;; a directory as one file per column via ray_csv_save_splayed_named_opts. +;; +;; col_copy_str_pool / col_load_str_vec are reached via the standard +;; ray_col_load path when the saved column carries a STR str_pool. +;; +;; Every assertion is deterministic — no rand on the LHS, no timestamps. +;; Splayed dirs and CSV fixtures use the `rf_test_splayed_*` prefix so the +;; Makefile-side `rm -f rf_test_*.csv` keeps fixtures bounded; the splayed +;; *directories* themselves are cleaned by the test header below. + +;; ────────────── scrub stale state from a prior run ────────────── +;; `.csv.splayed` errors out if the destination dir already has stale +;; column files from a previous interrupted run. A leftover sym file +;; with stale IDs would also flip the load-back path to "corrupt". +(.sys.exec "rm -rf rf_test_splayed_basic rf_test_splayed_nulls rf_test_splayed_str rf_test_splayed_sym rf_test_splayed_guid rf_test_splayed_*.csv") -- 0 + +;; ════════════════════════════════════════════════════════════════ +;; 1. 
Basic mixed-column round trip — no nulls. +;; +;; Hits csv_splayed_writer_open + _append + _close for each column. +;; Three columns ⇒ three open/close pairs, exercising the SYM_W32 attr +;; branch in writer_open (the `s` column) and the non-SYM branch (`id` +;; and `px`). +;; ════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'id,s,px\\n1,aa,1.5\\n2,bb,2.5\\n3,aa,3.5\\n4,cc,4.5\\n5,aa,5.5\\n' > rf_test_splayed_basic.csv") -- 0 +(set Tbasic (.csv.splayed "rf_test_splayed_basic.csv" "rf_test_splayed_basic/")) + +(count Tbasic) -- 5 +(sum (at Tbasic 'id)) -- 15 +(sum (at Tbasic 'px)) -- 17.5 +;; SYM column survives — duplicates intern to the same id. +(count (at Tbasic 's)) -- 5 +;; Header was parsed → column names match. +(key Tbasic) -- ['id 's 'px] + +;; Round-trip load via .db.splayed.get exercises ray_read_splayed which +;; walks every column file and validates the writer's on-disk format. +(set Rbasic (.db.splayed.get "rf_test_splayed_basic/")) +(count Rbasic) -- 5 +(sum (at Rbasic 'id)) -- 15 +(sum (at Rbasic 'px)) -- 17.5 +(key Rbasic) -- ['id 's 'px] +;; Values match position-by-position — proves writer + reader agree on +;; row order and width-narrowed SYM widths. +(at Rbasic 'id) -- (at Tbasic 'id) +(at Rbasic 'px) -- (at Tbasic 'px) +(at Rbasic 's) -- (at Tbasic 's) + +;; ════════════════════════════════════════════════════════════════ +;; 2. Null rows — empty CSV fields produce typed nulls. +;; +;; Materialized columns carry RAY_ATTR_HAS_NULLS, which routes through +;; csv_splayed_writer_null_bit for every row (14 regions) and the +;; count=0 early-return in csv_splayed_writer_zero_nulls (1 region). +;; The bit accumulator flushes when null_bits reaches 8, so 9+ rows +;; force at least one full-byte fwrite of null_acc and exercise the +;; flush-then-reset path inside null_bit. 
+;; ════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'a,b\\n1,1.5\\n,2.5\\n3,\\n,\\n5,5.5\\n6,6.5\\n,7.5\\n8,\\n9,9.5\\n10,10.5\\n,11.5\\n12,12.5\\n' > rf_test_splayed_nulls.csv") -- 0 +(set Tnull (.csv.splayed "rf_test_splayed_nulls.csv" "rf_test_splayed_nulls/")) + +(count Tnull) -- 12 +(key Tnull) -- ['a 'b] +;; Sum skips nulls — only the non-null `a` cells contribute. +(sum (at Tnull 'a)) -- 54 +;; F64 sum of non-null `b` cells: 1.5+2.5+5.5+6.5+7.5+9.5+10.5+11.5+12.5 = 67.5 +(sum (at Tnull 'b)) -- 67.5 + +;; Round-trip load: the saved column file carries an external nullmap +;; bitmap (RAY_ATTR_NULLMAP_EXT in writer_close), and ray_col_load +;; restores it. Re-loaded sums match the original. +(set Rnull (.db.splayed.get "rf_test_splayed_nulls/")) +(count Rnull) -- 12 +(sum (at Rnull 'a)) -- 54 +(sum (at Rnull 'b)) -- 67.5 +(at Rnull 'a) -- (at Tnull 'a) +(at Rnull 'b) -- (at Tnull 'b) + +;; ════════════════════════════════════════════════════════════════ +;; 3. STR column via explicit schema — exercises ray_splay_save_bulk +;; fallback inside ray_csv_save_splayed_named_opts (col.c writes the +;; STR + adjacent str_pool inline), and col_copy_str_pool on load. +;; +;; The schema-overridden form takes (.csv.splayed [names] [types] csv dir), +;; same dispatcher as ray_read_csv_splayed_fn. +;; ════════════════════════════════════════════════════════════════ +(.sys.exec "printf '1,alpha\\n2,beta\\n3,gamma\\n4,delta\\n5,epsilon\\n' > rf_test_splayed_str.csv") -- 0 +(set Tstr (.csv.splayed [id name] [I64 STR] "rf_test_splayed_str.csv" "rf_test_splayed_str/")) +(count Tstr) -- 5 +(sum (at Tstr 'id)) -- 15 +;; STR column round-trips via the str_pool copy in col_copy_str_pool. +(at (at Tstr 'name) 0) -- "alpha" +(at (at Tstr 'name) 4) -- "epsilon" + +;; Re-load from disk also hits col_copy_str_pool — fresh STR + pool. 
+(set Rstr (.db.splayed.get "rf_test_splayed_str/")) +(count Rstr) -- 5 +(at (at Rstr 'name) 0) -- "alpha" +(at (at Rstr 'name) 2) -- "gamma" +(at (at Rstr 'name) 4) -- "epsilon" +(at Rstr 'id) -- (at Tstr 'id) + +;; ════════════════════════════════════════════════════════════════ +;; 4. SYM column round-trip — drives the writer's SYM_W32 branch +;; (uint32_t fwrite per chunk) and the on-load sym validation. +;; Multiple distinct symbol values + duplicates exercise the intern +;; table and the saved-sym-count footer. +;; ════════════════════════════════════════════════════════════════ +(.sys.exec "printf 'tag,v\\nAAPL,100\\nGOOG,200\\nMSFT,300\\nAAPL,400\\nGOOG,500\\nMSFT,600\\nAAPL,700\\n' > rf_test_splayed_sym.csv") -- 0 +(set Tsym (.csv.splayed "rf_test_splayed_sym.csv" "rf_test_splayed_sym/")) +(count Tsym) -- 7 +(sum (at Tsym 'v)) -- 2800 +(count (at Tsym 'tag)) -- 7 + +;; Round-trip — re-loading hits ray_sym_load via the splayed/sym file. +(set Rsym (.db.splayed.get "rf_test_splayed_sym/")) +(count Rsym) -- 7 +(sum (at Rsym 'v)) -- 2800 +(at Rsym 'tag) -- (at Tsym 'tag) + +;; ════════════════════════════════════════════════════════════════ +;; 5. GUID column via .csv.write — exercises csv_write_guid (8-4-4-4-12). +;; (.csv.splayed has no GUID type inference path from text input, so +;; the writer is reached via the row-formatted .csv.write builtin.) +;; +;; The exact bytes depend on `guid` codegen, so the assertion pins the +;; structural invariant: 36-char canonical form, dashes at offsets +;; 8/13/18/23. +;; ════════════════════════════════════════════════════════════════ +(set Tguid (table [g] (list (guid 3)))) +(.csv.write Tguid "rf_test_splayed_guid.csv") -- 0 +(set rawg (read "rf_test_splayed_guid.csv")) +(set linesg (split rawg "\n")) +(count linesg) -- 5 +(at linesg 0) -- "g" +;; Each data row is a 36-char canonical GUID string. 
+(count (at linesg 1)) -- 36 +(count (at linesg 2)) -- 36 +(count (at linesg 3)) -- 36 +;; Dashes appear at the canonical offsets (8, 13, 18, 23). +(at (at linesg 1) 8) -- "-" +(at (at linesg 1) 13) -- "-" +(at (at linesg 1) 18) -- "-" +(at (at linesg 1) 23) -- "-" + +;; ────────────── cleanup ────────────── +;; CSV files match the Makefile's `rf_test_*.csv` clean rule; splayed +;; dirs we created need explicit removal. +(.sys.exec "rm -rf rf_test_splayed_basic rf_test_splayed_nulls rf_test_splayed_str rf_test_splayed_sym rf_test_splayed_*.csv") -- 0 + +;; ────────────── reachability notes ────────────── +;; csv_splayed_writer_zero_nulls — only the count<=0 early-return is +;; hit here. The inner loop requires either (a) a chunked write where +;; one chunk has a null-bearing column and a later chunk doesn't, or +;; (b) the reverse, both of which need >1M rows to cross the default +;; CSV_PART_ROWS_DEFAULT chunk boundary. Reachable but not regression- +;; sized — would need a C-level test that passes a smaller rows_per_chunk. +;; +;; csv_splayed_writer_abort — only the err-path in writer_open's caller +;; loop and the post-close abort exercise it. Requires open() or +;; fwrite() failure mid-flight; not reachable from a happy-path test. +;; +;; sym_lazy_materialize_to_locked / sym_lazy_unmap_locked — gated by +;; SYM_LAZY_LOAD_MIN_BYTES = 64 MiB. Out of reach for a regression +;; test; covered by larger benchmarks. diff --git a/test/rfl/query/parallel_probe.rfl b/test/rfl/query/parallel_probe.rfl new file mode 100644 index 00000000..5f85efca --- /dev/null +++ b/test/rfl/query/parallel_probe.rfl @@ -0,0 +1,107 @@ +;; Coverage for the parallel row→gid probe path in `src/ops/query.c`: +;; `idxbuf_hist_fn` + `idxbuf_scat_fn`. 
Activated by the post-DAG +;; non-agg scatter when: +;; - `select` has a non-agg projection (a column ref, not a +;; reduction), so n_nonaggs > 0; +;; - the by-clause is a single scalar key (single-key scatter path); +;; - nrows >= 200_000 (the dispatch-overhead amortisation gate); +;; - 0 < n_groups <= 65_536 (per-task histogram sizing). +;; +;; The path also requires >= 2 worker threads. Single-worker dev +;; boxes fall through to the serial histogram below; the asserts here +;; are correctness-only (totals, per-group sizes), so both branches +;; return the same results. +;; +;; Shape: +;; `(select {v: v by: k from: T})` +;; produces a keyed table with one LIST-column row per group, each cell +;; holding the per-group slice of `v`. The asserts compare per-group +;; counts and sums against closed-form expected values. +;; +;; History: commit 774ce68f "perf(query): parallel row→gid probe for +;; non-agg scatter". + +;; ─── Fixture 1: 200_000 rows, 100 groups (well under the 65_536 cap) +(set Nbig 200000) +(set Ngrp 100) +(set Tbig (table [k v] (list (% (til Nbig) Ngrp) (til Nbig)))) + +;; Result shape: keyed table with `k` key column + `v` LIST column. +(set Rbig (select {v: v by: k from: Tbig})) +(count Rbig) -- 100 + +;; Per-group count = Nbig / Ngrp = 2000 for every group. +(count (at Rbig 'v)) -- 100 +(count (at (at Rbig 'v) 0)) -- 2000 +(count (at (at Rbig 'v) 50)) -- 2000 +(count (at (at Rbig 'v) 99)) -- 2000 + +;; Sum of per-group counts equals total row count. +(fold + 0 (map count (at Rbig 'v))) -- 200000 + +;; Total of all values across groups equals sum(til Nbig) +;; = 200000 * 199999 / 2 = 19_999_900_000. +(fold + 0 (map sum (at Rbig 'v))) -- 19999900000 + +;; Group 0 holds rows {0, 100, 200, ..., 199900}. +;; AP sum: 2000 * (0 + 199900) / 2 = 199_900_000. +(sum (at (at Rbig 'v) 0)) -- 199900000 + +;; Group 99 holds rows {99, 199, 299, ..., 199999}. +;; AP sum: 2000 * (99 + 199999) / 2 = 200_098_000.
+(sum (at (at Rbig 'v) 99)) -- 200098000 + +;; First-cell value in the per-group LIST: group `g` starts at row `g`. +(at (at (at Rbig 'v) 0) 0) -- 0 +(at (at (at Rbig 'v) 1) 0) -- 1 +(at (at (at Rbig 'v) 99) 0) -- 99 + +;; ─── Fixture 2: SYM key, same gating +;; SYM keys flow through KEY_READ's SYM branch and remain in the parallel +;; scatter (key not LIST/STR/GUID, so no eval-group force-out). +(set Nsym 200000) +(set syms ['AAA 'BBB 'CCC 'DDD 'EEE]) +(set Tsym (table [k v] (list (take syms Nsym) (til Nsym)))) +(set Rsym (select {v: v by: k from: Tsym})) +(count Rsym) -- 5 +;; Even partition across 5 syms in round-robin order → 40_000 per group. +(count (at (at Rsym 'v) 0)) -- 40000 +(count (at (at Rsym 'v) 4)) -- 40000 +(fold + 0 (map count (at Rsym 'v))) -- 200000 +;; Total sum unchanged from total til. +(fold + 0 (map sum (at Rsym 'v))) -- 19999900000 + +;; ─── Fixture 3: with WHERE filter — the rowsel-masking applied to +;; row_gid before scatter, so dropped rows must not appear in any +;; per-group LIST. Survives the parallel idx_buf scatter via the +;; rgid_did_mask path (per-segment masking inside the probe). +(set Twh (table [k v] (list (% (til Nbig) Ngrp) (til Nbig)))) +(set Rwh (select {v: v by: k from: Twh where: (< v 100000)})) +(count Rwh) -- 100 +;; First half kept → per-group size = 100_000 / 100 = 1000. +(count (at (at Rwh 'v) 0)) -- 1000 +(count (at (at Rwh 'v) 50)) -- 1000 +;; Total surviving rows = 100_000; sum = 100_000 * 99_999 / 2 = 4_999_950_000. +(fold + 0 (map count (at Rwh 'v))) -- 100000 +(fold + 0 (map sum (at Rwh 'v))) -- 4999950000 + +;; ─── Fixture 4: non-agg projection alongside aggs — exercises the +;; same scatter index for the agg result AND the non-agg LIST column. +;; The `idx_buf` is shared by both downstream consumers. 
+(set Rmix (select {c: (count v) v: v by: k from: Tbig})) +(count Rmix) -- 100 +(at (at Rmix 'c) 0) -- 2000 +(sum (at Rmix 'c)) -- 200000 +(count (at (at Rmix 'v) 0)) -- 2000 + +;; ─── Fixture 5: just above the parallel-dispatch threshold (200_001). +;; The +1 row goes to the first surviving group; total row count must +;; still match. +(set Nedge 200001) +(set Tedge (table [k v] (list (% (til Nedge) Ngrp) (til Nedge)))) +(set Redge (select {v: v by: k from: Tedge})) +(count Redge) -- 100 +(fold + 0 (map count (at Redge 'v))) -- 200001 +;; Group 0 gets the extra row (row 200_000 → k = 200_000 % 100 = 0). +(count (at (at Redge 'v) 0)) -- 2001 +(count (at (at Redge 'v) 1)) -- 2000 diff --git a/test/rfl/query/per_group_buf.rfl b/test/rfl/query/per_group_buf.rfl new file mode 100644 index 00000000..47917292 --- /dev/null +++ b/test/rfl/query/per_group_buf.rfl @@ -0,0 +1,136 @@ +;; Coverage for the per-group buffer aggregators and helpers in +;; `src/ops/query.c`: +;; - `typed_vec_to_list` — demote a partly-typed result to a +;; LIST when a mid-loop cell type +;; doesn't fit the initial typed vec. +;; - `const_str_expr_copy` — constant-fold `(concat str ...)` +;; expressions inside select dict +;; projections. +;; - eval-level / DAG fast-path scatter for non-agg projections that +;; compute per group (`nonagg_eval_per_group_core` / +;; `nonagg_eval_per_group_buf`). +;; +;; Note on `aggr_unary_per_group_buf` / `aggr_med_per_group_buf` +;; (also targeted in this file's scope): the dispatch gate at +;; `query.c:7684` requires `is_streaming_aggr_unary_call(expr)` to fire +;; AND that the expr was bucketed into `nonagg_exprs` (i.e. NOT +;; `is_group_dag_agg_expr`). As of `resolve_agg_opcode` covering all +;; RAY_FN_AGGR unaries (sum/avg/min/max/med/dev/var/stddev/...), every +;; `(agg col)` shape now satisfies `is_agg_expr` → `is_group_dag_agg_expr` +;; → buckets into `n_aggs`, never reaches the non-agg scatter. 
These +;; two helpers are therefore not reachable from RFL today; see the +;; reachability note in the task report. + +;; ──────────────────────────────────────────────────────────────────── +;; const_str_expr_copy via `(concat ...)` constant folding in +;; a select projection. The DAG compiler `compile_const_str_expr` +;; walks the expression with `const_str_expr_len` and emits a single +;; folded `ray_const_str` op. Stays on the stack buffer (≤ 256 B) for +;; the small cases and heap-allocates for the longer one. +;; ──────────────────────────────────────────────────────────────────── +(set Tcs (table [k v] (list ['a 'b 'c] [1 2 3]))) + +;; Two-piece concat — small, stack buffer (< 256 B). +(at (at (select {s: (concat "hello" "-world") from: Tcs}) 's) 0) -- "hello-world" +(count (at (select {s: (concat "hello" "-world") from: Tcs}) 's)) -- 3 + +;; Three-piece concat — recurses through const_str_expr_copy. +(at (at (select {s: (concat "a" "b" "c") from: Tcs}) 's) 0) -- "abc" + +;; Nested concat — const_str_expr_len recognises the inner (concat ...) +;; head and recurses, const_str_expr_copy follows the same walk. +(at (at (select {s: (concat "x" (concat "y" "z")) from: Tcs}) 's) 0) -- "xyz" +(at (at (select {s: (concat (concat "ab" "cd") (concat "ef" "gh")) from: Tcs}) 's) 0) -- "abcdefgh" + +;; Longer folded result that overflows the 256-byte stack buffer → +;; heap path inside `compile_const_str_expr`. Folded length is +;; 26 * 12 = 312 bytes. Verify the projection emits 3 rows and the +;; first cell holds the full folded string. 
+(count (at (select {s: (concat "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz") from: Tcs}) 's)) -- 3 +(at (at (select {s: (concat "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz" "abcdefghijklmnopqrstuvwxyz") from: Tcs}) 's) 0) -- "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + +;; ──────────────────────────────────────────────────────────────────── +;; Multi-key by + non-agg-with-inner-agg → eval-level group path that +;; runs `nonagg_eval_per_group` over a LIST<(key,idx_list)> layout. +;; First-of-group cell decides the typed-vec storage type; subsequent +;; cells must match or the result demotes via typed_vec_to_list. +;; +;; Happy path: homogeneous numeric output → stays on the typed-vec +;; path, no demotion. Confirms the per-group eval correctness and +;; the typed-direct (collapsable) branch in nonagg_eval_per_group_core. +;; ──────────────────────────────────────────────────────────────────── + +;; Single-key q7 shape — non-agg outer, inner aggs reduce per-group. +;; Routes through the DAG fast scatter (`nonagg_eval_per_group_buf`). 
+(set Ta (table [k v1 v2] (list ['A 'A 'B 'B 'C 'C] [10 20 30 40 50 60] [5 15 25 35 45 55]))) +(count (select {r: (- (max v1) (min v2)) by: k from: Ta})) -- 3 +;; A: max(v1)=20 min(v2)=5 → 15 +;; B: max(v1)=40 min(v2)=25 → 15 +;; C: max(v1)=60 min(v2)=45 → 15 +(sum (at (select {r: (- (max v1) (min v2)) by: k from: Ta}) 'r)) -- 45 + +;; Multi-key by — forces use_eval_group → `nonagg_eval_per_group` +;; over the LIST layout. +(set Tb (table [g h v1 v2] (list ['A 'A 'B 'B 'A 'A 'B 'B] ['X 'Y 'X 'Y 'X 'Y 'X 'Y] [10 20 30 40 50 60 70 80] [1 2 3 4 5 6 7 8]))) +(count (select {r: (- (max v1) (min v2)) by: [g h] from: Tb})) -- 4 +;; (A,X): v1=[10,50] max=50; v2=[1,5] min=1 → 49 +;; (A,Y): v1=[20,60] max=60; v2=[2,6] min=2 → 58 +;; (B,X): v1=[30,70] max=70; v2=[3,7] min=3 → 67 +;; (B,Y): v1=[40,80] max=80; v2=[4,8] min=4 → 76 +;; Sum = 49+58+67+76 = 250 +(sum (at (select {r: (- (max v1) (min v2)) by: [g h] from: Tb}) 'r)) -- 250 + +;; F64 arithmetic-of-aggregates per group — same routing, F64 typed +;; vec result. +(set Tf (table [k x y] (list ['A 'A 'B 'B 'C 'C] [1.0 2.0 3.0 4.0 5.0 6.0] [2.0 4.0 6.0 8.0 10.0 12.0]))) +(count (select {r: (- (max y) (min x)) by: k from: Tf})) -- 3 +;; A: max(y)=4, min(x)=1 → 3 +;; B: max(y)=8, min(x)=3 → 5 +;; C: max(y)=12, min(x)=5 → 7 +(sum (at (select {r: (- (max y) (min x)) by: k from: Tf}) 'r)) -- 15.0 + +;; Integer key (non-symbol) — same row→gid + per-group eval. +(set Ti (table [k v] (list [1 1 2 2 3 3] [10 20 30 40 50 60]))) +;; (+ (max v) (min v)) per group: +;; k=1: max=20, min=10 → 30 +;; k=2: max=40, min=30 → 70 +;; k=3: max=60, min=50 → 110 +(sum (at (select {r: (+ (max v) (min v)) by: k from: Ti}) 'r)) -- 210 +(count (select {r: (+ (max v) (min v)) by: k from: Ti})) -- 3 + +;; Two-key with integer keys — eval-level multi-key, typed-direct. 
+(set Tk (table [k1 k2 v] (list [1 1 2 2 1 2] [10 20 10 20 10 20] [100 200 300 400 500 600]))) +;; Groups (k1, k2) — 4 distinct pairs: +;; (1, 10) v=[100, 500] → max=500, min=100 → diff=400 +;; (1, 20) v=[200] → max=200, min=200 → diff=0 +;; (2, 10) v=[300] → max=300, min=300 → diff=0 +;; (2, 20) v=[400, 600] → max=600, min=400 → diff=200 +(count (select {r: (- (max v) (min v)) by: [k1 k2] from: Tk})) -- 4 +(sum (at (select {r: (- (max v) (min v)) by: [k1 k2] from: Tk}) 'r)) -- 600 + +;; ──────────────────────────────────────────────────────────────────── +;; typed-direct happy path inside `nonagg_eval_per_group_core`. +;; First-group cell is a collapsable primitive (F64 scalar after +;; column-level type coercion of the mixed I64/F64 list), so the +;; result emerges as a typed F64 vec without invoking +;; `typed_vec_to_list` demotion. Confirms the row→gid scatter + +;; per-group eval produces a row-aligned typed column. +;; +;; (See reachability note in the task report: constructing a true +;; typed_vec_to_list demotion from RFL requires a per-group eval +;; whose cell type GENUINELY varies across groups — table column +;; coercion collapses mixed-I64/F64 literal lists to a single F64 +;; vector before the eval ever runs, so the cells are homogeneous +;; F64. The demotion line is exercised via internal callers; the +;; happy-path typed-direct branch is what RFL surfaces.) +;; ──────────────────────────────────────────────────────────────────── +(set Tlist (table [k v] (list ['A 'B 'C 'D] (list 1 2.5 3 4.5)))) +(set Rlist (select {r: (+ 0 (first v)) by: k from: Tlist})) +(count Rlist) -- 4 +;; Column v is coerced to F64; per-group first-of-slice yields +;; F64 atoms → all result cells are F64. 
+(at (at Rlist 'r) 0) -- 1.0 +(at (at Rlist 'r) 1) -- 2.5 +(at (at Rlist 'r) 2) -- 3.0 +(at (at Rlist 'r) 3) -- 4.5 +(type (at Rlist 'r)) -- 'F64 diff --git a/test/rfl/sort/fused_topn.rfl b/test/rfl/sort/fused_topn.rfl new file mode 100644 index 00000000..38e3ab98 --- /dev/null +++ b/test/rfl/sort/fused_topn.rfl @@ -0,0 +1,94 @@ +;; Fused top-N / bottom-N pipeline — Rayfall coverage. +;; +;; Drives ray_top_fn / ray_bot_fn through the RFL surface, which +;; flows into topk_take_vec → topk_indices_single (radix-encoded +;; bounded-heap path) or topk_indices_cmp_single (SYM comparator +;; heap, sort.c:3173). +;; +;; Complements arith/top_bot.rfl by widening type / shape coverage +;; (DATE, TIME, sorted-input, deeper filter+top fusion, K=1 / +;; K=mid / K=N corners). Each assertion targets a happy-path +;; configuration; null / wrong-type cases live in top_bot.rfl. + +;; ─── numeric fast path: I64 ──────────────────────────────────────── +(top [10 20 5 25 15 30 1 35 8 22] 4) -- [35 30 25 22] +(bot [10 20 5 25 15 30 1 35 8 22] 4) -- [1 5 8 10] + +;; K=1 (smallest possible) — verify max/min compatibility. +(top [42 17 99 3 88 56] 1) -- [99] +(bot [42 17 99 3 88 56] 1) -- [3] + +;; ─── numeric fast path: F64 (negative, fractional) ───────────────── +(top [-1.5 2.5 -0.5 3.5 -1.0 4.5 2.0] 3) -- [4.5 3.5 2.5] +(bot [-1.5 2.5 -0.5 3.5 -1.0 4.5 2.0] 3) -- [-1.5 -1.0 -0.5] + +;; ─── narrow integer types ────────────────────────────────────────── +;; I32, I16, U8 fast paths — preserve the input type, k < n.
+(top (as 'I32 [100 200 50 250 150 300 75]) 2) -- (as 'I32 [300 250]) +(bot (as 'I32 [100 200 50 250 150 300 75]) 2) -- (as 'I32 [50 75]) +(top (as 'I16 [9 1 5 7 3 8 2 6 4]) 3) -- (as 'I16 [9 8 7]) +(bot (as 'I16 [9 1 5 7 3 8 2 6 4]) 3) -- (as 'I16 [1 2 3]) +(top (as 'U8 [9 1 5 7 3 8 2 6 4]) 3) -- (as 'U8 [9 8 7]) +(bot (as 'U8 [9 1 5 7 3 8 2 6 4]) 3) -- (as 'U8 [1 2 3]) + +;; ─── already-sorted / reverse-sorted inputs ──────────────────────── +;; topk_indices_single must still scan and produce the correct prefix +;; even when the source is monotonic. +(top [1 2 3 4 5 6 7 8 9 10] 3) -- [10 9 8] +(bot [1 2 3 4 5 6 7 8 9 10] 3) -- [1 2 3] +(top [10 9 8 7 6 5 4 3 2 1] 3) -- [10 9 8] +(bot [10 9 8 7 6 5 4 3 2 1] 3) -- [1 2 3] + +;; ─── filter + top-N fusion (the "fused topk" benchmark shape) ────── +;; (top (where v > 100 v) k) — filter then partial-sort. This is +;; the Q25-Q27 shape from be3b5364. +(set Vf [50 120 80 200 30 150 90 175 60 110]) +(top (filter Vf (> Vf 100)) 3) -- [200 175 150] +(bot (filter Vf (> Vf 100)) 2) -- [110 120] + +;; Predicate selects exactly K elements: top must return them all. +(top (filter Vf (> Vf 150)) 2) -- [200 175] +(bot (filter Vf (> Vf 150)) 2) -- [175 200] + +;; ─── algebraic identity: (top v k) is a prefix of (desc v) ───────── +(set V32 [100 5 201 12 302 7 403 9 50 25 75 33 66]) +(top V32 5) -- (take (desc V32) 5) +(bot V32 5) -- (take (asc V32) 5) + +;; ─── K = count(v) edge: top/bot equal full sort ──────────────────── +(set Vk [5 2 8 1 9 4 7 3 6]) +(top Vk (count Vk)) -- (desc Vk) +(bot Vk (count Vk)) -- (asc Vk) + +;; ─── temporal types: DATE, TIME fast path ────────────────────────── +;; The radix-encoded heap path handles DATE/TIME/TIMESTAMP identically +;; to their underlying I32 / I64 representations, so happy-path top/bot +;; on DATE and TIME is a coverage smoke check (TIMESTAMP is not asserted here).
+(set D (as 'DATE [2025.03.15 2025.01.01 2025.06.20 2025.02.10 2025.12.31])) +(top D 2) -- (as 'DATE [2025.12.31 2025.06.20]) +(bot D 2) -- (as 'DATE [2025.01.01 2025.02.10]) + +(set T (as 'TIME [12:00:00.000 09:30:00.000 15:45:00.000 06:15:00.000 23:59:59.999])) +(top T 2) -- (as 'TIME [23:59:59.999 15:45:00.000]) +(bot T 2) -- (as 'TIME [06:15:00.000 09:30:00.000]) + +;; ─── larger N to exercise the multi-morsel scan ──────────────────── +;; (top of a larger random-ish vector) must agree with (take (desc v) k). +(set Vbig (rand 1024 10000)) +(top Vbig 7) -- (take (desc Vbig) 7) +(bot Vbig 7) -- (take (asc Vbig) 7) + +;; ─── K = 2 over a tie-rich input ─────────────────────────────────── +;; Multiple equal max values — bot path's order must still be stable. +(top [5 5 5 5 5 5] 2) -- [5 5] +(bot [5 5 5 5 5 5] 2) -- [5 5] +(top [7 3 7 3 7 3] 2) -- [7 7] +(bot [7 3 7 3 7 3] 2) -- [3 3] + +;; ─── count / type invariants ─────────────────────────────────────── +(count (top [1 2 3 4 5] 3)) -- 3 +(count (bot [1 2 3 4 5] 3)) -- 3 +(type (top (as 'I32 [1 2 3 4 5]) 2)) -- 'I32 +(type (bot (as 'I16 [1 2 3 4 5]) 2)) -- 'I16 +(type (top (as 'U8 [1 2 3 4 5]) 2)) -- 'U8 +(type (top (as 'F64 [1.0 2.0 3.0]) 2)) -- 'F64 diff --git a/test/rfl/temporal/extract.rfl b/test/rfl/temporal/extract.rfl new file mode 100644 index 00000000..c19eb27a --- /dev/null +++ b/test/rfl/temporal/extract.rfl @@ -0,0 +1,142 @@ +;; Happy-path coverage for the temporal extract helpers in src/ops/temporal.c: +;; ray_extract_yyyy_fn / mm / dd / hh / minute / ss / dow / doy +;; +;; Each helper is a thin wrapper around ray_temporal_extract; reached from +;; RFL via the unary builtins registered in src/lang/eval.c. Atom and +;; vector input paths are exercised here. Null / wrong-type / OOB branches +;; are out of scope (P2). 
+ +;; ───────────────────────────── yyyy (year) ───────────────────────────── +;; date atom +(yyyy 2024.03.15) -- 2024 +(yyyy 2000.01.01) -- 2000 +;; leap-year boundary (29 Feb of a leap year decodes to year 2024) +(yyyy 2024.02.29) -- 2024 +;; year-end → next-year boundary +(yyyy 2024.12.31) -- 2024 +(yyyy 2025.01.01) -- 2025 +;; pre-epoch date (days_since_2000 < 0) still decomposes correctly +(yyyy 1999.12.31) -- 1999 +;; timestamp atom +(yyyy 2024.07.04D09:15:30.000000000) -- 2024 +;; vector +(yyyy [2024.01.01 2024.07.04]) -- [2024 2024] +(yyyy [1999.12.31 2000.01.01 2025.01.01]) -- [1999 2000 2025] + +;; ───────────────────────────── mm (month) ────────────────────────────── +(mm 2024.03.15) -- 3 +(mm 2024.01.01) -- 1 +(mm 2024.12.31) -- 12 +;; leap-year Feb +(mm 2024.02.29) -- 2 +;; pre-epoch +(mm 1999.12.31) -- 12 +;; timestamp atom +(mm 2024.07.04D09:15:30.000000000) -- 7 +;; vector covering every month +(mm [2024.01.10 2024.02.10 2024.03.10 2024.04.10 2024.05.10 2024.06.10]) -- [1 2 3 4 5 6] +(mm [2024.07.10 2024.08.10 2024.09.10 2024.10.10 2024.11.10 2024.12.10]) -- [7 8 9 10 11 12] + +;; ─────────────────────────── dd (day-of-month) ───────────────────────── +(dd 2024.03.15) -- 15 +(dd 2024.01.01) -- 1 +(dd 2024.01.31) -- 31 +;; leap-day +(dd 2024.02.29) -- 29 +;; year-end +(dd 2024.12.31) -- 31 +;; timestamp atom +(dd 2024.07.04D09:15:30.000000000) -- 4 +;; vector at month boundaries +(dd [2024.01.01 2024.01.31 2024.02.29 2024.12.31]) -- [1 31 29 31] + +;; ───────────────────────────── hh (hour) ─────────────────────────────── +;; time atom +(hh 00:00:00.000) -- 0 +(hh 12:34:56.000) -- 12 +(hh 23:59:59.999) -- 23 +;; timestamp atom +(hh 2024.03.15D00:00:00.000000000) -- 0 +(hh 2024.03.15D12:34:56.000000000) -- 12 +(hh 2024.03.15D23:59:59.999999999) -- 23 +;; pure-date timestamp boundary (date atom decodes to midnight UTC) +(hh 2024.03.15) -- 0 +;; vector of times +(hh [00:00:00.000 06:30:00.000 12:00:00.000 18:45:00.000 23:59:59.000]) -- [0 6 12 18 23] 
+;; vector of timestamps +(hh [2024.03.15D01:00:00.000000000 2024.03.15D13:00:00.000000000]) -- [1 13] + +;; ──────────────────────────── minute ─────────────────────────────────── +(minute 00:00:00.000) -- 0 +(minute 12:34:56.000) -- 34 +(minute 12:59:00.000) -- 59 +(minute 2024.03.15D12:34:56.000000000) -- 34 +(minute 2024.03.15D08:00:00.000000000) -- 0 +;; vector +(minute [00:00:00.000 00:15:00.000 00:30:00.000 00:45:00.000 00:59:00.000]) -- [0 15 30 45 59] + +;; ──────────────────────────── ss (second) ────────────────────────────── +(ss 00:00:00.000) -- 0 +(ss 12:34:56.000) -- 56 +(ss 12:30:59.999) -- 59 +(ss 2024.03.15D12:34:56.000000000) -- 56 +(ss 2024.03.15D12:34:00.000000000) -- 0 +;; vector +(ss [00:00:00.000 00:00:01.000 00:00:30.000 00:00:59.000]) -- [0 1 30 59] + +;; ───────────────────────────── dow (day-of-week) ─────────────────────── +;; rayforce's `dow` returns 1..7 with Mon=1 .. Sun=7 +;; (formula: ((days_since_2000 % 7) + 7 + 5) % 7 + 1) +;; Verified by hand against the Gregorian calendar: +;; 2024-01-01 = Mon, 2024-03-15 = Fri, 2024-07-04 = Thu, +;; 2024-12-31 = Tue, 2000-01-01 = Sat, 2023-12-31 = Sun. 
+(dow 2024.01.01) -- 1 +(dow 2024.03.15) -- 5 +(dow 2024.07.04) -- 4 +(dow 2024.12.31) -- 2 +(dow 2000.01.01) -- 6 +(dow 2023.12.31) -- 7 +;; timestamp atom takes the same path +(dow 2024.03.15D12:34:56.000000000) -- 5 +;; vector — one of each weekday code +;; (2024.01.01 Mon, 2024.01.02 Tue, 2024.01.03 Wed, 2024.01.04 Thu, +;; 2024.01.05 Fri, 2024.01.06 Sat, 2024.01.07 Sun) +(dow [2024.01.01 2024.01.02 2024.01.03 2024.01.04 2024.01.05 2024.01.06 2024.01.07]) -- [1 2 3 4 5 6 7] + +;; ───────────────────────────── doy (day-of-year) ─────────────────────── +;; non-leap baseline +(doy 2023.01.01) -- 1 +(doy 2023.03.01) -- 60 +(doy 2023.12.31) -- 365 +;; leap year shifts everything from Mar 1 onward by +1 +(doy 2024.01.01) -- 1 +(doy 2024.02.29) -- 60 +(doy 2024.03.01) -- 61 +(doy 2024.07.04) -- 186 +(doy 2024.12.31) -- 366 +;; century rules: 2000 is leap (div 400), 1900 was not (div 100, not 400) +(doy 2000.12.31) -- 366 +;; timestamp atom +(doy 2024.03.15D12:34:56.000000000) -- 75 +;; vector across the year +(doy [2024.01.01 2024.02.29 2024.03.01 2024.07.04 2024.12.31]) -- [1 60 61 186 366] + +;; ───────────────────── combined extractor round-trip ─────────────────── +;; A single timestamp decomposes consistently across every helper. +(set TS 2024.03.15D12:34:56.000000000) +(yyyy TS) -- 2024 +(mm TS) -- 3 +(dd TS) -- 15 +(hh TS) -- 12 +(minute TS) -- 34 +(ss TS) -- 56 +(dow TS) -- 5 +(doy TS) -- 75 + +;; Same idea on a vector — each helper produces a parallel column. 
+(set DS [2024.01.01 2024.07.04 2024.12.31]) +(yyyy DS) -- [2024 2024 2024] +(mm DS) -- [1 7 12] +(dd DS) -- [1 4 31] +(dow DS) -- [1 4 2] +(doy DS) -- [1 186 366] From c421fac896467ba446e8c16a18414d232e40e64f Mon Sep 17 00:00:00 2001 From: Serhii Savchuk Date: Tue, 19 May 2026 11:33:18 +0300 Subject: [PATCH 2/3] test: round1-2 C-level coverage push MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends three C-level test files with happy-path coverage for: test/test_fused_group.c (+575 lines, 10 new tests): - mk_combine_hist_fn / mk_combine_scat_fn / mk_combine_dedup_fn — multi-key fused_group 3-pass radix scatter combine - fp_expr_const_str — phase-3 const-string LIKE predicate gate - fp_count_heap_up/down/consider, fp_count_emit_keep_min — fused TOP-N count heap test/test_sort.c (+425 lines, 12 new tests): - ray_top_fn / ray_bot_fn for I64/F64/SYM at K1_000_001 test/test_public_api.c (+260 lines, 33 new tests): - ray_obj_type / ray_obj_attrs across atom/vec/list/dict/table - ray_vec_get_i64 across I64/I32/I16/U8/BOOL/TIMESTAMP - ray_vec_get_f64 across F64/F32 - ray_vec_get_sym_id across W64/W32/W16/W8 - ray_runtime_create_with_sym, _with_sym_err, runtime_destroy(NULL) - ray_request_interrupt / ray_clear_interrupt / ray_interrupted - ray_eval_*_interrupt wrappers (thread-local sig_atomic flag) - ray_eval_get_nfo / ray_eval_set_nfo handle round-trip - ray_eval_set_restricted / ray_eval_get_restricted - ray_get_error_trace populated after lambda type-error, cleared on next ray_eval_str RAY_DATE / RAY_TIME branches of ray_vec_get_i64 are flagged as intentionally uncovered (see in-file comment) — fix in follow-up. All tests happy-path only (correct types / shapes). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/test_fused_group.c | 575 +++++++++++++++++++++++++++++++++++++++ test/test_public_api.c | 576 +++++++++++++++++++++++++++++++++++++++- test/test_sort.c | 425 +++++++++++++++++++++++++++++ 3 files changed, 1574 insertions(+), 2 deletions(-) diff --git a/test/test_fused_group.c b/test/test_fused_group.c index f83902db..afa9e46d 100644 --- a/test/test_fused_group.c +++ b/test/test_fused_group.c @@ -36,6 +36,7 @@ #include "ops/internal.h" #include "ops/ops.h" #include "ops/fused_group.h" +#include "lang/parse.h" #include "table/sym.h" #include @@ -1380,6 +1381,568 @@ static test_result_t test_eq_no_match(void) { PASS(); } +/* ────────────────────────────────────────────────────────────────────── + * Coverage extensions: multi-key parallel combine (mk_combine_hist_fn / + * mk_combine_scat_fn / mk_combine_dedup_fn), fused TOP-N count heap + * (fp_count_heap_* + fp_count_emit_keep_min), and Phase-3 const-string + * predicate gate (fp_expr_const_str). + * ────────────────────────────────────────────────────────────────────── */ + +/* mk_combine_parallel path: 2 wide I64 keys (16 bytes total → wide=1). + * Drive enough distinct (k1,k2) pairs past FP_COMBINE_PAR_MIN (50,000) + * across all worker shards so the 3-pass radix scatter activates. Each + * worker sees its row range and shards into a private HT — with all- + * distinct rows the shard fills equal nrows/nw, summing past 50K across + * the pool. 
*/ +static test_result_t test_mk_combine_2i64_parallel_wide(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t N = 80000; + ray_t* k1c = ray_vec_new(RAY_I64, N); k1c->len = N; + ray_t* k2c = ray_vec_new(RAY_I64, N); k2c->len = N; + ray_t* vc = ray_vec_new(RAY_I64, N); vc->len = N; + int64_t* k1 = (int64_t*)ray_data(k1c); + int64_t* k2 = (int64_t*)ray_data(k2c); + int64_t* v = (int64_t*)ray_data(vc); + /* All (k1, k2) pairs distinct so per-shard n_filled = rows/nw and + * total_local = N — comfortably above FP_COMBINE_PAR_MIN (50K). */ + for (int64_t i = 0; i < N; i++) { + k1[i] = i; + k2[i] = i * 3 + 7; + v[i] = i + 1; + } + + int64_t s_k1 = ray_sym_intern("k1", 2); + int64_t s_k2 = ray_sym_intern("k2", 2); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, s_k1, k1c); ray_release(k1c); + tbl = ray_table_add_col(tbl, s_k2, k2c); ray_release(k2c); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k1 = ray_scan(g, "k1"); + ray_op_t* scan_k2 = ray_scan(g, "k2"); + ray_op_t* scan_v = ray_scan(g, "v"); + ray_op_t* scan_vp = ray_scan(g, "v"); + ray_op_t* zero = ray_const_i64(g, 0); + /* Non-trivial WHERE that passes everything. */ + ray_op_t* pred = ray_binop(g, OP_GE, scan_vp, zero); + + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_v }; + ray_op_t* keys[] = { scan_k1, scan_k2 }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 2, agg_ops, agg_ins, 1); + TEST_ASSERT_NOT_NULL(fused); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* All pairs distinct → N output rows. 
*/ + TEST_ASSERT_EQ_I(ray_table_nrows(res), N); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* cnt_col = ray_table_get_col(res, cnt_sym); + TEST_ASSERT_NOT_NULL(cnt_col); + int64_t total = 0; + for (int64_t i = 0; i < ray_table_nrows(res); i++) + total += ((int64_t*)ray_data(cnt_col))[i]; + TEST_ASSERT_EQ_I(total, N); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* mk_combine narrow branch: 2 I32 keys → 8 bytes total → wide=0. All + * (k1, k2) pairs distinct so total_local hits the parallel threshold. + * Exercises the !wide branches of mk_combine_hist_fn / scat_fn / dedup_fn. */ +static test_result_t test_mk_combine_2i32_parallel_narrow(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t N = 80000; + ray_t* k1c = ray_vec_new(RAY_I32, N); k1c->len = N; + ray_t* k2c = ray_vec_new(RAY_I32, N); k2c->len = N; + ray_t* vc = ray_vec_new(RAY_I64, N); vc->len = N; + int32_t* k1 = (int32_t*)ray_data(k1c); + int32_t* k2 = (int32_t*)ray_data(k2c); + int64_t* v = (int64_t*)ray_data(vc); + /* k1 = i / 4, k2 = i % 4 → all (k1,k2) distinct because i = k1*4 + k2. 
*/ + for (int64_t i = 0; i < N; i++) { + k1[i] = (int32_t)(i / 4); + k2[i] = (int32_t)(i % 4); + v[i] = i + 1; + } + + int64_t s_k1 = ray_sym_intern("k1", 2); + int64_t s_k2 = ray_sym_intern("k2", 2); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, s_k1, k1c); ray_release(k1c); + tbl = ray_table_add_col(tbl, s_k2, k2c); ray_release(k2c); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k1 = ray_scan(g, "k1"); + ray_op_t* scan_k2 = ray_scan(g, "k2"); + ray_op_t* scan_v = ray_scan(g, "v"); + ray_op_t* scan_vp = ray_scan(g, "v"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_vp, zero); + + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_v }; + ray_op_t* keys[] = { scan_k1, scan_k2 }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 2, agg_ops, agg_ins, 1); + TEST_ASSERT_NOT_NULL(fused); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), N); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* cnt_col = ray_table_get_col(res, cnt_sym); + int64_t total = 0; + for (int64_t i = 0; i < ray_table_nrows(res); i++) + total += ((int64_t*)ray_data(cnt_col))[i]; + TEST_ASSERT_EQ_I(total, N); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* mk_combine 2 SYM keys with W32 width. Total = 4+4 = 8 bytes → wide=0. + * Each row carries a distinct (s1, s2) pair so total_local exceeds + * FP_COMBINE_PAR_MIN. 
*/ +static test_result_t test_mk_combine_2sym_parallel(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t N = 80000; + ray_t* s1c = ray_sym_vec_new(RAY_SYM_W32, N); s1c->len = N; + ray_t* s2c = ray_sym_vec_new(RAY_SYM_W32, N); s2c->len = N; + ray_t* vc = ray_vec_new(RAY_I64, N); vc->len = N; + int32_t* s1 = (int32_t*)ray_data(s1c); + int32_t* s2 = (int32_t*)ray_data(s2c); + int64_t* v = (int64_t*)ray_data(vc); + /* Intern a pool of 400 distinct symbols up front; rows index into it. */ + int64_t pool[400]; + char nm[16]; + for (int j = 0; j < 400; j++) { + int l = snprintf(nm, sizeof(nm), "sym_%04d", j); + pool[j] = ray_sym_intern(nm, (size_t)l); + } + /* (s1[i], s2[i]) = (pool[i / 400], pool[i % 400]) — 400 × 400 = 160K + * possible pairs; with N=80K rows all pairs distinct (i runs 0..N-1). */ + for (int64_t i = 0; i < N; i++) { + s1[i] = (int32_t)pool[i / 400]; + s2[i] = (int32_t)pool[i % 400]; + v[i] = i + 1; + } + int64_t s_a = ray_sym_intern("a", 1); + int64_t s_b = ray_sym_intern("b", 1); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, s_a, s1c); ray_release(s1c); + tbl = ray_table_add_col(tbl, s_b, s2c); ray_release(s2c); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_a = ray_scan(g, "a"); + ray_op_t* scan_b = ray_scan(g, "b"); + ray_op_t* scan_v = ray_scan(g, "v"); + ray_op_t* scan_vp = ray_scan(g, "v"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_vp, zero); + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_v }; + ray_op_t* keys[] = { scan_a, scan_b }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 2, agg_ops, agg_ins, 1); + TEST_ASSERT_NOT_NULL(fused); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* All pairs distinct.
*/ + TEST_ASSERT_EQ_I(ray_table_nrows(res), N); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* cnt_col = ray_table_get_col(res, cnt_sym); + int64_t total = 0; + for (int64_t i = 0; i < ray_table_nrows(res); i++) + total += ((int64_t*)ray_data(cnt_col))[i]; + TEST_ASSERT_EQ_I(total, N); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* mk_combine mixed: SYM_W32 (4 bytes) + I64 (8 bytes) = 12 bytes → wide=1. + * Exercises the wide branch with a SYM-bearing decompose at materialize. */ +static test_result_t test_mk_combine_sym_i64_parallel(void) { + ray_heap_init(); + (void)ray_sym_init(); + + int64_t N = 80000; + ray_t* sc = ray_sym_vec_new(RAY_SYM_W32, N); sc->len = N; + ray_t* kc = ray_vec_new(RAY_I64, N); kc->len = N; + ray_t* vc = ray_vec_new(RAY_I64, N); vc->len = N; + int32_t* s = (int32_t*)ray_data(sc); + int64_t* k = (int64_t*)ray_data(kc); + int64_t* v = (int64_t*)ray_data(vc); + int64_t pool[400]; + char nm[16]; + for (int j = 0; j < 400; j++) { + int l = snprintf(nm, sizeof(nm), "msy_%04d", j); + pool[j] = ray_sym_intern(nm, (size_t)l); + } + /* (s[i], k[i]) = (pool[i % 400], i) — N distinct pairs (k unique). 
*/ + for (int64_t i = 0; i < N; i++) { + s[i] = (int32_t)pool[i % 400]; + k[i] = i; + v[i] = i + 1; + } + int64_t s_s = ray_sym_intern("s", 1); + int64_t s_k = ray_sym_intern("k", 1); + int64_t s_v = ray_sym_intern("v", 1); + ray_t* tbl = ray_table_new(3); + tbl = ray_table_add_col(tbl, s_s, sc); ray_release(sc); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + tbl = ray_table_add_col(tbl, s_v, vc); ray_release(vc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_s = ray_scan(g, "s"); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_v = ray_scan(g, "v"); + ray_op_t* scan_vp = ray_scan(g, "v"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_vp, zero); + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_v }; + ray_op_t* keys[] = { scan_s, scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 2, agg_ops, agg_ins, 1); + TEST_ASSERT_NOT_NULL(fused); + + ray_t* res = ray_execute(g, fused); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_table_nrows(res), N); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* cnt_col = ray_table_get_col(res, cnt_sym); + int64_t total = 0; + for (int64_t i = 0; i < ray_table_nrows(res); i++) + total += ((int64_t*)ray_data(cnt_col))[i]; + TEST_ASSERT_EQ_I(total, N); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* Forward-declare the runtime API for fp_expr_const_str tests. Mirrors + * test_fused_topk.c pattern — fp_expr_const_str is called only from + * fp_check_like inside ray_fused_group_supported, which needs a parsed + * AST. ray_parse requires a live runtime for its symbol-table state. 
*/ +struct ray_runtime_s; +typedef struct ray_runtime_s ray_runtime_t; +extern ray_runtime_t* ray_runtime_create(int argc, char** argv); +extern void ray_runtime_destroy(ray_runtime_t* rt); +extern ray_runtime_t* __RUNTIME; + +/* fp_expr_const_str: LIKE on a SYM column with a string-literal pattern + * should be recognised by the planner gate (returns 1). Exercises the + * `expr->type == -RAY_STR && !RAY_ATTR_NAME` base case of the recursive + * walker. */ +static test_result_t test_fp_expr_const_str_simple_like(void) { + ray_runtime_create(0, NULL); + + /* Tiny SYM table — fp_check_like requires the column to exist and be + * STR/SYM type. */ + ray_t* sc = ray_sym_vec_new(RAY_SYM_W32, 3); sc->len = 3; + int32_t* sd = (int32_t*)ray_data(sc); + int64_t s_a = ray_sym_intern("apple", 5); + int64_t s_b = ray_sym_intern("banana", 6); + int64_t s_c = ray_sym_intern("cherry", 6); + sd[0] = (int32_t)s_a; sd[1] = (int32_t)s_b; sd[2] = (int32_t)s_c; + int64_t s_name = ray_sym_intern("name", 4); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, s_name, sc); ray_release(sc); + + ray_t* expr = ray_parse("(like name \"app*\")"); + TEST_ASSERT_NOT_NULL(expr); + TEST_ASSERT_FALSE(RAY_IS_ERR(expr)); + + /* Predicate gate must accept (like sym_col "literal") — this recurses + * through fp_check_like → fp_expr_const_str on the literal. */ + int ok = ray_fused_group_supported(expr, tbl); + TEST_ASSERT_EQ_I(ok, 1); + + ray_release(expr); + ray_release(tbl); + ray_runtime_destroy(__RUNTIME); + PASS(); +} + +/* fp_expr_const_str: nested (concat str str) pattern. Exercises the + * "is_concat" branch + recursion into each child. 
*/ +static test_result_t test_fp_expr_const_str_concat_like(void) { + ray_runtime_create(0, NULL); + + ray_t* sc = ray_sym_vec_new(RAY_SYM_W32, 2); sc->len = 2; + int32_t* sd = (int32_t*)ray_data(sc); + int64_t s_x = ray_sym_intern("foo_x", 5); + int64_t s_y = ray_sym_intern("foo_y", 5); + sd[0] = (int32_t)s_x; sd[1] = (int32_t)s_y; + int64_t s_n = ray_sym_intern("name", 4); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, s_n, sc); ray_release(sc); + + /* Pattern is (concat "foo" "*") — a nested-list const-string. */ + ray_t* expr = ray_parse("(like name (concat \"foo\" \"*\"))"); + TEST_ASSERT_NOT_NULL(expr); + TEST_ASSERT_FALSE(RAY_IS_ERR(expr)); + + int ok = ray_fused_group_supported(expr, tbl); + TEST_ASSERT_EQ_I(ok, 1); + + ray_release(expr); + ray_release(tbl); + ray_runtime_destroy(__RUNTIME); + PASS(); +} + +/* fp_expr_const_str: deeply-nested (concat (concat str str) str) — drives + * the recursive fp_expr_const_str over a tree, not just a flat list. */ +static test_result_t test_fp_expr_const_str_nested_concat(void) { + ray_runtime_create(0, NULL); + + ray_t* sc = ray_sym_vec_new(RAY_SYM_W32, 1); sc->len = 1; + int32_t* sd = (int32_t*)ray_data(sc); + int64_t s_q = ray_sym_intern("abcdefg", 7); + sd[0] = (int32_t)s_q; + int64_t s_n = ray_sym_intern("name", 4); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, s_n, sc); ray_release(sc); + + ray_t* expr = ray_parse("(like name (concat (concat \"a\" \"b\") \"*\"))"); + TEST_ASSERT_NOT_NULL(expr); + TEST_ASSERT_FALSE(RAY_IS_ERR(expr)); + int ok = ray_fused_group_supported(expr, tbl); + TEST_ASSERT_EQ_I(ok, 1); + + ray_release(expr); + ray_release(tbl); + ray_runtime_destroy(__RUNTIME); + PASS(); +} + +/* fp_count_heap_*: U8 column → fp_try_direct_count1 fires (256 slots); + * with emit_filter.top_count_take = 3 and many distinct keys, the + * fp_count_emit_keep_min path runs the heap (n_slots ≫ k_take). 
*/ +static test_result_t test_fp_count_heap_u8_top3(void) { + ray_heap_init(); + (void)ray_sym_init(); + + /* 20 distinct U8 keys with sharply different counts so the top-3 is + * unambiguous: key i (1..20) appears i times — total rows = 1+2+...+20 + * = 210. */ + int64_t total_rows = 0; + for (int64_t i = 1; i <= 20; i++) total_rows += i; + ray_t* kc = ray_vec_new(RAY_U8, total_rows); kc->len = total_rows; + uint8_t* k = (uint8_t*)ray_data(kc); + int64_t pos = 0; + for (int64_t key = 1; key <= 20; key++) { + for (int64_t r = 0; r < key; r++) k[pos++] = (uint8_t)key; + } + int64_t s_k = ray_sym_intern("k", 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_pred = ray_scan(g, "k"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_pred, zero); + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + TEST_ASSERT_NOT_NULL(fused); + + ray_group_emit_filter_t prev = ray_group_emit_filter_get(); + ray_group_emit_filter_t filter = {0}; + filter.enabled = 1; + filter.agg_index = 0; + filter.top_count_take = 3; + ray_group_emit_filter_set(filter); + ray_t* res = ray_execute(g, fused); + ray_group_emit_filter_set(prev); + + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* Top-3 counts: keys 20, 19, 18 with counts 20, 19, 18 respectively. + * fp_count_emit_keep_min returns heap[0] = 18 — every group with + * count >= 18 is retained, so exactly 3 rows.
*/ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 3); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* k_col = ray_table_get_col(res, s_k); + ray_t* c_col = ray_table_get_col(res, cnt_sym); + TEST_ASSERT_NOT_NULL(k_col); + TEST_ASSERT_NOT_NULL(c_col); + int seen_18 = 0, seen_19 = 0, seen_20 = 0; + for (int64_t i = 0; i < ray_table_nrows(res); i++) { + int64_t key = (int64_t)((uint8_t*)ray_data(k_col))[i]; + int64_t cnt = ((int64_t*)ray_data(c_col))[i]; + if (key == 18) { TEST_ASSERT_EQ_I(cnt, 18); seen_18 = 1; } + if (key == 19) { TEST_ASSERT_EQ_I(cnt, 19); seen_19 = 1; } + if (key == 20) { TEST_ASSERT_EQ_I(cnt, 20); seen_20 = 1; } + } + TEST_ASSERT_TRUE(seen_18 && seen_19 && seen_20); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* fp_count_heap_*: I16 key → fp_try_direct_count1 with 65536 slots; with + * a small top-K the heap_up / heap_down branches both fire as the heap + * gets pushed past capacity and then sees rows that displace heap[0]. */ +static test_result_t test_fp_count_heap_i16_top5(void) { + ray_heap_init(); + (void)ray_sym_init(); + + /* 12 distinct I16 keys (100..111) with non-monotone counts. The + * count sequence (intentionally not sorted) drives both the up-heap + * (initial fill) and down-heap (replace heap[0] when a bigger count + * appears later in the slot walk) paths.
*/ + int64_t per_key[12] = { 5, 11, 3, 17, 2, 9, 13, 21, 1, 7, 19, 4 }; + int64_t total_rows = 0; + for (int i = 0; i < 12; i++) total_rows += per_key[i]; + ray_t* kc = ray_vec_new(RAY_I16, total_rows); kc->len = total_rows; + int16_t* k = (int16_t*)ray_data(kc); + int64_t pos = 0; + for (int i = 0; i < 12; i++) + for (int64_t r = 0; r < per_key[i]; r++) + k[pos++] = (int16_t)(i + 100); /* keys 100..111 */ + int64_t s_k = ray_sym_intern("k", 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_pred = ray_scan(g, "k"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_pred, zero); + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + TEST_ASSERT_NOT_NULL(fused); + + ray_group_emit_filter_t prev = ray_group_emit_filter_get(); + ray_group_emit_filter_t filter = {0}; + filter.enabled = 1; + filter.agg_index = 0; + filter.top_count_take = 5; + ray_group_emit_filter_set(filter); + ray_t* res = ray_execute(g, fused); + ray_group_emit_filter_set(prev); + + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* Top-5 counts: sorted descending = 21, 19, 17, 13, 11. keep_min = 11. + * Result rows: every key whose count >= 11. Counts 21,19,17,13,11 → + * 5 rows. */ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 5); + + /* Verify the result counts are exactly {21,19,17,13,11}. 
*/ + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* c_col = ray_table_get_col(res, cnt_sym); + TEST_ASSERT_NOT_NULL(c_col); + int64_t expect[5] = { 11, 13, 17, 19, 21 }; + int seen[5] = {0, 0, 0, 0, 0}; + for (int64_t i = 0; i < ray_table_nrows(res); i++) { + int64_t c = ((int64_t*)ray_data(c_col))[i]; + for (int j = 0; j < 5; j++) + if (c == expect[j] && !seen[j]) { seen[j] = 1; break; } + } + for (int j = 0; j < 5; j++) TEST_ASSERT_TRUE(seen[j]); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* fp_count_emit_keep_min via the serial-combine path of count1 with a + * wide-key I64 column. fp_try_direct_count1 rejects (kt != BOOL/U8/I16) + * so the code falls through to fp_combine_and_materialize. With + * use_emit_filter on, the parallel-combine branch is skipped (line 1343) + * and the serial combine + fp_count_emit_keep_min path runs. The + * used_key_slots parameter is non-NULL in this branch, exercising the + * `used_key_slots && !used_key_slots[s * 2]` skip. */ +static test_result_t test_fp_count_emit_keep_min_i64_serial(void) { + ray_heap_init(); + (void)ray_sym_init(); + + /* 15 distinct I64 keys with monotone counts 1..15. Big enough that + * after the serial HT-build the open-addressed table has many empty + * slots interspersed with filled ones, exercising the + * used_key_slots-skip branch. 
*/ + int64_t per_key[15]; + int64_t total_rows = 0; + for (int i = 0; i < 15; i++) { per_key[i] = i + 1; total_rows += per_key[i]; } + ray_t* kc = ray_vec_new(RAY_I64, total_rows); kc->len = total_rows; + int64_t* k = (int64_t*)ray_data(kc); + int64_t pos = 0; + for (int i = 0; i < 15; i++) + for (int64_t r = 0; r < per_key[i]; r++) + k[pos++] = (int64_t)(1000 + i); + int64_t s_k = ray_sym_intern("k", 1); + ray_t* tbl = ray_table_new(1); + tbl = ray_table_add_col(tbl, s_k, kc); ray_release(kc); + + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* scan_k = ray_scan(g, "k"); + ray_op_t* scan_pred = ray_scan(g, "k"); + ray_op_t* zero = ray_const_i64(g, 0); + ray_op_t* pred = ray_binop(g, OP_GE, scan_pred, zero); + uint16_t agg_ops[] = { OP_COUNT }; + ray_op_t* agg_ins[] = { scan_k }; + ray_op_t* keys[] = { scan_k }; + ray_op_t* fused = ray_filtered_group(g, pred, keys, 1, agg_ops, agg_ins, 1); + TEST_ASSERT_NOT_NULL(fused); + + ray_group_emit_filter_t prev = ray_group_emit_filter_get(); + ray_group_emit_filter_t filter = {0}; + filter.enabled = 1; + filter.agg_index = 0; + filter.top_count_take = 4; + ray_group_emit_filter_set(filter); + ray_t* res = ray_execute(g, fused); + ray_group_emit_filter_set(prev); + + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + /* Top-4 counts = 15, 14, 13, 12. keep_min = 12 → 4 rows. 
*/ + TEST_ASSERT_EQ_I(ray_table_nrows(res), 4); + + int64_t cnt_sym = ray_sym_intern("count", 5); + ray_t* c_col = ray_table_get_col(res, cnt_sym); + TEST_ASSERT_NOT_NULL(c_col); + int64_t expect[4] = { 12, 13, 14, 15 }; + int seen[4] = { 0, 0, 0, 0 }; + for (int64_t i = 0; i < ray_table_nrows(res); i++) { + int64_t c = ((int64_t*)ray_data(c_col))[i]; + for (int j = 0; j < 4; j++) + if (c == expect[j] && !seen[j]) { seen[j] = 1; break; } + } + for (int j = 0; j < 4; j++) TEST_ASSERT_TRUE(seen[j]); + + ray_release(res); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + const test_entry_t fused_group_entries[] = { { "fused_group/eq_count", test_eq_count, NULL, NULL }, { "fused_group/ne_two_groups", test_ne_two_groups, NULL, NULL }, @@ -1408,5 +1971,17 @@ const test_entry_t fused_group_entries[] = { { "fused_group/multi_agg_and_pred", test_multi_agg_and_pred, NULL, NULL }, { "fused_group/multi_agg_unsigned_inputs", test_multi_agg_unsigned_inputs, NULL, NULL }, { "fused_group/count1_sym_key_w32", test_count1_sym_key_w32, NULL, NULL }, + /* mk_combine_* (multi-key parallel 3-pass radix scatter) + fused + * TOP-N count heap + Phase-3 const-string LIKE gate. 
*/ + { "fused_group/mk_combine_2i64_parallel_wide", test_mk_combine_2i64_parallel_wide, NULL, NULL }, + { "fused_group/mk_combine_2i32_parallel_narrow",test_mk_combine_2i32_parallel_narrow,NULL, NULL }, + { "fused_group/mk_combine_2sym_parallel", test_mk_combine_2sym_parallel, NULL, NULL }, + { "fused_group/mk_combine_sym_i64_parallel", test_mk_combine_sym_i64_parallel, NULL, NULL }, + { "fused_group/fp_expr_const_str_simple_like", test_fp_expr_const_str_simple_like, NULL, NULL }, + { "fused_group/fp_expr_const_str_concat_like", test_fp_expr_const_str_concat_like, NULL, NULL }, + { "fused_group/fp_expr_const_str_nested_concat",test_fp_expr_const_str_nested_concat,NULL, NULL }, + { "fused_group/fp_count_heap_u8_top3", test_fp_count_heap_u8_top3, NULL, NULL }, + { "fused_group/fp_count_heap_i16_top5", test_fp_count_heap_i16_top5, NULL, NULL }, + { "fused_group/fp_count_emit_keep_min_i64_serial", test_fp_count_emit_keep_min_i64_serial, NULL, NULL }, { NULL, NULL, NULL, NULL }, }; diff --git a/test/test_public_api.c b/test/test_public_api.c index afd7d8f5..240efb84 100644 --- a/test/test_public_api.c +++ b/test/test_public_api.c @@ -21,8 +21,27 @@ * SOFTWARE. */ +#define _DEFAULT_SOURCE /* mkdtemp */ + #include "test.h" #include +#include "lang/eval.h" +#include +#include +#include +#include + +/* Most introspection helpers need a live heap/runtime so vectors and + * atoms can be constructed via the public API. Match the test_link.c + * pattern: bring up a runtime in setup, tear it down afterwards. 
*/ +struct ray_runtime_s; +typedef struct ray_runtime_s ray_runtime_t; +extern ray_runtime_t* ray_runtime_create(int argc, char** argv); +extern void ray_runtime_destroy(ray_runtime_t* rt); +extern ray_runtime_t* __RUNTIME; + +static void public_api_setup(void) { ray_runtime_create(0, NULL); } +static void public_api_teardown(void) { ray_runtime_destroy(__RUNTIME); } static test_result_t test_public_ipc_client_symbols(void) { int64_t (*connect_fn)(const char*, uint16_t, const char*, const char*) = ray_ipc_connect; @@ -62,8 +81,561 @@ static test_result_t test_public_query_and_format_symbols(void) { PASS(); } +/* ─── ray_obj_type / ray_obj_attrs ────────────────────────────────── + * + * The FFI helpers are thin readers of v->type and v->attrs. Atoms + * carry the negative form of the type tag; vectors carry the positive + * tag. RAY_LIST is type 0, RAY_TABLE is 98, RAY_DICT is 99. */ + +static test_result_t test_public_obj_type_atom_i64(void) { + ray_t* v = ray_i64(42); + TEST_ASSERT_NOT_NULL(v); + TEST_ASSERT_EQ_I(ray_obj_type(v), -RAY_I64); + TEST_ASSERT_EQ_I(ray_obj_attrs(v), 0); + ray_release(v); + PASS(); +} + +static test_result_t test_public_obj_type_atom_f64(void) { + ray_t* v = ray_f64(3.14); + TEST_ASSERT_NOT_NULL(v); + TEST_ASSERT_EQ_I(ray_obj_type(v), -RAY_F64); + TEST_ASSERT_EQ_I(ray_obj_attrs(v), 0); + ray_release(v); + PASS(); +} + +static test_result_t test_public_obj_type_atom_sym(void) { + int64_t sid = ray_sym_intern("alpha", 5); + ray_t* v = ray_sym(sid); + TEST_ASSERT_NOT_NULL(v); + TEST_ASSERT_EQ_I(ray_obj_type(v), -RAY_SYM); + TEST_ASSERT_EQ_I(ray_obj_attrs(v), 0); + ray_release(v); + PASS(); +} + +static test_result_t test_public_obj_type_vec_i64(void) { + ray_t* v = ray_vec_new(RAY_I64, 4); + TEST_ASSERT_NOT_NULL(v); + TEST_ASSERT_FALSE(RAY_IS_ERR(v)); + TEST_ASSERT_EQ_I(ray_obj_type(v), RAY_I64); + TEST_ASSERT_EQ_I(ray_obj_attrs(v), 0); + ray_release(v); + PASS(); +} + +static test_result_t test_public_obj_type_vec_f64(void) { + ray_t* v = 
ray_vec_new(RAY_F64, 4); + TEST_ASSERT_NOT_NULL(v); + TEST_ASSERT_FALSE(RAY_IS_ERR(v)); + TEST_ASSERT_EQ_I(ray_obj_type(v), RAY_F64); + TEST_ASSERT_EQ_I(ray_obj_attrs(v), 0); + ray_release(v); + PASS(); +} + +static test_result_t test_public_obj_type_vec_sym(void) { + /* ray_sym_vec_new stores the width in the low 2 bits of attrs; + * ray_obj_attrs should expose them verbatim. */ + ray_t* v = ray_sym_vec_new(RAY_SYM_W32, 4); + TEST_ASSERT_NOT_NULL(v); + TEST_ASSERT_FALSE(RAY_IS_ERR(v)); + TEST_ASSERT_EQ_I(ray_obj_type(v), RAY_SYM); + TEST_ASSERT_EQ_I(ray_obj_attrs(v) & 0x3, RAY_SYM_W32); + ray_release(v); + PASS(); +} + +static test_result_t test_public_obj_type_list(void) { + ray_t* v = ray_list_new(2); + TEST_ASSERT_NOT_NULL(v); + TEST_ASSERT_FALSE(RAY_IS_ERR(v)); + TEST_ASSERT_EQ_I(ray_obj_type(v), RAY_LIST); + TEST_ASSERT_EQ_I(ray_obj_attrs(v), 0); + ray_release(v); + PASS(); +} + +static test_result_t test_public_obj_type_table(void) { + ray_t* tbl = ray_table_new(2); + TEST_ASSERT_NOT_NULL(tbl); + TEST_ASSERT_FALSE(RAY_IS_ERR(tbl)); + TEST_ASSERT_EQ_I(ray_obj_type(tbl), RAY_TABLE); + TEST_ASSERT_EQ_I(ray_obj_attrs(tbl), 0); + ray_release(tbl); + PASS(); +} + +static test_result_t test_public_obj_type_dict(void) { + /* Two-element typed-vec keys + typed-vec vals → dict. 
*/ + ray_t* keys = ray_vec_new(RAY_I64, 2); + int64_t k0 = 10, k1 = 20; + keys = ray_vec_append(keys, &k0); + keys = ray_vec_append(keys, &k1); + ray_t* vals = ray_vec_new(RAY_I64, 2); + int64_t v0 = 100, v1 = 200; + vals = ray_vec_append(vals, &v0); + vals = ray_vec_append(vals, &v1); + + ray_t* d = ray_dict_new(keys, vals); + TEST_ASSERT_NOT_NULL(d); + TEST_ASSERT_FALSE(RAY_IS_ERR(d)); + TEST_ASSERT_EQ_I(ray_obj_type(d), RAY_DICT); + TEST_ASSERT_EQ_I(ray_obj_attrs(d), 0); + ray_release(d); + PASS(); +} + +/* ─── ray_vec_get_i64 — every integer width branch ─────────────────── + * + * Implementation (src/core/runtime.c) dispatches on vec->type: + * I64 / DATE / TIME / TIMESTAMP → int64_t cast + * I32 → int32_t cast + * I16 → int16_t cast + * U8 / BOOL → uint8_t cast + * + * For each branch read at idx 0, mid, and last to exercise the indexing + * arithmetic on top of the type-specific element size. */ + +static test_result_t test_public_vec_get_i64_i64(void) { + ray_t* v = ray_vec_new(RAY_I64, 5); + int64_t xs[] = { -1000, 1, 2, 3, 9223372036854775000LL }; + for (int i = 0; i < 5; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 0), xs[0]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 2), xs[2]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 4), xs[4]); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_i64_i32(void) { + ray_t* v = ray_vec_new(RAY_I32, 4); + int32_t xs[] = { -7, 0, 12345, 2147483600 }; + for (int i = 0; i < 4; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 0), (int64_t)xs[0]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 1), (int64_t)xs[1]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 3), (int64_t)xs[3]); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_i64_i16(void) { + ray_t* v = ray_vec_new(RAY_I16, 3); + int16_t xs[] = { -32000, 0, 32000 }; + for (int i = 0; i < 3; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 0), (int64_t)xs[0]); + 
TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 1), (int64_t)xs[1]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 2), (int64_t)xs[2]); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_i64_u8(void) { + ray_t* v = ray_vec_new(RAY_U8, 4); + uint8_t xs[] = { 0, 1, 200, 255 }; + for (int i = 0; i < 4; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 0), (int64_t)xs[0]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 2), (int64_t)xs[2]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 3), (int64_t)xs[3]); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_i64_bool(void) { + ray_t* v = ray_vec_new(RAY_BOOL, 3); + uint8_t xs[] = { 0, 1, 1 }; + for (int i = 0; i < 3; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 0), 0); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 1), 1); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 2), 1); + ray_release(v); + PASS(); +} + +/* NOTE: RAY_DATE / RAY_TIME branches of ray_vec_get_i64 are intentionally + * NOT covered here. Their on-disk element width is 4 bytes (see + * ray_type_sizes in src/core/types.c), but ray_vec_get_i64 dispatches + * them through the same 8-byte cast as RAY_I64 / RAY_TIMESTAMP — reading + * past the row boundary. Reported separately; do not write a "happy + * path" test that locks in the broken behaviour. */ + +static test_result_t test_public_vec_get_i64_timestamp(void) { + ray_t* v = ray_vec_new(RAY_TIMESTAMP, 3); + int64_t xs[] = { 0, 1700000000000000000LL, 1800000000000000000LL }; + for (int i = 0; i < 3; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 0), xs[0]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 1), xs[1]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 2), xs[2]); + ray_release(v); + PASS(); +} + +/* ─── ray_vec_get_f64 — F32/F64 branches ───────────────────────────── + * + * Implementation accepts only RAY_F64 and RAY_F32; any other type + * returns 0.0. Integer vectors do NOT coerce — verified by reading + * the source. 
Cover only the supported (happy) types here. */ + +static test_result_t test_public_vec_get_f64_f64(void) { + ray_t* v = ray_vec_new(RAY_F64, 4); + double xs[] = { -1.5, 0.0, 2.25, 1e10 }; + for (int i = 0; i < 4; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_F(ray_vec_get_f64(v, 0), xs[0], 0.0); + TEST_ASSERT_EQ_F(ray_vec_get_f64(v, 2), xs[2], 0.0); + TEST_ASSERT_EQ_F(ray_vec_get_f64(v, 3), xs[3], 0.0); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_f64_f32(void) { + ray_t* v = ray_vec_new(RAY_F32, 3); + float xs[] = { -0.5f, 1.25f, 3.5f }; + for (int i = 0; i < 3; i++) v = ray_vec_append(v, &xs[i]); + /* F32 values round-trip exactly to double when they are representable + * in 24-bit mantissa form — these are powers-of-two-fraction sums. */ + TEST_ASSERT_EQ_F(ray_vec_get_f64(v, 0), (double)xs[0], 1e-6); + TEST_ASSERT_EQ_F(ray_vec_get_f64(v, 1), (double)xs[1], 1e-6); + TEST_ASSERT_EQ_F(ray_vec_get_f64(v, 2), (double)xs[2], 1e-6); + ray_release(v); + PASS(); +} + +/* ─── ray_vec_get_sym_id — every SYM width ─────────────────────────── + * + * The implementation dispatches through ray_read_sym which respects the + * width-encoded attrs. Use ray_sym_intern to obtain real IDs, append + * via the W64-shaped int64 elem (ray_vec_append normalizes width), then + * verify the round-trip. W8 sym vec only addresses ≤255 distinct IDs; + * the first builtins claim low slots so our user-interned names land in + * a range that still fits an 8-bit index. 
*/ + +static test_result_t test_public_vec_get_sym_id_w64(void) { + int64_t a = ray_sym_intern("pub_w64_a", 9); + int64_t b = ray_sym_intern("pub_w64_b", 9); + int64_t c = ray_sym_intern("pub_w64_c", 9); + + ray_t* v = ray_sym_vec_new(RAY_SYM_W64, 3); + v = ray_vec_append(v, &a); + v = ray_vec_append(v, &b); + v = ray_vec_append(v, &c); + + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 0), a); + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 1), b); + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 2), c); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_sym_id_w32(void) { + int64_t a = ray_sym_intern("pub_w32_a", 9); + int64_t b = ray_sym_intern("pub_w32_b", 9); + + ray_t* v = ray_sym_vec_new(RAY_SYM_W32, 2); + v = ray_vec_append(v, &a); + v = ray_vec_append(v, &b); + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 0), a); + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 1), b); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_sym_id_w16(void) { + int64_t a = ray_sym_intern("pub_w16_a", 9); + int64_t b = ray_sym_intern("pub_w16_b", 9); + + ray_t* v = ray_sym_vec_new(RAY_SYM_W16, 2); + v = ray_vec_append(v, &a); + v = ray_vec_append(v, &b); + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 0), a); + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 1), b); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_sym_id_w8(void) { + /* W8 indices only address up to 255 distinct entries. By the time + * the runtime is up the symbol table holds the builtin set; user + * intern IDs are appended after. Provided the cumulative count + * stays under 256 (well within the fresh-runtime budget), the W8 + * append path will succeed. */ + int64_t a = ray_sym_intern("pub_w8_a", 8); + int64_t b = ray_sym_intern("pub_w8_b", 8); + + /* Skip when the runtime's existing builtins have already pushed past + * the W8 ceiling — the public API doesn't expose narrowing semantics + * here and we want a deterministic happy path. 
*/ + if (a > 0xFF || b > 0xFF) { + SKIP("sym ID exceeds W8 range — happy-path narrowing unreachable"); + } + + ray_t* v = ray_sym_vec_new(RAY_SYM_W8, 2); + v = ray_vec_append(v, &a); + v = ray_vec_append(v, &b); + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 0), a); + TEST_ASSERT_EQ_I(ray_vec_get_sym_id(v, 1), b); + ray_release(v); + PASS(); +} + +/* ─── ray_runtime_create_with_sym* (happy path, eval round-trip) ───── + * + * These tests own their runtime lifecycle (no setup/teardown entry). + * Pass a path that doesn't exist: per the contract, ENOENT is the + * normal first-run case — out_sym_err stays RAY_OK and the runtime + * comes up. Run a trivial eval to confirm the language stack is live, + * then destroy. */ + +static test_result_t test_public_runtime_create_with_sym_eval(void) { + char tmpl[] = "/tmp/rayforce-pub-rt-XXXXXX"; + char* dir = mkdtemp(tmpl); + TEST_ASSERT_NOT_NULL(dir); + char path[256]; + snprintf(path, sizeof(path), "%s/missing.sym", dir); + + ray_runtime_t* rt = ray_runtime_create_with_sym(path); + TEST_ASSERT_NOT_NULL(rt); + + /* Trivial eval — confirms ray_lang_init ran and the env has the + * arithmetic builtin wired up. Rayfall uses Lisp-style prefix + * notation (see test_lang.c). */ + ray_t* r = ray_eval_str("(+ 1 2)"); + TEST_ASSERT_NOT_NULL(r); + TEST_ASSERT_FALSE(RAY_IS_ERR(r)); + TEST_ASSERT_EQ_I(ray_obj_type(r), -RAY_I64); + TEST_ASSERT_EQ_I(r->i64, 3); + ray_release(r); + + ray_runtime_destroy(rt); + rmdir(dir); + PASS(); +} + +static test_result_t test_public_runtime_create_with_sym_err_eval(void) { + char tmpl[] = "/tmp/rayforce-pub-rt-err-XXXXXX"; + char* dir = mkdtemp(tmpl); + TEST_ASSERT_NOT_NULL(dir); + char path[256]; + snprintf(path, sizeof(path), "%s/missing.sym", dir); + + ray_err_t err = RAY_ERR_OOM; /* poison */ + ray_runtime_t* rt = ray_runtime_create_with_sym_err(path, &err); + TEST_ASSERT_NOT_NULL(rt); + /* ENOENT is the documented first-run case: out_sym_err must be + * cleared to RAY_OK by runtime_create_impl. 
*/ + TEST_ASSERT_EQ_I((int)err, (int)RAY_OK); + + ray_t* r = ray_eval_str("(* 5 6)"); + TEST_ASSERT_NOT_NULL(r); + TEST_ASSERT_FALSE(RAY_IS_ERR(r)); + TEST_ASSERT_EQ_I(r->i64, 30); + ray_release(r); + + ray_runtime_destroy(rt); + rmdir(dir); + PASS(); +} + +/* ray_runtime_destroy(NULL) is documented as a no-op via the early + * `if (!rt) return;` guard. Pin that behaviour so a future refactor + * can't silently break it. No setup/teardown — we don't want a real + * runtime alive when we hand the destroyer NULL. */ +static test_result_t test_public_runtime_destroy_null_is_noop(void) { + ray_runtime_destroy(NULL); + PASS(); +} + +/* ═══════════════════════════════════════════════════════════════════════ + * Interrupt API — global flag (eval.c). Happy-path set/get round-trip + * for the public ray_*_interrupt names and their legacy ray_eval_* + * wrappers. The flag is thread-local sig_atomic_t storage; here we + * only verify the set→get→clear contract. + * ═══════════════════════════════════════════════════════════════════════ */ + +static test_result_t test_public_interrupt_roundtrip(void) { + ray_clear_interrupt(); + TEST_ASSERT_FALSE(ray_interrupted()); + + ray_request_interrupt(); + TEST_ASSERT_TRUE(ray_interrupted()); + + ray_clear_interrupt(); + TEST_ASSERT_FALSE(ray_interrupted()); + PASS(); +} + +static test_result_t test_public_interrupt_idempotent_set(void) { + ray_clear_interrupt(); + ray_request_interrupt(); + ray_request_interrupt(); + TEST_ASSERT_TRUE(ray_interrupted()); + ray_clear_interrupt(); + TEST_ASSERT_FALSE(ray_interrupted()); + PASS(); +} + +static test_result_t test_public_eval_interrupt_wrappers(void) { + ray_eval_clear_interrupt(); + TEST_ASSERT_EQ_I(ray_eval_is_interrupted(), 0); + TEST_ASSERT_FALSE(ray_interrupted()); + + ray_eval_request_interrupt(); + TEST_ASSERT_TRUE(ray_eval_is_interrupted() != 0); + TEST_ASSERT_TRUE(ray_interrupted()); + + ray_eval_clear_interrupt(); + TEST_ASSERT_EQ_I(ray_eval_is_interrupted(), 0); + 
TEST_ASSERT_FALSE(ray_interrupted()); + PASS(); +} + +static test_result_t test_public_interrupt_cross_path(void) { + ray_clear_interrupt(); + + ray_request_interrupt(); + TEST_ASSERT_TRUE(ray_eval_is_interrupted() != 0); + ray_clear_interrupt(); + + ray_eval_request_interrupt(); + TEST_ASSERT_TRUE(ray_interrupted()); + ray_eval_clear_interrupt(); + TEST_ASSERT_FALSE(ray_interrupted()); + PASS(); +} + +/* nfo API — get/set returns the same handle. */ +static test_result_t test_public_eval_nfo_roundtrip(void) { + ray_t* prev = ray_eval_get_nfo(); + + ray_eval_set_nfo(NULL); + TEST_ASSERT_NULL(ray_eval_get_nfo()); + + const char* src = "(+ 1 2)"; + ray_t* nfo = ray_nfo_create("test", 4, src, 7); + TEST_ASSERT_NOT_NULL(nfo); + TEST_ASSERT_FALSE(RAY_IS_ERR(nfo)); + + ray_eval_set_nfo(nfo); + TEST_ASSERT_EQ_PTR(ray_eval_get_nfo(), nfo); + + ray_eval_set_nfo(prev); + ray_release(nfo); + PASS(); +} + +/* Restricted-mode API — pure data store with no side effects on benign arith. */ +static test_result_t test_public_eval_restricted_setget(void) { + ray_eval_set_restricted(false); + TEST_ASSERT_FALSE(ray_eval_get_restricted()); + + ray_eval_set_restricted(true); + TEST_ASSERT_TRUE(ray_eval_get_restricted()); + + ray_eval_set_restricted(false); + TEST_ASSERT_FALSE(ray_eval_get_restricted()); + PASS(); +} + +static test_result_t test_public_eval_restricted_allows_arith(void) { + ray_eval_set_restricted(true); + ray_t* r = ray_eval_str("(+ 1 2)"); + ray_eval_set_restricted(false); + + TEST_ASSERT_NOT_NULL(r); + TEST_ASSERT_FALSE(RAY_IS_ERR(r)); + TEST_ASSERT_EQ_I(r->type, -RAY_I64); + TEST_ASSERT_EQ_I(r->i64, 3); + ray_release(r); + PASS(); +} + +/* Error-trace API — ray_eval_str clears the trace at entry. 
*/ +static test_result_t test_public_get_error_trace_populated(void) { + ray_t* def = ray_eval_str("(set boom (fn [x] (+ x 1)))"); + TEST_ASSERT_NOT_NULL(def); + TEST_ASSERT_FALSE(RAY_IS_ERR(def)); + ray_release(def); + + ray_t* err = ray_eval_str("(boom \"not-a-number\")"); + TEST_ASSERT_NOT_NULL(err); + TEST_ASSERT_TRUE(RAY_IS_ERR(err)); + + ray_t* trace = ray_get_error_trace(); + TEST_ASSERT_NOT_NULL(trace); + TEST_ASSERT_EQ_I(trace->type, RAY_LIST); + TEST_ASSERT_TRUE(ray_len(trace) > 0); + + ray_t* frame0 = ((ray_t**)ray_data(trace))[0]; + TEST_ASSERT_NOT_NULL(frame0); + TEST_ASSERT_EQ_I(frame0->type, RAY_LIST); + TEST_ASSERT_EQ_I(ray_len(frame0), 4); + + ray_release(err); + PASS(); +} + +static test_result_t test_public_get_error_trace_cleared_on_eval(void) { + ray_t* def = ray_eval_str("(set boom2 (fn [x] (+ x 1)))"); + TEST_ASSERT_NOT_NULL(def); + ray_release(def); + + ray_t* err = ray_eval_str("(boom2 \"x\")"); + TEST_ASSERT_TRUE(RAY_IS_ERR(err)); + TEST_ASSERT_NOT_NULL(ray_get_error_trace()); + ray_release(err); + + ray_t* ok = ray_eval_str("(+ 10 20)"); + TEST_ASSERT_NOT_NULL(ok); + TEST_ASSERT_FALSE(RAY_IS_ERR(ok)); + TEST_ASSERT_NULL(ray_get_error_trace()); + ray_release(ok); + PASS(); +} + const test_entry_t public_api_entries[] = { - { "public/ipc_client_symbols", test_public_ipc_client_symbols, NULL, NULL }, - { "public/query_and_format_symbols", test_public_query_and_format_symbols, NULL, NULL }, + { "public/ipc_client_symbols", test_public_ipc_client_symbols, NULL, NULL }, + { "public/query_and_format_symbols", test_public_query_and_format_symbols, NULL, NULL }, + + { "public/obj_type_atom_i64", test_public_obj_type_atom_i64, public_api_setup, public_api_teardown }, + { "public/obj_type_atom_f64", test_public_obj_type_atom_f64, public_api_setup, public_api_teardown }, + { "public/obj_type_atom_sym", test_public_obj_type_atom_sym, public_api_setup, public_api_teardown }, + { "public/obj_type_vec_i64", test_public_obj_type_vec_i64, public_api_setup, 
public_api_teardown }, + { "public/obj_type_vec_f64", test_public_obj_type_vec_f64, public_api_setup, public_api_teardown }, + { "public/obj_type_vec_sym", test_public_obj_type_vec_sym, public_api_setup, public_api_teardown }, + { "public/obj_type_list", test_public_obj_type_list, public_api_setup, public_api_teardown }, + { "public/obj_type_table", test_public_obj_type_table, public_api_setup, public_api_teardown }, + { "public/obj_type_dict", test_public_obj_type_dict, public_api_setup, public_api_teardown }, + + { "public/vec_get_i64_i64", test_public_vec_get_i64_i64, public_api_setup, public_api_teardown }, + { "public/vec_get_i64_i32", test_public_vec_get_i64_i32, public_api_setup, public_api_teardown }, + { "public/vec_get_i64_i16", test_public_vec_get_i64_i16, public_api_setup, public_api_teardown }, + { "public/vec_get_i64_u8", test_public_vec_get_i64_u8, public_api_setup, public_api_teardown }, + { "public/vec_get_i64_bool", test_public_vec_get_i64_bool, public_api_setup, public_api_teardown }, + { "public/vec_get_i64_timestamp", test_public_vec_get_i64_timestamp, public_api_setup, public_api_teardown }, + + { "public/vec_get_f64_f64", test_public_vec_get_f64_f64, public_api_setup, public_api_teardown }, + { "public/vec_get_f64_f32", test_public_vec_get_f64_f32, public_api_setup, public_api_teardown }, + + { "public/vec_get_sym_id_w64", test_public_vec_get_sym_id_w64, public_api_setup, public_api_teardown }, + { "public/vec_get_sym_id_w32", test_public_vec_get_sym_id_w32, public_api_setup, public_api_teardown }, + { "public/vec_get_sym_id_w16", test_public_vec_get_sym_id_w16, public_api_setup, public_api_teardown }, + { "public/vec_get_sym_id_w8", test_public_vec_get_sym_id_w8, public_api_setup, public_api_teardown }, + + /* These tests manage their own runtime lifecycle. 
*/ + { "public/runtime_create_with_sym_eval", test_public_runtime_create_with_sym_eval, NULL, NULL }, + { "public/runtime_create_with_sym_err_eval", test_public_runtime_create_with_sym_err_eval, NULL, NULL }, + { "public/runtime_destroy_null_is_noop", test_public_runtime_destroy_null_is_noop, NULL, NULL }, + + /* eval interrupt / nfo / restricted / error-trace public API. */ + { "public/interrupt_roundtrip", test_public_interrupt_roundtrip, NULL, NULL }, + { "public/interrupt_idempotent_set", test_public_interrupt_idempotent_set, NULL, NULL }, + { "public/eval_interrupt_wrappers", test_public_eval_interrupt_wrappers, NULL, NULL }, + { "public/interrupt_cross_path", test_public_interrupt_cross_path, NULL, NULL }, + { "public/eval_nfo_roundtrip", test_public_eval_nfo_roundtrip, public_api_setup, public_api_teardown }, + { "public/eval_restricted_setget", test_public_eval_restricted_setget, NULL, NULL }, + { "public/eval_restricted_allows_arith", test_public_eval_restricted_allows_arith, public_api_setup, public_api_teardown }, + { "public/get_error_trace_populated", test_public_get_error_trace_populated, public_api_setup, public_api_teardown }, + { "public/get_error_trace_cleared_on_eval",test_public_get_error_trace_cleared_on_eval,public_api_setup, public_api_teardown }, + { NULL, NULL, NULL, NULL }, }; diff --git a/test/test_sort.c b/test/test_sort.c index f563d896..3939b46a 100644 --- a/test/test_sort.c +++ b/test/test_sort.c @@ -1029,6 +1029,413 @@ static test_result_t test_sort_bool_nulls_first(void) { PASS(); } +/* ══════════════════════════════════════════════════════════════════ + * top / bot (partial top-N / bottom-N) — happy path + * + * Targets ray_top_fn / ray_bot_fn (sort.c:3448, 3453) which dispatch + * through topk_take_vec → topk_indices_single → either the radix- + * encoded heap path (numeric types) or topk_indices_cmp_single + + * topk_indices_cmp + topk_cmp_sift_down (SYM type, sort.c:3173). 
+ * + * Happy-path only: correct-type / correct-shape inputs. Null / + * wrong-type / K-edge cases are covered elsewhere (top_bot.rfl). + * ══════════════════════════════════════════════════════════════════ */ + +/* (top vec K) over an I64 vec with K < N — exercises the numeric + * radix-encoded bounded-heap path inside topk_indices_single. */ +static test_result_t test_top_i64_k_lt_n(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t data[] = {3, 1, 5, 2, 7, 4, 9, 6, 8}; + ray_t* v = ray_vec_from_raw(RAY_I64, data, 9); + TEST_ASSERT_NOT_NULL(v); + + ray_t* k = ray_i64(3); + ray_t* res = ray_top_fn(v, k); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 3); + TEST_ASSERT_EQ_I(res->type, RAY_I64); + + /* Top 3 of {3,1,5,2,7,4,9,6,8} desc = {9,8,7}. */ + const int64_t* r = (const int64_t*)ray_data(res); + TEST_ASSERT_EQ_I(r[0], 9); + TEST_ASSERT_EQ_I(r[1], 8); + TEST_ASSERT_EQ_I(r[2], 7); + + ray_release(res); ray_release(k); ray_release(v); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (top vec 1) — degenerate K=1 path: heap-of-one == max. */ +static test_result_t test_top_i64_k_eq_one(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t data[] = {3, 1, 5, 2, 7, 4, 9, 6, 8}; + ray_t* v = ray_vec_from_raw(RAY_I64, data, 9); + ray_t* k = ray_i64(1); + ray_t* res = ray_top_fn(v, k); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 1); + TEST_ASSERT_EQ_I(((const int64_t*)ray_data(res))[0], 9); + + ray_release(res); ray_release(k); ray_release(v); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (bot vec K) — mirror path with desc=0; verifies bot's heap orientation. 
*/ +static test_result_t test_bot_i64_k_lt_n(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t data[] = {3, 1, 5, 2, 7, 4, 9, 6, 8}; + ray_t* v = ray_vec_from_raw(RAY_I64, data, 9); + ray_t* k = ray_i64(3); + ray_t* res = ray_bot_fn(v, k); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 3); + + /* Bot 3 asc = {1,2,3}. */ + const int64_t* r = (const int64_t*)ray_data(res); + TEST_ASSERT_EQ_I(r[0], 1); + TEST_ASSERT_EQ_I(r[1], 2); + TEST_ASSERT_EQ_I(r[2], 3); + + ray_release(res); ray_release(k); ray_release(v); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (top vec K) over F64 — exercises the F64 branch of the radix encode + * inside the bounded-heap path. */ +static test_result_t test_top_f64_k_lt_n(void) { + ray_heap_init(); + ray_sym_init(); + + double data[] = {1.5, 2.5, 0.5, 3.5, -1.0, 4.5, 2.0}; + ray_t* v = ray_vec_from_raw(RAY_F64, data, 7); + ray_t* k = ray_i64(3); + ray_t* res = ray_top_fn(v, k); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 3); + TEST_ASSERT_EQ_I(res->type, RAY_F64); + + /* Top 3 desc of {1.5,2.5,0.5,3.5,-1.0,4.5,2.0} = {4.5, 3.5, 2.5}. */ + const double* r = (const double*)ray_data(res); + TEST_ASSERT_EQ_F(r[0], 4.5, 1e-9); + TEST_ASSERT_EQ_F(r[1], 3.5, 1e-9); + TEST_ASSERT_EQ_F(r[2], 2.5, 1e-9); + + ray_release(res); ray_release(k); ray_release(v); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (bot vec K) over F64 — F64 branch with desc=0. 
*/ +static test_result_t test_bot_f64_k_lt_n(void) { + ray_heap_init(); + ray_sym_init(); + + double data[] = {1.5, 2.5, 0.5, 3.5, -1.0, 4.5, 2.0}; + ray_t* v = ray_vec_from_raw(RAY_F64, data, 7); + ray_t* k = ray_i64(2); + ray_t* res = ray_bot_fn(v, k); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 2); + + const double* r = (const double*)ray_data(res); + TEST_ASSERT_EQ_F(r[0], -1.0, 1e-9); + TEST_ASSERT_EQ_F(r[1], 0.5, 1e-9); + + ray_release(res); ray_release(k); ray_release(v); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (top vec K=N) — k>=len → falls through to ray_desc_fn (full sort), + * which returns a lazy chain that must be materialized. Exercises + * the K==N short-circuit in topk_take_vec. */ +static test_result_t test_top_i64_k_eq_n(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t data[] = {3, 1, 5, 2, 7}; + ray_t* v = ray_vec_from_raw(RAY_I64, data, 5); + ray_t* k = ray_i64(5); + ray_t* res = ray_top_fn(v, k); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + if (ray_is_lazy(res)) res = ray_lazy_materialize(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 5); + + /* Full desc = {7,5,3,2,1}. */ + const int64_t* r = (const int64_t*)ray_data(res); + TEST_ASSERT_EQ_I(r[0], 7); + TEST_ASSERT_EQ_I(r[4], 1); + for (int64_t i = 1; i < 5; i++) + TEST_ASSERT_FMT(r[i] <= r[i-1], + "top k==n not desc-sorted at %lld", (long long)i); + + ray_release(res); ray_release(k); ray_release(v); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (bot vec K=N) mirror. 
*/ +static test_result_t test_bot_i64_k_eq_n(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t data[] = {3, 1, 5, 2, 7}; + ray_t* v = ray_vec_from_raw(RAY_I64, data, 5); + ray_t* k = ray_i64(5); + ray_t* res = ray_bot_fn(v, k); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + if (ray_is_lazy(res)) res = ray_lazy_materialize(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 5); + + const int64_t* r = (const int64_t*)ray_data(res); + TEST_ASSERT_EQ_I(r[0], 1); + TEST_ASSERT_EQ_I(r[4], 7); + + ray_release(res); ray_release(k); ray_release(v); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (top symvec K) — RAY_SYM dispatches to topk_indices_cmp_single + * (sort.c:3173), which calls topk_indices_cmp + topk_cmp_sift_down. + * Exercises the comparator-heap branch of the top-K fast path that + * the numeric radix encoding doesn't cover. */ +static test_result_t test_top_sym_k_lt_n(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t s_apple = ray_sym_intern("apple", 5); + int64_t s_banana = ray_sym_intern("banana", 6); + int64_t s_cherry = ray_sym_intern("cherry", 6); + int64_t s_date = ray_sym_intern("date", 4); + int64_t s_elder = ray_sym_intern("elder", 5); + int64_t s_fig = ray_sym_intern("fig", 3); + + /* SYM_W64 width: index slot is int64_t */ + int64_t N = 12; + ray_t* sv = ray_sym_vec_new(RAY_SYM_W64, N); + TEST_ASSERT_NOT_NULL(sv); + sv->len = N; + int64_t syms[6] = { s_apple, s_banana, s_cherry, s_date, s_elder, s_fig }; + int64_t* sd = (int64_t*)ray_data(sv); + for (int64_t i = 0; i < N; i++) sd[i] = syms[i % 6]; + + /* (top sv 3) → top 3 lex-desc symbols. Lex order: + * apple < banana < cherry < date < elder < fig + * Each symbol appears twice (N=12, 6 syms), so the desc top 3 must + * draw from the {fig, fig, elder} multiset (two fig + one elder) + * since fig and elder are the two highest symbols. 
*/ + ray_t* k = ray_i64(3); + ray_t* res = ray_top_fn(sv, k); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 3); + TEST_ASSERT_TRUE(RAY_IS_SYM(res->type)); + + /* Read all three sym ids — the result is desc-sorted so r0 ≥ r1 ≥ r2 + * in lex order. Expected (with stable tie-break): fig, fig, elder. */ + const int64_t r0 = ray_read_sym(ray_data(res), 0, res->type, res->attrs); + const int64_t r1 = ray_read_sym(ray_data(res), 1, res->type, res->attrs); + const int64_t r2 = ray_read_sym(ray_data(res), 2, res->type, res->attrs); + TEST_ASSERT_EQ_I(r0, s_fig); + TEST_ASSERT_EQ_I(r1, s_fig); + TEST_ASSERT_EQ_I(r2, s_elder); + + ray_release(res); ray_release(k); ray_release(sv); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (bot symvec K) — mirror direction over SYM, exercising + * topk_indices_cmp_single with desc=0. */ +static test_result_t test_bot_sym_k_lt_n(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t s_apple = ray_sym_intern("apple", 5); + int64_t s_banana = ray_sym_intern("banana", 6); + int64_t s_cherry = ray_sym_intern("cherry", 6); + int64_t s_date = ray_sym_intern("date", 4); + int64_t s_elder = ray_sym_intern("elder", 5); + + int64_t N = 10; + ray_t* sv = ray_sym_vec_new(RAY_SYM_W64, N); + sv->len = N; + int64_t syms[5] = { s_apple, s_banana, s_cherry, s_date, s_elder }; + int64_t* sd = (int64_t*)ray_data(sv); + for (int64_t i = 0; i < N; i++) sd[i] = syms[i % 5]; + + ray_t* k = ray_i64(2); + ray_t* res = ray_bot_fn(sv, k); + TEST_ASSERT_NOT_NULL(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 2); + + /* Bot 2 asc = {apple, apple}: 'apple' appears at rows 0 and 5. 
*/ + const int64_t r0 = ray_read_sym(ray_data(res), 0, res->type, res->attrs); + const int64_t r1 = ray_read_sym(ray_data(res), 1, res->type, res->attrs); + TEST_ASSERT_EQ_I(r0, s_apple); + TEST_ASSERT_EQ_I(r1, s_apple); + + ray_release(res); ray_release(k); ray_release(sv); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* (top symvec K=N) — K==N → falls through to ray_desc_fn (full sort + * over SYM), still a happy-path traverse. */ +static test_result_t test_top_sym_k_eq_n(void) { + ray_heap_init(); + ray_sym_init(); + + int64_t s_a = ray_sym_intern("aa", 2); + int64_t s_b = ray_sym_intern("bb", 2); + int64_t s_c = ray_sym_intern("cc", 2); + + int64_t N = 3; + ray_t* sv = ray_sym_vec_new(RAY_SYM_W64, N); + sv->len = N; + int64_t* sd = (int64_t*)ray_data(sv); + sd[0] = s_b; sd[1] = s_a; sd[2] = s_c; + + ray_t* k = ray_i64(3); + ray_t* res = ray_top_fn(sv, k); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + if (ray_is_lazy(res)) res = ray_lazy_materialize(res); + TEST_ASSERT_FALSE(RAY_IS_ERR(res)); + TEST_ASSERT_EQ_I(ray_len(res), 3); + /* desc lex: cc, bb, aa */ + TEST_ASSERT_EQ_I(ray_read_sym(ray_data(res), 0, res->type, res->attrs), s_c); + TEST_ASSERT_EQ_I(ray_read_sym(ray_data(res), 1, res->type, res->attrs), s_b); + TEST_ASSERT_EQ_I(ray_read_sym(ray_data(res), 2, res->type, res->attrs), s_a); + + ray_release(res); ray_release(k); ray_release(sv); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* ══════════════════════════════════════════════════════════════════ + * MSD bucket sort — msd_radix_sort_run dispatches to + * msd_bucket_sort_fn + bucket_lsb_sort only when both + * nrows > 1,000,000 AND + * key_nbytes > 5 (range needs ≥6 bytes) + * apply (sort.c:810). Build a 1.1M-row I64 vec with a 56-bit value + * range that ensures compute_key_nbytes returns ≥6, so we drop into + * the MSD path with 256 buckets and per-bucket LSB radix. 
+ * ══════════════════════════════════════════════════════════════════ */ + +static test_result_t test_sort_msd_bucket_i64(void) { + ray_heap_init(); + ray_sym_init(); + + /* Just over 1M rows so we exceed the `n > 1000000` gate. */ + const int64_t N = 1000001; + ray_t* vec = ray_vec_new(RAY_I64, N); + TEST_ASSERT_NOT_NULL(vec); + int64_t* d = (int64_t*)ray_data(vec); + + /* Spread values across ~2^56 so the encoded key_nbytes is 7, + * tripping the n_bytes > 5 gate. Use a simple deterministic + * pseudo-random pattern that's neither sorted nor reverse-sorted. */ + const int64_t big = (int64_t)1 << 56; + for (int64_t i = 0; i < N; i++) { + /* 64-bit odd multiplier wraps mod 2^64, so the low 56 bits are + * scrambled; i * 2654435761 never wrapped and left d[] sorted. */ + uint64_t m = (uint64_t)i * 0x9E3779B97F4A7C15ULL; + d[i] = (int64_t)(m % (uint64_t)big); + } + vec->len = N; + + uint8_t desc = 0; + ray_t* result = ray_sort(&vec, &desc, NULL, 1, N); + TEST_ASSERT_NOT_NULL(result); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + TEST_ASSERT_EQ_I(ray_len(result), N); + + /* Verify ascending order at sparse checkpoints; full O(n) scan is + * also fine but costly. Walk every 137th element (coprime with + * 64 / 128 / 256) so we land on every bucket boundary class. */ + const int64_t* r = (const int64_t*)ray_data(result); + int64_t prev = r[0]; + for (int64_t i = 137; i < N; i += 137) { + TEST_ASSERT_FMT(r[i] >= prev, + "msd asc out of order at %lld: %lld < %lld", + (long long)i, (long long)r[i], (long long)prev); + prev = r[i]; + } + /* Sanity: adjacent pairs at start and end. */ + TEST_ASSERT_TRUE(r[1] >= r[0]); + TEST_ASSERT_TRUE(r[N-1] >= r[N-2]); + + ray_release(result); + ray_release(vec); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + +/* MSD bucket sort, descending — same path, exercises the desc branch + * of radix_encode_fn that feeds the bucketed sort. Smaller checks + * keep runtime moderate.
*/ +static test_result_t test_sort_msd_bucket_i64_desc(void) { + ray_heap_init(); + ray_sym_init(); + + const int64_t N = 1000001; + ray_t* vec = ray_vec_new(RAY_I64, N); + int64_t* d = (int64_t*)ray_data(vec); + + /* 56-bit spread; the 64-bit odd multiplier wraps mod 2^64, so the input is unsorted. */ + const int64_t big = (int64_t)1 << 56; + for (int64_t i = 0; i < N; i++) { + uint64_t m = (uint64_t)(i + 17) * 0xC2B2AE3D27D4EB4FULL; + d[i] = (int64_t)(m % (uint64_t)big); + } + vec->len = N; + + uint8_t desc = 1; + ray_t* result = ray_sort(&vec, &desc, NULL, 1, N); + TEST_ASSERT_FALSE(RAY_IS_ERR(result)); + TEST_ASSERT_EQ_I(ray_len(result), N); + + const int64_t* r = (const int64_t*)ray_data(result); + int64_t prev = r[0]; + for (int64_t i = 211; i < N; i += 211) { + TEST_ASSERT_FMT(r[i] <= prev, + "msd desc out of order at %lld: %lld > %lld", + (long long)i, (long long)r[i], (long long)prev); + prev = r[i]; + } + + ray_release(result); + ray_release(vec); + ray_sym_destroy(); + ray_heap_destroy(); + PASS(); +} + /* ─── Entry table ────────────────────────────────────────────────── */ const test_entry_t sort_entries[] = { @@ -1073,5 +1480,23 @@ const test_entry_t sort_entries[] = { { "sort/u8_nulls_last_asc", test_sort_u8_nulls_last_asc, NULL, NULL }, { "sort/u8_nulls_first_desc", test_sort_u8_nulls_first_desc, NULL, NULL }, { "sort/bool_nulls_first", test_sort_bool_nulls_first, NULL, NULL }, + /* top / bot — partial top-N / bottom-N happy paths. Drive + * ray_top_fn / ray_bot_fn over numeric and SYM vectors with + * K Date: Tue, 19 May 2026 11:36:31 +0300 Subject: [PATCH 3/3] fix(runtime): ray_vec_get_i64 reads DATE/TIME with wrong width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ray_vec_get_i64 dispatched RAY_DATE and RAY_TIME through the same int64 cast as RAY_I64 / RAY_TIMESTAMP.
But ray_type_sizes in src/core/types.c declares both DATE and TIME as 4-byte elements, not 8 — so the cast read 8 bytes per element, returning garbage for idx 0 (upper half captured the adjacent element) and OOB reading once idx >= 1. Fix: split DATE / TIME off the int64 path; read them as int32 alongside RAY_I32. RAY_TIMESTAMP stays on the int64 path (it is genuinely 8 bytes). Adds two TDD tests in test_public_api.c covering known DATE and TIME values; both FAIL before the fix and PASS after. Replaces the prior "intentionally NOT covered" comment. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/runtime.c | 6 ++++-- test/test_public_api.c | 35 +++++++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/core/runtime.c b/src/core/runtime.c index 05706203..4a17a2e3 100644 --- a/src/core/runtime.c +++ b/src/core/runtime.c @@ -368,10 +368,12 @@ uint8_t ray_obj_attrs(ray_t* v) { int64_t ray_vec_get_i64(ray_t* vec, int64_t idx) { if (!vec || idx < 0 || idx >= vec->len) return 0; - if (vec->type == RAY_I64 || vec->type == RAY_DATE || vec->type == RAY_TIME || vec->type == RAY_TIMESTAMP) { + if (vec->type == RAY_I64 || vec->type == RAY_TIMESTAMP) { return ((const int64_t*)ray_data(vec))[idx]; } - if (vec->type == RAY_I32) return ((const int32_t*)ray_data(vec))[idx]; + if (vec->type == RAY_I32 || vec->type == RAY_DATE || vec->type == RAY_TIME) { + return ((const int32_t*)ray_data(vec))[idx]; + } if (vec->type == RAY_I16) return ((const int16_t*)ray_data(vec))[idx]; if (vec->type == RAY_U8 || vec->type == RAY_BOOL) return ((const uint8_t*)ray_data(vec))[idx]; return 0; diff --git a/test/test_public_api.c b/test/test_public_api.c index 240efb84..fd938534 100644 --- a/test/test_public_api.c +++ b/test/test_public_api.c @@ -253,12 +253,33 @@ static test_result_t test_public_vec_get_i64_bool(void) { PASS(); } -/* NOTE: RAY_DATE / RAY_TIME branches of ray_vec_get_i64 are intentionally - * NOT covered here. 
Their on-disk element width is 4 bytes (see - * ray_type_sizes in src/core/types.c), but ray_vec_get_i64 dispatches - * them through the same 8-byte cast as RAY_I64 / RAY_TIMESTAMP — reading - * past the row boundary. Reported separately; do not write a "happy - * path" test that locks in the broken behaviour. */ +/* RAY_DATE / RAY_TIME branches — element width is 4 bytes (int32) per + * ray_type_sizes in src/core/types.c. ray_vec_get_i64 must read them as + * int32, not int64. */ + +static test_result_t test_public_vec_get_i64_date(void) { + ray_t* v = ray_vec_new(RAY_DATE, 3); + /* Pick three distinct int32 day values with non-zero neighbours, so an + * 8-byte read folds an adjacent element in and misses the expected value. */ + int32_t xs[] = { 0, 8766, 19724 }; /* day counts; with a 1970.01.01 epoch: 1970.01.01, 1994.01.01, 2024.01.02 */ + for (int i = 0; i < 3; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 0), xs[0]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 1), xs[1]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 2), xs[2]); + ray_release(v); + PASS(); +} + +static test_result_t test_public_vec_get_i64_time(void) { + ray_t* v = ray_vec_new(RAY_TIME, 3); + int32_t xs[] = { 0, 43200000, 86399000 }; /* milliseconds since midnight: 00:00:00.000, 12:00:00.000, 23:59:59.000 */ + for (int i = 0; i < 3; i++) v = ray_vec_append(v, &xs[i]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 0), xs[0]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 1), xs[1]); + TEST_ASSERT_EQ_I(ray_vec_get_i64(v, 2), xs[2]); + ray_release(v); + PASS(); +} static test_result_t test_public_vec_get_i64_timestamp(void) { ray_t* v = ray_vec_new(RAY_TIMESTAMP, 3); @@ -611,6 +632,8 @@ const test_entry_t public_api_entries[] = { { "public/vec_get_i64_i16", test_public_vec_get_i64_i16, public_api_setup, public_api_teardown }, { "public/vec_get_i64_u8", test_public_vec_get_i64_u8, public_api_setup, public_api_teardown }, { "public/vec_get_i64_bool", test_public_vec_get_i64_bool, public_api_setup, public_api_teardown }, + { "public/vec_get_i64_date", 
test_public_vec_get_i64_date, public_api_setup, public_api_teardown }, + { "public/vec_get_i64_time", test_public_vec_get_i64_time, public_api_setup, public_api_teardown }, { "public/vec_get_i64_timestamp", test_public_vec_get_i64_timestamp, public_api_setup, public_api_teardown }, { "public/vec_get_f64_f64", test_public_vec_get_f64_f64, public_api_setup, public_api_teardown },