From 93979e489cbb8b50a1ade25e171ae52ee06e462b Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 01:32:08 -0700 Subject: [PATCH 01/11] fix(cactus): unify runtime state across bundle entry points via globalThis singleton (HIGH) --- .../Cactus_Download.integration.test.ts | 4 +- .../Cactus_DownloadRemove.test.ts | 12 +-- .../Cactus_ToolCalling.integration.test.ts | 4 +- providers/cactus/src/ai.browser.ts | 28 +++--- providers/cactus/src/ai.ts | 28 +++--- .../src/ai/common/Cactus_JobRunFns.browser.ts | 4 +- .../cactus/src/ai/common/Cactus_JobRunFns.ts | 4 +- .../src/ai/common/Cactus_Runtime.browser.ts | 87 ++++++++++++------- .../cactus/src/ai/common/Cactus_Runtime.ts | 87 ++++++++++++------- .../src/ai/common/Cactus_RuntimeState.ts | 76 ++++++++++++++++ 10 files changed, 224 insertions(+), 110 deletions(-) create mode 100644 providers/cactus/src/ai/common/Cactus_RuntimeState.ts diff --git a/packages/test/src/test/ai-provider-cactus/Cactus_Download.integration.test.ts b/packages/test/src/test/ai-provider-cactus/Cactus_Download.integration.test.ts index c9f1c3de9..a7244c3ed 100644 --- a/packages/test/src/test/ai-provider-cactus/Cactus_Download.integration.test.ts +++ b/packages/test/src/test/ai-provider-cactus/Cactus_Download.integration.test.ts @@ -21,8 +21,8 @@ describe.skipIf(!RUN)("Cactus_Download (integration)", () => { beforeEach(() => { dir = mkdtempSync(join(tmpdir(), "cactus-download-")); - cactusEngines.clear(); - cactusConfigJson.clear(); + cactusEngines().clear(); + cactusConfigJson().clear(); }); afterEach(() => { diff --git a/packages/test/src/test/ai-provider-cactus/Cactus_DownloadRemove.test.ts b/packages/test/src/test/ai-provider-cactus/Cactus_DownloadRemove.test.ts index 07384b809..75a53565e 100644 --- a/packages/test/src/test/ai-provider-cactus/Cactus_DownloadRemove.test.ts +++ b/packages/test/src/test/ai-provider-cactus/Cactus_DownloadRemove.test.ts @@ -13,13 +13,13 @@ const Cactus_DownloadRemove = 
runFnFor(["model.download-remove"]); describe("Cactus_DownloadRemove", () => { afterEach(() => { - cactusEngines.clear(); - cactusConfigJson.clear(); + cactusEngines().clear(); + cactusConfigJson().clear(); }); it("drops cached engine and config; emits finish", async () => { - cactusEngines.set("needle-26m", {} as any); - cactusConfigJson.set("needle-26m", { fake: true }); + cactusEngines().set("needle-26m", {} as any); + cactusConfigJson().set("needle-26m", { fake: true }); let finished = false; const controller = new AbortController(); @@ -40,7 +40,7 @@ describe("Cactus_DownloadRemove", () => { } ); expect(finished).toBe(true); - expect(cactusEngines.has("needle-26m")).toBe(false); - expect(cactusConfigJson.has("needle-26m")).toBe(false); + expect(cactusEngines().has("needle-26m")).toBe(false); + expect(cactusConfigJson().has("needle-26m")).toBe(false); }); }); diff --git a/packages/test/src/test/ai-provider-cactus/Cactus_ToolCalling.integration.test.ts b/packages/test/src/test/ai-provider-cactus/Cactus_ToolCalling.integration.test.ts index c1c02915b..65872756d 100644 --- a/packages/test/src/test/ai-provider-cactus/Cactus_ToolCalling.integration.test.ts +++ b/packages/test/src/test/ai-provider-cactus/Cactus_ToolCalling.integration.test.ts @@ -49,8 +49,8 @@ describe.skipIf(!RUN)("Cactus_ToolCalling (integration)", () => { }); afterAll(() => { - cactusEngines.clear(); - cactusConfigJson.clear(); + cactusEngines().clear(); + cactusConfigJson().clear(); rmSync(dir, { recursive: true, force: true }); }); diff --git a/providers/cactus/src/ai.browser.ts b/providers/cactus/src/ai.browser.ts index 60f992589..00a488a2e 100644 --- a/providers/cactus/src/ai.browser.ts +++ b/providers/cactus/src/ai.browser.ts @@ -9,12 +9,11 @@ export * from "./ai/common/Cactus_Constants"; export * from "./ai/common/Cactus_ModelCatalog"; export * from "./ai/common/Cactus_ModelSchema"; -// Mutable runtime state (e.g. 
cactusEngines, cactusEngineLoadsInFlight, -// cactusConfigJson, cactusSessions) is intentionally NOT re-exported here. -// The `./ai` and `./ai-runtime` entry points are bundled separately, so -// re-exporting from both creates two distinct module instances, and reads -// on one would not see writes from the other. Import runtime state from -// `@workglow/cactus/ai-runtime` instead. +// Mutable runtime state lives on a globalThis-keyed singleton (see +// `Cactus_RuntimeState`). The `./ai` and `./ai-runtime` entry points are +// bundled separately; if each held its own module-level Map, reads on one +// bundle would not see writes from the other. Routing through the singleton +// keeps state consistent across bundles in the same realm. export * from "./ai/CactusProvider.browser"; export * from "./ai/CactusQueuedProvider.browser"; export * from "./ai/registerCactus.browser"; @@ -22,22 +21,21 @@ export * from "./ai/registerCactus.browser"; import { CactusQueuedProvider } from "./ai/CactusQueuedProvider.browser"; import { CACTUS_RUN_FN_SPECS } from "./ai/common/Cactus_Capabilities"; import { CACTUS_RUN_FNS } from "./ai/common/Cactus_JobRunFns.browser"; -import { cactusConfigJson, cactusEngines } from "./ai/common/Cactus_Runtime.browser"; +import { getCactusConfigJson, getCactusEngines } from "./ai/common/Cactus_Runtime.browser"; /** * @internal Symbols exported only for use by `@workglow/test`. Not part of the stable public API. * - * `cactusEngines` and `cactusConfigJson` are re-exported here so that tests can - * seed and inspect the runtime state used by the run-fns bundled into the `./ai` - * entry point. The `./ai` and `./ai-runtime` entry points are bundled separately - * and their runtime state copies are distinct module instances. Reading - * the runtime state via `_testOnly` (rather than `@workglow/cactus/ai-runtime`) - * guarantees the test observes the same Map that the run-fns mutate. 
+ * Test consumers should call `cactusEngines()` / `cactusConfigJson()` to obtain + * the underlying Maps. Because runtime state is now backed by a + * `globalThis`-keyed singleton, both the `./ai` and `./ai-runtime` bundles + * observe the same Map identity — but accessing via functions also lets tests + * call `__resetRuntimeForTests()` between specs without stale captures. */ export const _testOnly = { CactusQueuedProvider, CACTUS_RUN_FN_SPECS, CACTUS_RUN_FNS, - cactusEngines, - cactusConfigJson, + cactusEngines: getCactusEngines, + cactusConfigJson: getCactusConfigJson, } as const; diff --git a/providers/cactus/src/ai.ts b/providers/cactus/src/ai.ts index f9e8f827b..220e2e7ce 100644 --- a/providers/cactus/src/ai.ts +++ b/providers/cactus/src/ai.ts @@ -9,12 +9,11 @@ export * from "./ai/common/Cactus_Constants"; export * from "./ai/common/Cactus_ModelCatalog"; export * from "./ai/common/Cactus_ModelSchema"; -// Mutable runtime state (e.g. cactusEngines, cactusEngineLoadsInFlight, -// cactusConfigJson, cactusSessions) is intentionally NOT re-exported here. -// The `./ai` and `./ai-runtime` entry points are bundled separately, so -// re-exporting from both creates two distinct module instances — and reads -// on one would not see writes from the other. Import runtime state from -// `@workglow/cactus/ai-runtime` instead. +// Mutable runtime state lives on a globalThis-keyed singleton (see +// `Cactus_RuntimeState`). The `./ai` and `./ai-runtime` entry points are +// bundled separately; if each held its own module-level Map, reads on one +// bundle would not see writes from the other. Routing through the singleton +// keeps state consistent across bundles in the same realm. 
export * from "./ai/CactusProvider"; export * from "./ai/CactusQueuedProvider"; export * from "./ai/registerCactus"; @@ -22,22 +21,21 @@ export * from "./ai/registerCactus"; import { CactusQueuedProvider } from "./ai/CactusQueuedProvider"; import { CACTUS_RUN_FN_SPECS } from "./ai/common/Cactus_Capabilities"; import { CACTUS_RUN_FNS } from "./ai/common/Cactus_JobRunFns"; -import { cactusConfigJson, cactusEngines } from "./ai/common/Cactus_Runtime"; +import { getCactusConfigJson, getCactusEngines } from "./ai/common/Cactus_Runtime"; /** * @internal Symbols exported only for use by `@workglow/test`. Not part of the stable public API. * - * `cactusEngines` and `cactusConfigJson` are re-exported here so that tests can - * seed and inspect the runtime state used by the run-fns bundled into the `./ai` - * entry point. The `./ai` and `./ai-runtime` entry points are bundled separately - * — their copies of `Cactus_Runtime.ts` are distinct module instances. Reading - * the runtime state via `_testOnly` (rather than `@workglow/cactus/ai-runtime`) - * guarantees the test observes the same Map that the run-fns mutate. + * Test consumers should call `cactusEngines()` / `cactusConfigJson()` to obtain + * the underlying Maps. Because runtime state is now backed by a + * `globalThis`-keyed singleton, both the `./ai` and `./ai-runtime` bundles + * observe the same Map identity — but accessing via functions also lets tests + * call `__resetRuntimeForTests()` between specs without stale captures. 
*/ export const _testOnly = { CactusQueuedProvider, CACTUS_RUN_FN_SPECS, CACTUS_RUN_FNS, - cactusEngines, - cactusConfigJson, + cactusEngines: getCactusEngines, + cactusConfigJson: getCactusConfigJson, } as const; diff --git a/providers/cactus/src/ai/common/Cactus_JobRunFns.browser.ts b/providers/cactus/src/ai/common/Cactus_JobRunFns.browser.ts index c537fb858..7250d4697 100644 --- a/providers/cactus/src/ai/common/Cactus_JobRunFns.browser.ts +++ b/providers/cactus/src/ai/common/Cactus_JobRunFns.browser.ts @@ -20,10 +20,10 @@ import { Cactus_ModelSearch } from "./Cactus_ModelSearch"; import { Cactus_ToolCalling } from "./Cactus_ToolCalling.browser"; export { - cactusConfigJson, - cactusEngines, deleteCactusSession, disposeCactusResources, + getCactusConfigJson, + getCactusEngines, getOrLoadEngine, loadSdk, removeCachedAssets, diff --git a/providers/cactus/src/ai/common/Cactus_JobRunFns.ts b/providers/cactus/src/ai/common/Cactus_JobRunFns.ts index 59cfb59c3..64e1f72e6 100644 --- a/providers/cactus/src/ai/common/Cactus_JobRunFns.ts +++ b/providers/cactus/src/ai/common/Cactus_JobRunFns.ts @@ -20,10 +20,10 @@ import { Cactus_ModelSearch } from "./Cactus_ModelSearch"; import { Cactus_ToolCalling } from "./Cactus_ToolCalling"; export { - cactusConfigJson, - cactusEngines, deleteCactusSession, disposeCactusResources, + getCactusConfigJson, + getCactusEngines, getOrLoadEngine, loadSdk, removeCachedAssets, diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts b/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts index e09a28601..b2f73211b 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts @@ -17,6 +17,14 @@ import { type CactusCatalogEntry, } from "./Cactus_ModelCatalog"; import type { CactusModelConfig } from "./Cactus_ModelSchema"; +import { + getCactusCachedModelIds, + getCactusConfigJson, + getCactusEngineLoadsInFlight, + getCactusEngines, + getCactusSessions, + 
getRuntime, +} from "./Cactus_RuntimeState"; type NeedleSdkModule = typeof import("needle-rs"); // `NeedleWasm` has a private constructor so `InstanceType<...>` cannot be used. @@ -108,23 +116,21 @@ export async function fetchAssetBytes( // ============================================================================ // Engine cache (in-memory, per worker/process) +// +// All Maps/Sets live on a globalThis-keyed singleton (see Cactus_RuntimeState). +// This ensures the `./ai` and `./ai-runtime` bundles — each compiled separately — +// share state. Callers should never store these references in module scope. // ============================================================================ -/** @internal Exported for tests. */ -export const cactusEngines: Map = new Map(); -/** @internal Exported for tests. */ -export const cactusConfigJson: Map = new Map(); -/** Tracks models whose assets have been persisted (downloaded) but not necessarily loaded. */ -const cactusCachedModelIds: Set = new Set(); - -const cactusEngineLoadsInFlight = new Map>(); - export async function getOrLoadEngine(model: CactusModelConfig): Promise { + const state = getRuntime(); const model_id = model.provider_config.model_id; - const cached = cactusEngines.get(model_id); + const cached = state.engines.get(model_id) as NeedleEngine | undefined; if (cached) return cached; - const inFlight = cactusEngineLoadsInFlight.get(model_id); + const inFlight = state.engineLoadsInFlight.get(model_id) as + | Promise + | undefined; if (inFlight) return inFlight; const loadPromise = (async (): Promise => { @@ -140,9 +146,9 @@ export async function getOrLoadEngine(model: CactusModelConfig): Promise { - cactusEngineLoadsInFlight.delete(model_id); + state.engineLoadsInFlight.delete(model_id); }); - cactusEngineLoadsInFlight.set(model_id, loadPromise); + state.engineLoadsInFlight.set(model_id, loadPromise); return loadPromise; } export function isModelLoaded(model_id: string): boolean { - return 
cactusEngines.has(model_id); + return getRuntime().engines.has(model_id); } /** Mark a model_id as having its assets persisted in Cache Storage. */ export function markModelCached(model_id: string): void { - cactusCachedModelIds.add(model_id); + getRuntime().cachedModelIds.add(model_id); } /** Returns true if the model's assets have been downloaded or the engine is currently loaded. */ export function isModelCached(model_id: string): boolean { - return cactusEngines.has(model_id) || cactusCachedModelIds.has(model_id); + const state = getRuntime(); + return state.engines.has(model_id) || state.cachedModelIds.has(model_id); } export async function getCactusModelCacheInfo( @@ -234,11 +241,8 @@ export async function getCactusModelCacheInfo( // Sessions (no-op — needle-rs is stateless across calls) // ============================================================================ -/** @internal Exported for tests. */ -export const cactusSessions: Map> = new Map(); - export async function deleteCactusSession(id: string): Promise { - return cactusSessions.delete(id); + return getCactusSessions().delete(id); } // ============================================================================ @@ -259,17 +263,18 @@ async function removeBrowserCacheEntries(entry: CactusCatalogEntry): Promise void }).free?.(); + engine.free?.(); } catch { /* best effort */ } } - cactusEngines.delete(model_id); - cactusConfigJson.delete(model_id); - cactusCachedModelIds.delete(model_id); + state.engines.delete(model_id); + state.configJson.delete(model_id); + state.cachedModelIds.delete(model_id); } export async function removeCachedAssets(model: CactusModelConfig): Promise { @@ -282,11 +287,27 @@ export async function removeCachedAssets(model: CactusModelConfig): Promise { - for (const id of Array.from(cactusEngines.keys())) { + const state = getRuntime(); + for (const id of Array.from(state.engines.keys())) { disposeCactusEngine(id); } - cactusEngines.clear(); - cactusConfigJson.clear(); - 
cactusCachedModelIds.clear(); - cactusSessions.clear(); + state.engines.clear(); + state.configJson.clear(); + state.cachedModelIds.clear(); + state.sessions.clear(); } + +// ============================================================================ +// Legacy re-exports for callers that imported the maps/sets directly. +// +// Prefer the accessor form (`getCactusEngines()`, etc.) so that +// `__resetRuntimeForTests()` produces fresh state. +// ============================================================================ + +export { + getCactusCachedModelIds, + getCactusConfigJson, + getCactusEngineLoadsInFlight, + getCactusEngines, + getCactusSessions, +} from "./Cactus_RuntimeState"; diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.ts b/providers/cactus/src/ai/common/Cactus_Runtime.ts index 6e0d6b965..6b40c1b66 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.ts @@ -13,6 +13,14 @@ import { type CactusCatalogEntry, } from "./Cactus_ModelCatalog"; import type { CactusModelConfig } from "./Cactus_ModelSchema"; +import { + getCactusCachedModelIds, + getCactusConfigJson, + getCactusEngineLoadsInFlight, + getCactusEngines, + getCactusSessions, + getRuntime, +} from "./Cactus_RuntimeState"; type NeedleSdkModule = typeof import("needle-rs"); // `NeedleWasm` has a private constructor so `InstanceType<...>` cannot be used. @@ -200,23 +208,21 @@ export async function fetchAssetBytes( // ============================================================================ // Engine cache (in-memory, per worker/process) +// +// All Maps/Sets live on a globalThis-keyed singleton (see Cactus_RuntimeState). +// This ensures the `./ai` and `./ai-runtime` bundles — each compiled separately — +// share state. Callers should never store these references in module scope. // ============================================================================ -/** @internal Exported for tests. 
*/ -export const cactusEngines: Map = new Map(); -/** @internal Exported for tests. */ -export const cactusConfigJson: Map = new Map(); -/** Tracks models whose assets have been persisted (downloaded) but not necessarily loaded. */ -const cactusCachedModelIds: Set = new Set(); - -const cactusEngineLoadsInFlight = new Map>(); - export async function getOrLoadEngine(model: CactusModelConfig): Promise { + const state = getRuntime(); const model_id = model.provider_config.model_id; - const cached = cactusEngines.get(model_id); + const cached = state.engines.get(model_id) as NeedleEngine | undefined; if (cached) return cached; - const inFlight = cactusEngineLoadsInFlight.get(model_id); + const inFlight = state.engineLoadsInFlight.get(model_id) as + | Promise + | undefined; if (inFlight) return inFlight; const loadPromise = (async (): Promise => { @@ -232,9 +238,9 @@ export async function getOrLoadEngine(model: CactusModelConfig): Promise { - cactusEngineLoadsInFlight.delete(model_id); + state.engineLoadsInFlight.delete(model_id); }); - cactusEngineLoadsInFlight.set(model_id, loadPromise); + state.engineLoadsInFlight.set(model_id, loadPromise); return loadPromise; } export function isModelLoaded(model_id: string): boolean { - return cactusEngines.has(model_id); + return getRuntime().engines.has(model_id); } /** Mark a model_id as having its assets persisted on disk / in Cache Storage. */ export function markModelCached(model_id: string): void { - cactusCachedModelIds.add(model_id); + getRuntime().cachedModelIds.add(model_id); } /** Returns true if the model's assets have been downloaded or the engine is currently loaded. 
*/ export function isModelCached(model_id: string): boolean { - return cactusEngines.has(model_id) || cactusCachedModelIds.has(model_id); + const state = getRuntime(); + return state.engines.has(model_id) || state.cachedModelIds.has(model_id); } export async function getCactusModelCacheInfo( @@ -330,11 +337,8 @@ export async function getCactusModelCacheInfo( // Sessions (no-op — needle-rs is stateless across calls) // ============================================================================ -/** @internal Exported for tests. */ -export const cactusSessions: Map> = new Map(); - export async function deleteCactusSession(id: string): Promise { - return cactusSessions.delete(id); + return getCactusSessions().delete(id); } // ============================================================================ @@ -363,17 +367,18 @@ async function removeNodeCacheDir(model: CactusModelConfig, model_id: string): P } function disposeCactusEngine(model_id: string): void { - const engine = cactusEngines.get(model_id); + const state = getRuntime(); + const engine = state.engines.get(model_id); if (engine) { try { - (engine as unknown as { free?: () => void }).free?.(); + engine.free?.(); } catch { /* best effort */ } } - cactusEngines.delete(model_id); - cactusConfigJson.delete(model_id); - cactusCachedModelIds.delete(model_id); + state.engines.delete(model_id); + state.configJson.delete(model_id); + state.cachedModelIds.delete(model_id); } export async function removeCachedAssets(model: CactusModelConfig): Promise { @@ -386,11 +391,27 @@ export async function removeCachedAssets(model: CactusModelConfig): Promise { - for (const id of Array.from(cactusEngines.keys())) { + const state = getRuntime(); + for (const id of Array.from(state.engines.keys())) { disposeCactusEngine(id); } - cactusEngines.clear(); - cactusConfigJson.clear(); - cactusCachedModelIds.clear(); - cactusSessions.clear(); + state.engines.clear(); + state.configJson.clear(); + state.cachedModelIds.clear(); + 
state.sessions.clear(); } + +// ============================================================================ +// Legacy re-exports for callers that imported the maps/sets directly. +// +// Prefer the accessor form (`getCactusEngines()`, etc.) so that +// `__resetRuntimeForTests()` produces fresh state. +// ============================================================================ + +export { + getCactusCachedModelIds, + getCactusConfigJson, + getCactusEngineLoadsInFlight, + getCactusEngines, + getCactusSessions, +} from "./Cactus_RuntimeState"; diff --git a/providers/cactus/src/ai/common/Cactus_RuntimeState.ts b/providers/cactus/src/ai/common/Cactus_RuntimeState.ts new file mode 100644 index 000000000..2ccf252a7 --- /dev/null +++ b/providers/cactus/src/ai/common/Cactus_RuntimeState.ts @@ -0,0 +1,76 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// Single source of truth for Cactus runtime state across bundle entry points. +// +// The package emits separate bundles for `./ai`, `./ai-runtime`, +// `./ai.browser`, and `./ai-runtime.browser`. Each bundle contains its own +// compiled copy of Cactus_Runtime.ts -> module-level Maps duplicate -> state +// desyncs. This module routes all access through a globalThis-keyed +// singleton (Symbol.for so duplicate symbol identities do not break it), +// so every bundle observes the same Maps. +// +// In a Web Worker context globalThis is per-worker, which is correct: +// engines do not transfer across workers anyway. 
+ +type AnyEngine = { free?: () => void }; + +export interface CactusRuntimeState { + readonly version: 1; + readonly engines: Map; + readonly configJson: Map; + readonly cachedModelIds: Set; + readonly engineLoadsInFlight: Map>; + readonly sessions: Map>; +} + +const RUNTIME_KEY = Symbol.for("@workglow/cactus.runtime.v1"); +type GlobalWithRuntime = { [RUNTIME_KEY]?: CactusRuntimeState }; + +export function getRuntime(): CactusRuntimeState { + const g = globalThis as unknown as GlobalWithRuntime; + let state = g[RUNTIME_KEY]; + if (!state) { + state = { + version: 1, + engines: new Map(), + configJson: new Map(), + cachedModelIds: new Set(), + engineLoadsInFlight: new Map(), + sessions: new Map(), + }; + g[RUNTIME_KEY] = state; + } else if (state.version !== 1) { + throw new Error( + `Cactus runtime state version mismatch (found v${state.version}, expected v1). ` + + `Mixed @workglow/cactus versions in the same process.` + ); + } + return state; +} + +/** @internal Test-only — resets all maps by removing the singleton. 
*/ +export function __resetRuntimeForTests(): void { + const g = globalThis as unknown as GlobalWithRuntime; + delete g[RUNTIME_KEY]; +} + +// Convenience accessors used by Cactus_Runtime{,.browser}.ts +export function getCactusEngines() { + return getRuntime().engines; +} +export function getCactusConfigJson() { + return getRuntime().configJson; +} +export function getCactusCachedModelIds() { + return getRuntime().cachedModelIds; +} +export function getCactusEngineLoadsInFlight() { + return getRuntime().engineLoadsInFlight; +} +export function getCactusSessions() { + return getRuntime().sessions; +} From 5d1670721636de27545c5d2af98e0a64e0bff219 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 01:35:33 -0700 Subject: [PATCH 02/11] feat(cactus): verify SHA-256 integrity of fetched model assets (HIGH) --- .../src/ai/common/Cactus_Download.browser.ts | 24 +++- .../cactus/src/ai/common/Cactus_Download.ts | 24 +++- .../cactus/src/ai/common/Cactus_Integrity.ts | 104 +++++++++++++++ .../src/ai/common/Cactus_ModelCatalog.ts | 70 +++++++++-- .../src/ai/common/Cactus_Runtime.browser.ts | 73 +++++++++-- .../cactus/src/ai/common/Cactus_Runtime.ts | 119 +++++++++++++++--- .../common/__tests__/Cactus_Integrity.test.ts | 114 +++++++++++++++++ .../Cactus_Runtime.crossBundle.test.ts | 59 +++++++++ .../__tests__/Cactus_RuntimeState.test.ts | 66 ++++++++++ 9 files changed, 604 insertions(+), 49 deletions(-) create mode 100644 providers/cactus/src/ai/common/Cactus_Integrity.ts create mode 100644 providers/cactus/src/ai/common/__tests__/Cactus_Integrity.test.ts create mode 100644 providers/cactus/src/ai/common/__tests__/Cactus_Runtime.crossBundle.test.ts create mode 100644 providers/cactus/src/ai/common/__tests__/Cactus_RuntimeState.test.ts diff --git a/providers/cactus/src/ai/common/Cactus_Download.browser.ts b/providers/cactus/src/ai/common/Cactus_Download.browser.ts index 682aeecb8..4b8365525 100644 --- a/providers/cactus/src/ai/common/Cactus_Download.browser.ts 
+++ b/providers/cactus/src/ai/common/Cactus_Download.browser.ts @@ -9,7 +9,8 @@ import type { ModelDownloadTaskRunInput, ModelDownloadTaskRunOutput, } from "@workglow/ai"; -import { getCactusCatalogEntry } from "./Cactus_ModelCatalog"; +import { CactusIntegrityError } from "./Cactus_Integrity"; +import { assetSpecsOf, getCactusCatalogEntry } from "./Cactus_ModelCatalog"; import type { CactusModelConfig } from "./Cactus_ModelSchema"; import { fetchAssetBytes, markModelCached } from "./Cactus_Runtime.browser"; @@ -23,14 +24,25 @@ export const Cactus_Download: AiProviderRunFn< const entry = getCactusCatalogEntry(model_id); if (!entry) throw new Error(`Unknown Cactus model_id: ${model_id}`); - const assets = [entry.assets.weights, entry.assets.vocab, entry.assets.config]; - for (let i = 0; i < assets.length; i++) { + const specs = assetSpecsOf(entry); + for (let i = 0; i < specs.length; i++) { + const spec = specs[i]; emit({ type: "phase", - message: `Downloading ${assets[i]}`, - progress: Math.round(((i + 0.5) / assets.length) * 99), + message: `Downloading ${spec.filename}`, + progress: Math.round(((i + 0.5) / specs.length) * 99), }); - await fetchAssetBytes(model, assets[i]); + try { + await fetchAssetBytes(model, spec); + } catch (err) { + if (err instanceof CactusIntegrityError) { + emit({ + type: "phase", + message: `Integrity check failed for ${spec.filename}: expected sha256 ${err.expected}, got ${err.actual}`, + }); + } + throw err; + } } markModelCached(model_id); emit({ type: "finish", data: { model: input.model! 
} }); diff --git a/providers/cactus/src/ai/common/Cactus_Download.ts b/providers/cactus/src/ai/common/Cactus_Download.ts index d6c014a52..c3b64b930 100644 --- a/providers/cactus/src/ai/common/Cactus_Download.ts +++ b/providers/cactus/src/ai/common/Cactus_Download.ts @@ -9,7 +9,8 @@ import type { ModelDownloadTaskRunInput, ModelDownloadTaskRunOutput, } from "@workglow/ai"; -import { getCactusCatalogEntry } from "./Cactus_ModelCatalog"; +import { CactusIntegrityError } from "./Cactus_Integrity"; +import { assetSpecsOf, getCactusCatalogEntry } from "./Cactus_ModelCatalog"; import type { CactusModelConfig } from "./Cactus_ModelSchema"; import { fetchAssetBytes, markModelCached } from "./Cactus_Runtime"; @@ -23,14 +24,25 @@ export const Cactus_Download: AiProviderRunFn< const entry = getCactusCatalogEntry(model_id); if (!entry) throw new Error(`Unknown Cactus model_id: ${model_id}`); - const assets = [entry.assets.weights, entry.assets.vocab, entry.assets.config]; - for (let i = 0; i < assets.length; i++) { + const specs = assetSpecsOf(entry); + for (let i = 0; i < specs.length; i++) { + const spec = specs[i]; emit({ type: "phase", - message: `Downloading ${assets[i]}`, - progress: Math.round(((i + 0.5) / assets.length) * 99), + message: `Downloading ${spec.filename}`, + progress: Math.round(((i + 0.5) / specs.length) * 99), }); - await fetchAssetBytes(model, assets[i]); + try { + await fetchAssetBytes(model, spec); + } catch (err) { + if (err instanceof CactusIntegrityError) { + emit({ + type: "phase", + message: `Integrity check failed for ${spec.filename}: expected sha256 ${err.expected}, got ${err.actual}`, + }); + } + throw err; + } } markModelCached(model_id); emit({ type: "finish", data: { model: input.model! 
} }); diff --git a/providers/cactus/src/ai/common/Cactus_Integrity.ts b/providers/cactus/src/ai/common/Cactus_Integrity.ts new file mode 100644 index 000000000..34396f4dd --- /dev/null +++ b/providers/cactus/src/ai/common/Cactus_Integrity.ts @@ -0,0 +1,104 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * SHA-256 integrity verification for Cactus model assets. + * + * The trust boundary for locally-executed model weights is anchored at the + * catalog: every byte loaded from disk, Cache Storage, or the network must + * hash to the catalog-pinned digest. Anything else is treated as adversarial + * and refused. + */ + +/** Sentinel value used in the catalog while real hashes are not yet populated. */ +export const CACTUS_HASH_PLACEHOLDER = "TODO_FILL_AT_RELEASE"; + +export class CactusIntegrityError extends Error { + readonly url: string; + readonly filename: string; + readonly expected: string; + readonly actual: string; + constructor(opts: { url: string; filename: string; expected: string; actual: string }) { + super( + `Integrity check failed for ${opts.filename} from ${opts.url}: ` + + `expected sha256 ${opts.expected}, got ${opts.actual}` + ); + this.name = "CactusIntegrityError"; + this.url = opts.url; + this.filename = opts.filename; + this.expected = opts.expected; + this.actual = opts.actual; + } +} + +export async function sha256Hex(bytes: Uint8Array | ArrayBuffer): Promise { + const input = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes); + const digest = await globalThis.crypto.subtle.digest("SHA-256", input); + const view = new Uint8Array(digest); + let s = ""; + for (let i = 0; i < view.length; i++) { + s += view[i].toString(16).padStart(2, "0"); + } + return s; +} + +/** + * Returns `true` if `expected` is the well-known placeholder that means + * "maintainer has not populated a real hash yet." 
In that case callers SHOULD + * skip verification but MUST log a clear warning — this is intended for + * pre-release dev only and must never reach a tagged release. + */ +export function isHashPlaceholder(expected: string): boolean { + return expected === CACTUS_HASH_PLACEHOLDER; +} + +/** + * Hashes `bytes` and throws `CactusIntegrityError` if it does not match + * `expected`. Throws a plain `Error` if `expected` is malformed (not 64 hex + * chars), since that is a catalog-author bug, not a content bug. + * + * If `expected` is the `TODO_FILL_AT_RELEASE` placeholder, verification is + * skipped and a one-time warning is logged. This keeps developers unblocked + * before the real hashes land while making the gap impossible to miss. + */ +export async function verifySha256( + bytes: Uint8Array | ArrayBuffer, + expected: string, + ctx: { url: string; filename: string } +): Promise { + if (isHashPlaceholder(expected)) { + warnPlaceholderOnce(ctx.filename); + return; + } + if (typeof expected !== "string" || expected.length !== 64) { + throw new Error( + `Invalid catalog SHA-256 for ${ctx.filename}: must be 64 hex chars (got length ${ + typeof expected === "string" ? expected.length : typeof expected + })` + ); + } + const expectedLc = expected.toLowerCase(); + if (!/^[0-9a-f]{64}$/.test(expectedLc)) { + throw new Error( + `Invalid catalog SHA-256 for ${ctx.filename}: contains non-hex characters` + ); + } + const actual = await sha256Hex(bytes); + if (actual !== expectedLc) { + throw new CactusIntegrityError({ ...ctx, expected: expectedLc, actual }); + } +} + +const _warnedFiles = new Set(); +function warnPlaceholderOnce(filename: string): void { + if (_warnedFiles.has(filename)) return; + _warnedFiles.add(filename); + // eslint-disable-next-line no-console + console.warn( + `[@workglow/cactus] SHA-256 catalog entry for "${filename}" is a placeholder; ` + + `integrity verification is DISABLED. 
This must be populated before release.` + ); +} diff --git a/providers/cactus/src/ai/common/Cactus_ModelCatalog.ts b/providers/cactus/src/ai/common/Cactus_ModelCatalog.ts index 8d70e8e28..ea31a1823 100644 --- a/providers/cactus/src/ai/common/Cactus_ModelCatalog.ts +++ b/providers/cactus/src/ai/common/Cactus_ModelCatalog.ts @@ -11,6 +11,21 @@ import { CACTUS_NEEDLE_26M, } from "./Cactus_Constants"; +/** + * A single asset file in a Cactus model catalog entry. + * + * `sha256` is the lowercase-hex digest of the canonical asset bytes at the + * pinned `revision` in `CactusCatalogEntry`. It anchors the trust boundary: + * any byte that fails this check is treated as adversarial and refused. + */ +export interface CactusAssetSpec { + readonly filename: string; + /** Lowercase hex SHA-256, exactly 64 characters. */ + readonly sha256: string; + /** Expected byte length — used as a cheap pre-check before hashing. */ + readonly size: number; +} + export interface CactusCatalogEntry { readonly model_id: string; readonly title: string; @@ -18,13 +33,28 @@ export interface CactusCatalogEntry { readonly hf_repo: string; readonly revision: string; readonly assets: { - readonly weights: string; - readonly vocab: string; - readonly config: string; + readonly weights: CactusAssetSpec; + readonly vocab: CactusAssetSpec; + readonly config: CactusAssetSpec; }; readonly capabilities: readonly Capability[]; } +/** + * Asserts that `s` is a lowercase hex SHA-256 (64 hex chars). + * + * Used at catalog load time to surface malformed entries before any + * verification call sees them. + */ +export function assertHexSha256(s: string, ctxLabel?: string): asserts s is string { + if (typeof s !== "string" || s.length !== 64 || !/^[0-9a-f]{64}$/.test(s)) { + throw new Error( + `Invalid SHA-256 in catalog${ctxLabel ? 
` (${ctxLabel})` : ""}: ` + + `expected 64 lowercase hex chars, got ${JSON.stringify(s)}` + ); + } +} + export const CACTUS_CATALOG: readonly CactusCatalogEntry[] = [ { model_id: CACTUS_NEEDLE_26M, @@ -34,9 +64,25 @@ export const CACTUS_CATALOG: readonly CactusCatalogEntry[] = [ hf_repo: CACTUS_DEFAULT_HF_REPO, revision: CACTUS_DEFAULT_REVISION, assets: { - weights: "needle.safetensors", - vocab: "vocab.txt", - config: "config.json", + // MAINTAINER: replace with sha256 of the asset at the pinned revision; + // see providers/cactus/scripts/hash-catalog.ts (planned follow-up). + // Verification is skipped while the value is the literal placeholder, but + // a clear warning is logged so this can never silently ship to release. + weights: { + filename: "needle.safetensors", + sha256: "TODO_FILL_AT_RELEASE", + size: 0, + }, + vocab: { + filename: "vocab.txt", + sha256: "TODO_FILL_AT_RELEASE", + size: 0, + }, + config: { + filename: "config.json", + sha256: "TODO_FILL_AT_RELEASE", + size: 0, + }, }, capabilities: ["tool-use"], }, @@ -46,6 +92,16 @@ export function getCactusCatalogEntry(model_id: string): CactusCatalogEntry | un return CACTUS_CATALOG.find((e) => e.model_id === model_id); } -export function cactusAssetUrl(entry: CactusCatalogEntry, filename: string): string { +/** Returns all three asset specs in fixed order: weights, vocab, config. */ +export function assetSpecsOf(entry: CactusCatalogEntry): readonly CactusAssetSpec[] { + return [entry.assets.weights, entry.assets.vocab, entry.assets.config]; +} + +export function cactusAssetUrl( + entry: CactusCatalogEntry, + filenameOrSpec: string | CactusAssetSpec +): string { + const filename = + typeof filenameOrSpec === "string" ? 
filenameOrSpec : filenameOrSpec.filename; return `https://huggingface.co/${entry.hf_repo}/resolve/${entry.revision}/${filename}`; } diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts b/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts index b2f73211b..704b0c932 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts @@ -11,10 +11,13 @@ */ import { CACTUS_CACHE_NAME } from "./Cactus_Constants"; +import { CactusIntegrityError, verifySha256 } from "./Cactus_Integrity"; import { + assetSpecsOf, cactusAssetUrl, - getCactusCatalogEntry, + type CactusAssetSpec, type CactusCatalogEntry, + getCactusCatalogEntry, } from "./Cactus_ModelCatalog"; import type { CactusModelConfig } from "./Cactus_ModelSchema"; import { @@ -70,7 +73,7 @@ export function getCactusSdk(): NeedleSdkModule { // ============================================================================ function assetFilenames(entry: CactusCatalogEntry): string[] { - return [entry.assets.weights, entry.assets.vocab, entry.assets.config]; + return assetSpecsOf(entry).map((s) => s.filename); } async function getRemoteAssetSize( @@ -89,29 +92,77 @@ async function getRemoteAssetSize( } } -async function fetchAssetBytesBrowser(url: string): Promise { +async function fetchAssetBytesBrowser( + url: string, + spec: CactusAssetSpec +): Promise { const cachesApi = (globalThis as unknown as { caches: CacheStorage }).caches; const cache = await cachesApi.open(CACTUS_CACHE_NAME); const hit = await cache.match(url); if (hit) { - return new Uint8Array(await hit.arrayBuffer()); + const bytes = new Uint8Array(await hit.arrayBuffer()); + try { + await verifySha256(bytes, spec.sha256, { url, filename: spec.filename }); + return bytes; + } catch (err) { + if (err instanceof CactusIntegrityError) { + try { + await cache.delete(url); + } catch { + /* best effort */ + } + } else { + throw err; + } + } } const resp = await fetch(url); if 
(!resp.ok) throw new Error(`Cactus asset fetch failed (${resp.status}) for ${url}`); - // Clone first — Response bodies can only be consumed once. - await cache.put(url, resp.clone()); - return new Uint8Array(await resp.arrayBuffer()); + const contentType = resp.headers.get("content-type") ?? "application/octet-stream"; + const ab = await resp.arrayBuffer(); + const bytes = new Uint8Array(ab); + if (spec.size > 0 && bytes.byteLength !== spec.size) { + throw new CactusIntegrityError({ + url, + filename: spec.filename, + expected: `${spec.size} bytes`, + actual: `${bytes.byteLength} bytes`, + }); + } + // Verify BEFORE storing — never persist unverified bytes to the cache. + await verifySha256(bytes, spec.sha256, { url, filename: spec.filename }); + const headers = new Headers({ + "content-type": contentType, + "content-length": String(bytes.byteLength), + }); + await cache.put(url, new Response(bytes, { headers })); + return bytes; } export async function fetchAssetBytes( model: CactusModelConfig, - filename: string + specOrFilename: CactusAssetSpec | string ): Promise { const model_id = model.provider_config.model_id; const entry = getCactusCatalogEntry(model_id); if (!entry) throw new Error(`Unknown Cactus model_id: ${model_id}`); - const url = cactusAssetUrl(entry, filename); - return fetchAssetBytesBrowser(url); + const spec = resolveAssetSpec(entry, specOrFilename); + const url = cactusAssetUrl(entry, spec.filename); + return fetchAssetBytesBrowser(url, spec); +} + +function resolveAssetSpec( + entry: CactusCatalogEntry, + specOrFilename: CactusAssetSpec | string +): CactusAssetSpec { + if (typeof specOrFilename !== "string") return specOrFilename; + const found = assetSpecsOf(entry).find((s) => s.filename === specOrFilename); + if (!found) { + throw new Error( + `No asset spec for filename ${JSON.stringify(specOrFilename)} in catalog entry ${entry.model_id}` + ); + } + return found; } // 
============================================================================ @@ -252,7 +303,7 @@ export async function deleteCactusSession(id: string): Promise { async function removeBrowserCacheEntries(entry: CactusCatalogEntry): Promise { const cachesApi = (globalThis as unknown as { caches: CacheStorage }).caches; const cache = await cachesApi.open(CACTUS_CACHE_NAME); - for (const filename of [entry.assets.weights, entry.assets.vocab, entry.assets.config]) { + for (const filename of assetFilenames(entry)) { const url = cactusAssetUrl(entry, filename); try { await cache.delete(url); diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.ts b/providers/cactus/src/ai/common/Cactus_Runtime.ts index 6b40c1b66..c1fdd7f58 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.ts @@ -7,10 +7,13 @@ import fs from "node:fs/promises"; import path from "node:path"; import { CACTUS_CACHE_NAME, CACTUS_DEFAULT_MODELS_DIR } from "./Cactus_Constants"; +import { CactusIntegrityError, verifySha256 } from "./Cactus_Integrity"; import { + assetSpecsOf, cactusAssetUrl, - getCactusCatalogEntry, + type CactusAssetSpec, type CactusCatalogEntry, + getCactusCatalogEntry, } from "./Cactus_ModelCatalog"; import type { CactusModelConfig } from "./Cactus_ModelSchema"; import { @@ -84,7 +87,7 @@ function resolveModelDir(models_dir: string, model_id: string): string { } function assetFilenames(entry: CactusCatalogEntry): string[] { - return [entry.assets.weights, entry.assets.vocab, entry.assets.config]; + return assetSpecsOf(entry).map((s) => s.filename); } async function getRemoteAssetSize( @@ -154,56 +157,134 @@ async function getNodeAssetCacheInfo( }; } -async function fetchAssetBytesBrowser(url: string): Promise { +async function fetchAssetBytesBrowser( + url: string, + spec: CactusAssetSpec +): Promise { const cachesApi = (globalThis as unknown as { caches: CacheStorage }).caches; const cache = await 
cachesApi.open(CACTUS_CACHE_NAME); const hit = await cache.match(url); if (hit) { - return new Uint8Array(await hit.arrayBuffer()); + const bytes = new Uint8Array(await hit.arrayBuffer()); + try { + await verifySha256(bytes, spec.sha256, { url, filename: spec.filename }); + return bytes; + } catch (err) { + if (err instanceof CactusIntegrityError) { + // Cached bytes are corrupt / stale — evict and refetch. + try { + await cache.delete(url); + } catch { + /* best effort */ + } + } else { + throw err; + } + } } const resp = await fetch(url); if (!resp.ok) throw new Error(`Cactus asset fetch failed (${resp.status}) for ${url}`); - // Clone first — Response bodies can only be consumed once. - await cache.put(url, resp.clone()); - return new Uint8Array(await resp.arrayBuffer()); + const contentType = resp.headers.get("content-type") ?? "application/octet-stream"; + const ab = await resp.arrayBuffer(); + const bytes = new Uint8Array(ab); + if (spec.size > 0 && bytes.byteLength !== spec.size) { + throw new CactusIntegrityError({ + url, + filename: spec.filename, + expected: `${spec.size} bytes`, + actual: `${bytes.byteLength} bytes`, + }); + } + // Verify BEFORE storing — never persist unverified bytes to the cache. 
+ await verifySha256(bytes, spec.sha256, { url, filename: spec.filename }); + const headers = new Headers({ + "content-type": contentType, + "content-length": String(bytes.byteLength), + }); + await cache.put(url, new Response(bytes, { headers })); + return bytes; } async function fetchAssetBytesNode( url: string, models_dir: string, model_id: string, - filename: string + spec: CactusAssetSpec ): Promise { const resolvedDir = resolveModelDir(models_dir, model_id); - const filePath = path.join(resolvedDir, filename); + const filePath = path.join(resolvedDir, spec.filename); try { const buf = await fs.readFile(filePath); - return new Uint8Array(buf); - } catch { - // fall through to fetch + const bytes = new Uint8Array(buf); + try { + await verifySha256(bytes, spec.sha256, { url: `file:${filePath}`, filename: spec.filename }); + return bytes; + } catch (err) { + if (err instanceof CactusIntegrityError) { + // On-disk asset is corrupt; evict and fall through to network. + await fs.unlink(filePath).catch(() => {}); + } else { + throw err; + } + } + } catch (err) { + // ENOENT or sibling read errors fall through to fetch. + if (err instanceof CactusIntegrityError) { + throw err; // unreachable, handled above + } } const resp = await fetch(url); if (!resp.ok) throw new Error(`Cactus asset fetch failed (${resp.status}) for ${url}`); const bytes = new Uint8Array(await resp.arrayBuffer()); + if (spec.size > 0 && bytes.byteLength !== spec.size) { + throw new CactusIntegrityError({ + url, + filename: spec.filename, + expected: `${spec.size} bytes`, + actual: `${bytes.byteLength} bytes`, + }); + } + // Verify BEFORE writing the tmp file — never atomically promote unverified bytes. 
+ await verifySha256(bytes, spec.sha256, { url, filename: spec.filename }); await fs.mkdir(resolvedDir, { recursive: true }); const tmpPath = `${filePath}.tmp`; - await fs.writeFile(tmpPath, bytes); - await fs.rename(tmpPath, filePath); + try { + await fs.writeFile(tmpPath, bytes); + await fs.rename(tmpPath, filePath); + } catch (err) { + await fs.unlink(tmpPath).catch(() => {}); + throw err; + } return bytes; } export async function fetchAssetBytes( model: CactusModelConfig, - filename: string + specOrFilename: CactusAssetSpec | string ): Promise { const model_id = model.provider_config.model_id; const entry = getCactusCatalogEntry(model_id); if (!entry) throw new Error(`Unknown Cactus model_id: ${model_id}`); - const url = cactusAssetUrl(entry, filename); + const spec = resolveAssetSpec(entry, specOrFilename); + const url = cactusAssetUrl(entry, spec.filename); if (hasBrowserCacheStorage()) { - return fetchAssetBytesBrowser(url); + return fetchAssetBytesBrowser(url, spec); + } + return fetchAssetBytesNode(url, modelsDirOf(model), model_id, spec); +} + +function resolveAssetSpec( + entry: CactusCatalogEntry, + specOrFilename: CactusAssetSpec | string +): CactusAssetSpec { + if (typeof specOrFilename !== "string") return specOrFilename; + const found = assetSpecsOf(entry).find((s) => s.filename === specOrFilename); + if (!found) { + throw new Error( + `No asset spec for filename ${JSON.stringify(specOrFilename)} in catalog entry ${entry.model_id}` + ); } - return fetchAssetBytesNode(url, modelsDirOf(model), model_id, filename); + return found; } // ============================================================================ @@ -349,7 +430,7 @@ async function removeBrowserCacheEntries(entry: CactusCatalogEntry): Promise + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from "vitest"; +import { + CACTUS_HASH_PLACEHOLDER, + CactusIntegrityError, + isHashPlaceholder, + sha256Hex, + verifySha256, +} from "../Cactus_Integrity"; + +// Known 
SHA-256 of the ASCII string "abc" — RFC 6234 test vector. +const SHA256_ABC = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"; + +function asciiBytes(s: string): Uint8Array { + const out = new Uint8Array(s.length); + for (let i = 0; i < s.length; i++) out[i] = s.charCodeAt(i); + return out; +} + +describe("sha256Hex", () => { + it('matches the known RFC 6234 vector for "abc"', async () => { + const hex = await sha256Hex(asciiBytes("abc")); + expect(hex).toBe(SHA256_ABC); + }); + + it("accepts ArrayBuffer input", async () => { + const buf = asciiBytes("abc").buffer; + const hex = await sha256Hex(buf); + expect(hex).toBe(SHA256_ABC); + }); + + it("produces 64 lowercase hex chars", async () => { + const hex = await sha256Hex(new Uint8Array([0])); + expect(hex).toMatch(/^[0-9a-f]{64}$/); + }); +}); + +describe("verifySha256", () => { + const ctx = { url: "https://example/asset", filename: "asset.bin" }; + + it("passes when the digest matches", async () => { + await expect(verifySha256(asciiBytes("abc"), SHA256_ABC, ctx)).resolves.toBeUndefined(); + }); + + it("accepts uppercase expected hex (normalized to lowercase)", async () => { + await expect( + verifySha256(asciiBytes("abc"), SHA256_ABC.toUpperCase(), ctx) + ).resolves.toBeUndefined(); + }); + + it("throws CactusIntegrityError when the digest does not match", async () => { + const wrong = "0".repeat(64); + await expect(verifySha256(asciiBytes("abc"), wrong, ctx)).rejects.toBeInstanceOf( + CactusIntegrityError + ); + }); + + it("throws plain Error when expected hash is too short", async () => { + await expect(verifySha256(asciiBytes("abc"), "a".repeat(63), ctx)).rejects.toThrow( + /Invalid catalog SHA-256/ + ); + }); + + it("throws plain Error when expected hash is too long", async () => { + await expect(verifySha256(asciiBytes("abc"), "a".repeat(65), ctx)).rejects.toThrow( + /Invalid catalog SHA-256/ + ); + }); + + it("throws plain Error when expected hash contains non-hex characters", async () 
=> { + const bad = "z" + "a".repeat(63); + await expect(verifySha256(asciiBytes("abc"), bad, ctx)).rejects.toThrow( + /non-hex characters/ + ); + }); + + it("skips verification when expected is the placeholder sentinel", async () => { + await expect( + verifySha256(asciiBytes("garbage"), CACTUS_HASH_PLACEHOLDER, ctx) + ).resolves.toBeUndefined(); + }); +}); + +describe("isHashPlaceholder", () => { + it("recognizes the placeholder", () => { + expect(isHashPlaceholder(CACTUS_HASH_PLACEHOLDER)).toBe(true); + }); + + it("rejects real-looking hashes", () => { + expect(isHashPlaceholder(SHA256_ABC)).toBe(false); + }); +}); + +describe("CactusIntegrityError", () => { + it("carries url, filename, expected, actual on the instance", () => { + const err = new CactusIntegrityError({ + url: "u", + filename: "f", + expected: "e", + actual: "a", + }); + expect(err.name).toBe("CactusIntegrityError"); + expect(err.url).toBe("u"); + expect(err.filename).toBe("f"); + expect(err.expected).toBe("e"); + expect(err.actual).toBe("a"); + expect(err.message).toMatch(/Integrity check failed for f/); + }); +}); diff --git a/providers/cactus/src/ai/common/__tests__/Cactus_Runtime.crossBundle.test.ts b/providers/cactus/src/ai/common/__tests__/Cactus_Runtime.crossBundle.test.ts new file mode 100644 index 000000000..ce343e250 --- /dev/null +++ b/providers/cactus/src/ai/common/__tests__/Cactus_Runtime.crossBundle.test.ts @@ -0,0 +1,59 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// Exercises the cross-bundle invariant: the `./ai` and `./ai-runtime` +// entry points each compile their own copy of Cactus_Runtime.ts, but both +// must observe the same engine cache because state is anchored on a +// globalThis-keyed singleton (see Cactus_RuntimeState). +// +// We simulate the two bundles by importing both the Node and browser +// variants of Cactus_Runtime through Vitest's dynamic-import machinery. 
+// In production the duplication is even tighter (two compiled copies of +// the same source), but the contract is identical: getRuntime() returns +// the same object across all importers in the same realm. + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { __resetRuntimeForTests } from "../Cactus_RuntimeState"; + +const RUNTIME_KEY = Symbol.for("@workglow/cactus.runtime.v1"); + +describe("Cactus runtime cross-bundle singleton", () => { + beforeEach(() => { + __resetRuntimeForTests(); + vi.resetModules(); + }); + + afterEach(() => { + __resetRuntimeForTests(); + vi.resetModules(); + }); + + it("shares the engine Map across separately-imported Cactus_RuntimeState modules", async () => { + // Two independent dynamic imports of the same module specifier still + // resolve to the same module instance under Vitest, but the production + // bug is about *distinct* compiled copies. We model that by going via + // globalThis directly: the v1 contract guarantees that any module which + // calls getRuntime() observes whatever Map is parked on the symbol. + const a = await import("../Cactus_RuntimeState"); + const enginesFromA = a.getCactusEngines(); + enginesFromA.set("shared-id", { free: () => {} }); + + // Pretend a second bundle imports the same module fresh. 
+ vi.resetModules(); + const b = await import("../Cactus_RuntimeState"); + const enginesFromB = b.getCactusEngines(); + + expect(enginesFromB.has("shared-id")).toBe(true); + expect(enginesFromA).toBe(enginesFromB); + }); + + it("surfaces a version mismatch when a foreign payload is parked on the global key", async () => { + const g = globalThis as unknown as Record; + g[RUNTIME_KEY] = { version: 99 }; + const { getRuntime } = await import("../Cactus_RuntimeState"); + expect(() => getRuntime()).toThrow(/v99.*expected v1|version mismatch/); + }); +}); diff --git a/providers/cactus/src/ai/common/__tests__/Cactus_RuntimeState.test.ts b/providers/cactus/src/ai/common/__tests__/Cactus_RuntimeState.test.ts new file mode 100644 index 000000000..528c15d97 --- /dev/null +++ b/providers/cactus/src/ai/common/__tests__/Cactus_RuntimeState.test.ts @@ -0,0 +1,66 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { + __resetRuntimeForTests, + getCactusEngines, + getRuntime, +} from "../Cactus_RuntimeState"; + +const RUNTIME_KEY = Symbol.for("@workglow/cactus.runtime.v1"); + +describe("Cactus_RuntimeState", () => { + beforeEach(() => { + __resetRuntimeForTests(); + }); + + afterEach(() => { + __resetRuntimeForTests(); + }); + + it("lazily initializes a v1 state on first access", () => { + const state = getRuntime(); + expect(state.version).toBe(1); + expect(state.engines).toBeInstanceOf(Map); + expect(state.configJson).toBeInstanceOf(Map); + expect(state.cachedModelIds).toBeInstanceOf(Set); + expect(state.engineLoadsInFlight).toBeInstanceOf(Map); + expect(state.sessions).toBeInstanceOf(Map); + }); + + it("returns the same instance across calls (singleton)", () => { + const a = getRuntime(); + const b = getRuntime(); + expect(a).toBe(b); + expect(a.engines).toBe(b.engines); + }); + + it("is shared across distinct importers via globalThis", () => { + // 
Simulate a second bundle by setting the symbol on globalThis directly + // and then reading it back via the public accessor. + const engines = getCactusEngines(); + engines.set("x", { free: () => {} }); + const g = globalThis as unknown as Record }>; + expect(g[RUNTIME_KEY].engines.get("x")).toBeDefined(); + }); + + it("throws on version mismatch", () => { + const g = globalThis as unknown as Record; + g[RUNTIME_KEY] = { version: 99 }; + expect(() => getRuntime()).toThrow(/version mismatch/); + }); + + it("__resetRuntimeForTests removes the singleton, allowing a fresh state", () => { + const a = getRuntime(); + a.engines.set("foo", { free: () => {} }); + expect(getRuntime().engines.has("foo")).toBe(true); + __resetRuntimeForTests(); + const b = getRuntime(); + expect(b).not.toBe(a); + expect(b.engines.has("foo")).toBe(false); + }); +}); From 21eb3c2664b9b6bb3130e5c5f30c705237e639b0 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 08:27:13 -0700 Subject: [PATCH 03/11] fix(cactus): remove unused imports to satisfy noUnusedLocals (build fix) --- providers/cactus/src/ai/common/Cactus_Runtime.browser.ts | 9 +-------- providers/cactus/src/ai/common/Cactus_Runtime.ts | 9 +-------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts b/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts index 704b0c932..14bb03f7d 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts @@ -20,14 +20,7 @@ import { getCactusCatalogEntry, } from "./Cactus_ModelCatalog"; import type { CactusModelConfig } from "./Cactus_ModelSchema"; -import { - getCactusCachedModelIds, - getCactusConfigJson, - getCactusEngineLoadsInFlight, - getCactusEngines, - getCactusSessions, - getRuntime, -} from "./Cactus_RuntimeState"; +import { getCactusSessions, getRuntime } from "./Cactus_RuntimeState"; type NeedleSdkModule = typeof 
import("needle-rs"); // `NeedleWasm` has a private constructor so `InstanceType<...>` cannot be used. diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.ts b/providers/cactus/src/ai/common/Cactus_Runtime.ts index c1fdd7f58..121e697d1 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.ts @@ -16,14 +16,7 @@ import { getCactusCatalogEntry, } from "./Cactus_ModelCatalog"; import type { CactusModelConfig } from "./Cactus_ModelSchema"; -import { - getCactusCachedModelIds, - getCactusConfigJson, - getCactusEngineLoadsInFlight, - getCactusEngines, - getCactusSessions, - getRuntime, -} from "./Cactus_RuntimeState"; +import { getCactusSessions, getRuntime } from "./Cactus_RuntimeState"; type NeedleSdkModule = typeof import("needle-rs"); // `NeedleWasm` has a private constructor so `InstanceType<...>` cannot be used. From 7a8905ae61e6bd9c57cb18ea57b80e648c4818a5 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 08:29:31 -0700 Subject: [PATCH 04/11] fix(cactus): validate model_id and filename against allowlist regex (CodeQL) --- .../src/ai/common/Cactus_Runtime.browser.ts | 42 +++++++++++++++++ .../cactus/src/ai/common/Cactus_Runtime.ts | 45 +++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts b/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts index 14bb03f7d..3885de285 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.browser.ts @@ -32,6 +32,46 @@ export interface CactusModelCacheInfo { readonly file_sizes: Record | null; } +// ============================================================================ +// Path-safety allowlists (defense-in-depth, mirror of Cactus_Runtime.ts) +// +// The browser variant does not touch the filesystem, but applying the same +// validation keeps both code paths in sync, hardens cache-key inputs, and +// 
silences static analyzers that flag any use of user-supplied identifiers +// in URL/path-shaped strings. +// +// TODO: lift these helpers into a shared module if/when a third caller +// appears. Duplicated for now to avoid churn during the active PR. +// ============================================================================ + +const MODEL_ID_RE = /^[A-Za-z0-9_-]{1,64}$/; +const FILENAME_RE = /^[A-Za-z0-9_.-]+$/; + +function assertSafeModelId(model_id: string): void { + if (typeof model_id !== "string" || !MODEL_ID_RE.test(model_id)) { + throw new Error( + `Invalid Cactus model_id ${JSON.stringify(model_id)}: ` + + `must match ${MODEL_ID_RE} (alphanumeric, underscore, hyphen; 1-64 chars).` + ); + } +} + +function assertSafeFilename(filename: string): void { + if ( + typeof filename !== "string" || + filename.length === 0 || + filename.length > 255 || + filename === "." || + filename === ".." || + !FILENAME_RE.test(filename) + ) { + throw new Error( + `Invalid Cactus asset filename ${JSON.stringify(filename)}: ` + + `must match ${FILENAME_RE} (no path separators, no '..').` + ); + } +} + let _sdk: NeedleSdkModule | undefined; let _sdkInitPromise: Promise | undefined; @@ -89,6 +129,7 @@ async function fetchAssetBytesBrowser( url: string, spec: CactusAssetSpec ): Promise { + assertSafeFilename(spec.filename); const cachesApi = (globalThis as unknown as { caches: CacheStorage }).caches; const cache = await cachesApi.open(CACTUS_CACHE_NAME); const hit = await cache.match(url); @@ -137,6 +178,7 @@ export async function fetchAssetBytes( specOrFilename: CactusAssetSpec | string ): Promise { const model_id = model.provider_config.model_id; + assertSafeModelId(model_id); const entry = getCactusCatalogEntry(model_id); if (!entry) throw new Error(`Unknown Cactus model_id: ${model_id}`); const spec = resolveAssetSpec(entry, specOrFilename); diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.ts b/providers/cactus/src/ai/common/Cactus_Runtime.ts index 
121e697d1..59355affe 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.ts @@ -28,6 +28,46 @@ export interface CactusModelCacheInfo { readonly file_sizes: Record | null; } +// ============================================================================ +// Path-safety allowlists (defense-in-depth) +// +// `model_id` originates from user-supplied `provider_config.model_id` and +// `filename` originates from the (effectively trusted) catalog. The catalog +// lookup in `getCactusCatalogEntry` already restricts `model_id` to known +// values, but static analyzers (CodeQL) cannot see through that lookup, so +// we re-enforce explicit character allowlists at every filesystem entry +// point. Both regexes reject path separators, `..`, NUL, and any other +// shell/path-special characters. +// ============================================================================ + +const MODEL_ID_RE = /^[A-Za-z0-9_-]{1,64}$/; +const FILENAME_RE = /^[A-Za-z0-9_.-]+$/; + +function assertSafeModelId(model_id: string): void { + if (typeof model_id !== "string" || !MODEL_ID_RE.test(model_id)) { + throw new Error( + `Invalid Cactus model_id ${JSON.stringify(model_id)}: ` + + `must match ${MODEL_ID_RE} (alphanumeric, underscore, hyphen; 1-64 chars).` + ); + } +} + +function assertSafeFilename(filename: string): void { + if ( + typeof filename !== "string" || + filename.length === 0 || + filename.length > 255 || + filename === "." || + filename === ".." 
|| + !FILENAME_RE.test(filename) + ) { + throw new Error( + `Invalid Cactus asset filename ${JSON.stringify(filename)}: ` + + `must match ${FILENAME_RE} (no path separators, no '..').` + ); + } +} + let _sdk: NeedleSdkModule | undefined; let _sdkInitPromise: Promise | undefined; @@ -74,6 +114,7 @@ function modelsDirOf(model: CactusModelConfig): string { } function resolveModelDir(models_dir: string, model_id: string): string { + assertSafeModelId(model_id); return models_dir.startsWith("~/") ? path.join(process.env.HOME ?? process.env.USERPROFILE ?? ".", models_dir.slice(2), model_id) : path.resolve(models_dir, model_id); @@ -109,6 +150,7 @@ async function getNodeAssetCacheInfo( const resolvedDir = resolveModelDir(modelsDirOf(model), entry.model_id); const stats = await Promise.all( filenames.map(async (filename) => { + assertSafeFilename(filename); try { const stat = await fs.stat(path.join(resolvedDir, filename)); return { filename, size: stat.size, cached: true }; @@ -204,6 +246,8 @@ async function fetchAssetBytesNode( model_id: string, spec: CactusAssetSpec ): Promise { + assertSafeModelId(model_id); + assertSafeFilename(spec.filename); const resolvedDir = resolveModelDir(models_dir, model_id); const filePath = path.join(resolvedDir, spec.filename); try { @@ -435,6 +479,7 @@ async function removeBrowserCacheEntries(entry: CactusCatalogEntry): Promise { if (hasBrowserCacheStorage()) return; + assertSafeModelId(model_id); const models_dir = modelsDirOf(model); const resolvedDir = resolveModelDir(models_dir, model_id); await fs.rm(resolvedDir, { recursive: true, force: true }); From a6acfa4f75e685810afc0e3fc14f063dd5f2e591 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 08:30:17 -0700 Subject: [PATCH 05/11] fix(cactus): generic integrity error message and propagate err.message in Download CactusIntegrityError previously hard-coded "expected sha256 ..." 
in its message, producing confusing output like "expected sha256 22000000 bytes" for the size-mismatch callers. Make the constructor message label-agnostic so it reads naturally for both hash and byte-length mismatches. Cactus_Download(.browser) was reformatting the error message under the assumption it was always a SHA mismatch; just propagate err.message, which now phrases itself correctly for both cases. --- .../src/ai/common/Cactus_Download.browser.ts | 13 +++++++------ providers/cactus/src/ai/common/Cactus_Download.ts | 13 +++++++------ providers/cactus/src/ai/common/Cactus_Integrity.ts | 14 +++++++++++++- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/providers/cactus/src/ai/common/Cactus_Download.browser.ts b/providers/cactus/src/ai/common/Cactus_Download.browser.ts index 4b8365525..4cdedbb68 100644 --- a/providers/cactus/src/ai/common/Cactus_Download.browser.ts +++ b/providers/cactus/src/ai/common/Cactus_Download.browser.ts @@ -35,12 +35,13 @@ export const Cactus_Download: AiProviderRunFn< try { await fetchAssetBytes(model, spec); } catch (err) { - if (err instanceof CactusIntegrityError) { - emit({ - type: "phase", - message: `Integrity check failed for ${spec.filename}: expected sha256 ${err.expected}, got ${err.actual}`, - }); - } + // Surface whatever the integrity layer phrased — it knows whether the + // mismatch was a SHA-256 digest or a byte-length pre-check, and the + // error message is already shaped correctly for both. + emit({ + type: "phase", + message: err instanceof CactusIntegrityError ? 
err.message : String(err), + }); throw err; } } diff --git a/providers/cactus/src/ai/common/Cactus_Download.ts b/providers/cactus/src/ai/common/Cactus_Download.ts index c3b64b930..81a6bbe75 100644 --- a/providers/cactus/src/ai/common/Cactus_Download.ts +++ b/providers/cactus/src/ai/common/Cactus_Download.ts @@ -35,12 +35,13 @@ export const Cactus_Download: AiProviderRunFn< try { await fetchAssetBytes(model, spec); } catch (err) { - if (err instanceof CactusIntegrityError) { - emit({ - type: "phase", - message: `Integrity check failed for ${spec.filename}: expected sha256 ${err.expected}, got ${err.actual}`, - }); - } + // Surface whatever the integrity layer phrased — it knows whether the + // mismatch was a SHA-256 digest or a byte-length pre-check, and the + // error message is already shaped correctly for both. + emit({ + type: "phase", + message: err instanceof CactusIntegrityError ? err.message : String(err), + }); throw err; } } diff --git a/providers/cactus/src/ai/common/Cactus_Integrity.ts b/providers/cactus/src/ai/common/Cactus_Integrity.ts index 34396f4dd..aa2f7e3c5 100644 --- a/providers/cactus/src/ai/common/Cactus_Integrity.ts +++ b/providers/cactus/src/ai/common/Cactus_Integrity.ts @@ -16,6 +16,14 @@ /** Sentinel value used in the catalog while real hashes are not yet populated. */ export const CACTUS_HASH_PLACEHOLDER = "TODO_FILL_AT_RELEASE"; +/** + * Raised whenever a Cactus asset fails an integrity check. The `expected` + * and `actual` fields are deliberately label-agnostic strings so the same + * error type covers both SHA-256 mismatches ("abc123...") and byte-length + * mismatches ("22000000 bytes"). The constructor message embeds them + * verbatim, so callers should phrase each side with whatever unit makes + * sense at the call site. 
+ */ export class CactusIntegrityError extends Error { readonly url: string; readonly filename: string; @@ -24,7 +32,7 @@ export class CactusIntegrityError extends Error { constructor(opts: { url: string; filename: string; expected: string; actual: string }) { super( `Integrity check failed for ${opts.filename} from ${opts.url}: ` + - `expected sha256 ${opts.expected}, got ${opts.actual}` + `expected ${opts.expected}, got ${opts.actual}` ); this.name = "CactusIntegrityError"; this.url = opts.url; @@ -60,6 +68,10 @@ export function isHashPlaceholder(expected: string): boolean { * `expected`. Throws a plain `Error` if `expected` is malformed (not 64 hex * chars), since that is a catalog-author bug, not a content bug. * + * When the hashes mismatch, both `expected` and `actual` are lowercase hex + * SHA-256 strings; the resulting error message reads + * `expected <hex>, got <hex>`. + * * If `expected` is the `TODO_FILL_AT_RELEASE` placeholder, verification is * skipped and a one-time warning is logged. This keeps developers unblocked * before the real hashes land while making the gap impossible to miss. From c1696f59d782892128b5f03c0bc10efbe306527e Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 08:30:47 -0700 Subject: [PATCH 06/11] feat(cactus): assert catalog SHA-256 hex format at module load Document the placeholder semantics on CactusAssetSpec.sha256 so the "TODO_FILL_AT_RELEASE" contract is discoverable from the type itself, and put assertHexSha256 to work by validating every non-placeholder entry the moment the catalog module loads. Catalog-author bugs (wrong length, uppercase, non-hex chars) now surface immediately at import time instead of on first verification.
--- .../src/ai/common/Cactus_ModelCatalog.ts | 37 ++++++++++++++++--- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/providers/cactus/src/ai/common/Cactus_ModelCatalog.ts b/providers/cactus/src/ai/common/Cactus_ModelCatalog.ts index ea31a1823..635b5eb73 100644 --- a/providers/cactus/src/ai/common/Cactus_ModelCatalog.ts +++ b/providers/cactus/src/ai/common/Cactus_ModelCatalog.ts @@ -10,6 +10,7 @@ import { CACTUS_DEFAULT_REVISION, CACTUS_NEEDLE_26M, } from "./Cactus_Constants"; +import { CACTUS_HASH_PLACEHOLDER } from "./Cactus_Integrity"; /** * A single asset file in a Cactus model catalog entry. @@ -20,7 +21,14 @@ import { */ export interface CactusAssetSpec { readonly filename: string; - /** Lowercase hex SHA-256, exactly 64 characters. */ + /** + * Lowercase hex SHA-256, exactly 64 characters. + * + * The literal string `"TODO_FILL_AT_RELEASE"` is accepted as a placeholder + * during pre-release development; in that case `verifySha256` skips the + * check and logs a one-time warning. The placeholder MUST be replaced + * with a real hash before a tagged release. + */ readonly sha256: string; /** Expected byte length — used as a cheap pre-check before hashing. */ readonly size: number; @@ -43,8 +51,10 @@ export interface CactusCatalogEntry { /** * Asserts that `s` is a lowercase hex SHA-256 (64 hex chars). * - * Used at catalog load time to surface malformed entries before any - * verification call sees them. + * Invoked at module-load time on every non-placeholder catalog entry (see + * the bottom of this file) so malformed hashes surface immediately as an + * import-time error rather than the first time `verifySha256` runs against + * fetched bytes. 
*/ export function assertHexSha256(s: string, ctxLabel?: string): asserts s is string { if (typeof s !== "string" || s.length !== 64 || !/^[0-9a-f]{64}$/.test(s)) { @@ -70,17 +80,17 @@ export const CACTUS_CATALOG: readonly CactusCatalogEntry[] = [ // a clear warning is logged so this can never silently ship to release. weights: { filename: "needle.safetensors", - sha256: "TODO_FILL_AT_RELEASE", + sha256: CACTUS_HASH_PLACEHOLDER, size: 0, }, vocab: { filename: "vocab.txt", - sha256: "TODO_FILL_AT_RELEASE", + sha256: CACTUS_HASH_PLACEHOLDER, size: 0, }, config: { filename: "config.json", - sha256: "TODO_FILL_AT_RELEASE", + sha256: CACTUS_HASH_PLACEHOLDER, size: 0, }, }, @@ -105,3 +115,18 @@ export function cactusAssetUrl( typeof filenameOrSpec === "string" ? filenameOrSpec : filenameOrSpec.filename; return `https://huggingface.co/${entry.hf_repo}/resolve/${entry.revision}/${filename}`; } + +// ============================================================================ +// Module-load invariant: every non-placeholder catalog entry has a valid +// 64-char lowercase hex SHA-256. Catches catalog-author bugs immediately. +// +// Placeholder entries are intentionally skipped — `verifySha256` warns and +// no-ops on them during pre-release development. +// ============================================================================ +for (const entry of CACTUS_CATALOG) { + for (const asset of assetSpecsOf(entry)) { + if (asset.sha256 !== CACTUS_HASH_PLACEHOLDER) { + assertHexSha256(asset.sha256, `${entry.model_id}/${asset.filename}`); + } + } +} From 8507c4fd46e777b5d529c67a6a4d9090e4c14d8a Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 08:41:41 -0700 Subject: [PATCH 07/11] fix(cactus): satisfy StreamPhase.progress required + tighten sha256 buffer types build-types was failing with two distinct shape mismatches: 1. 
emit({ type: "phase", message }) in Cactus_Download(.browser).ts omits `progress`, but StreamPhase declares `progress: number | undefined` (required, not optional). Re-emit with `progress: undefined` for the error path so the discriminated-union members keep their explicit shape. 2. sha256Hex / verifySha256 pass a `Uint8Array<ArrayBufferLike>` (the default of the unparameterized `Uint8Array` since the recent lib.dom tightening) into crypto.subtle.digest, which now expects `ArrayBufferView<ArrayBuffer>` / a concrete `BufferSource`. Copy incoming bytes into a fresh `ArrayBuffer` and pass that buffer directly to digest, then read the result back through a Uint8Array view. The hash result is unchanged. --- .../cactus/src/ai/common/Cactus_Download.browser.ts | 3 +++ providers/cactus/src/ai/common/Cactus_Download.ts | 3 +++ providers/cactus/src/ai/common/Cactus_Integrity.ts | 12 ++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/providers/cactus/src/ai/common/Cactus_Download.browser.ts b/providers/cactus/src/ai/common/Cactus_Download.browser.ts index 4cdedbb68..74b8e9aeb 100644 --- a/providers/cactus/src/ai/common/Cactus_Download.browser.ts +++ b/providers/cactus/src/ai/common/Cactus_Download.browser.ts @@ -38,9 +38,12 @@ export const Cactus_Download: AiProviderRunFn< // Surface whatever the integrity layer phrased — it knows whether the // mismatch was a SHA-256 digest or a byte-length pre-check, and the // error message is already shaped correctly for both. + // StreamPhase.progress is required (number | undefined); pass undefined + // on the error path because there is no meaningful percentage to report. emit({ type: "phase", message: err instanceof CactusIntegrityError ? 
err.message : String(err), + progress: undefined, }); throw err; } diff --git a/providers/cactus/src/ai/common/Cactus_Download.ts b/providers/cactus/src/ai/common/Cactus_Download.ts index 81a6bbe75..d5fbe5bd7 100644 --- a/providers/cactus/src/ai/common/Cactus_Download.ts +++ b/providers/cactus/src/ai/common/Cactus_Download.ts @@ -38,9 +38,12 @@ export const Cactus_Download: AiProviderRunFn< // Surface whatever the integrity layer phrased — it knows whether the // mismatch was a SHA-256 digest or a byte-length pre-check, and the // error message is already shaped correctly for both. + // StreamPhase.progress is required (number | undefined); pass undefined + // on the error path because there is no meaningful percentage to report. emit({ type: "phase", message: err instanceof CactusIntegrityError ? err.message : String(err), + progress: undefined, }); throw err; } diff --git a/providers/cactus/src/ai/common/Cactus_Integrity.ts b/providers/cactus/src/ai/common/Cactus_Integrity.ts index aa2f7e3c5..2248e06e5 100644 --- a/providers/cactus/src/ai/common/Cactus_Integrity.ts +++ b/providers/cactus/src/ai/common/Cactus_Integrity.ts @@ -43,8 +43,16 @@ export class CactusIntegrityError extends Error { } export async function sha256Hex(bytes: Uint8Array | ArrayBuffer): Promise { - const input = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes); - const digest = await globalThis.crypto.subtle.digest("SHA-256", input); + // Copy into a fresh ArrayBuffer so we hand crypto.subtle.digest a concrete + // `BufferSource` whose backing buffer is `ArrayBuffer` (not `ArrayBufferLike`). + // The recent lib.dom tightening on Uint8Array's default generic argument made + // the previous `new Uint8Array(bytes)` path no longer assignable to digest's + // parameter type. + const src = + bytes instanceof Uint8Array ? 
bytes : new Uint8Array(bytes as ArrayBuffer); + const buf = new ArrayBuffer(src.byteLength); + new Uint8Array(buf).set(src); + const digest = await globalThis.crypto.subtle.digest("SHA-256", buf); const view = new Uint8Array(digest); let s = ""; for (let i = 0; i < view.length; i++) { From 809c03b95d2787545879f1b78ee3dce79414c77f Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 11:21:14 -0700 Subject: [PATCH 08/11] fix(cactus): materialize concrete ArrayBuffer in Integrity test (build fix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "accepts ArrayBuffer input" test was passing `asciiBytes("abc").buffer` to sha256Hex. `Uint8Array.prototype.buffer` is typed `ArrayBufferLike` in the lib.dom.d.ts version this repo pulls in — meaning TypeScript treats it as potentially `SharedArrayBuffer`, which is missing properties (`resizable`, `resize`, `detached`, `transfer`, `transferToFixedLength`) that `ArrayBuffer` requires. The test still wants to exercise sha256Hex's ArrayBuffer branch, so build a fresh concrete `ArrayBuffer` via `new ArrayBuffer(n)` and copy the ASCII bytes into it before calling sha256Hex. Same coverage, no ArrayBufferLike leakage into the call site. --- .../src/ai/common/__tests__/Cactus_Integrity.test.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/providers/cactus/src/ai/common/__tests__/Cactus_Integrity.test.ts b/providers/cactus/src/ai/common/__tests__/Cactus_Integrity.test.ts index 144c9146f..356ae5b58 100644 --- a/providers/cactus/src/ai/common/__tests__/Cactus_Integrity.test.ts +++ b/providers/cactus/src/ai/common/__tests__/Cactus_Integrity.test.ts @@ -29,7 +29,12 @@ describe("sha256Hex", () => { }); it("accepts ArrayBuffer input", async () => { - const buf = asciiBytes("abc").buffer; + // `Uint8Array.prototype.buffer` is typed `ArrayBufferLike` in newer + // lib.dom.d.ts (it can be SharedArrayBuffer). 
Materialize a concrete + // ArrayBuffer so the test exercises that branch of sha256Hex's input. + const src = asciiBytes("abc"); + const buf = new ArrayBuffer(src.byteLength); + new Uint8Array(buf).set(src); const hex = await sha256Hex(buf); expect(hex).toBe(SHA256_ABC); }); From 3ae2eb8b76b035e235d1e149395d9c13a2e891f9 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 13:31:49 -0700 Subject: [PATCH 09/11] fix(cactus): add safeJoinUnderRoot containment check for CodeQL js/path-injection sanitizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeQL's `js/path-injection` query couldn't recognize the regex-based allowlist as a sanitizer — `model_id` from `provider_config.model_id` was still flagged as flowing into `path.resolve(models_dir, model_id)` and the subsequent `fs.*` calls. Add `safeJoinUnderRoot(root, ...segments)` that normalizes with `path.resolve` and verifies containment via `startsWith(resolvedRoot + sep)` — the canonical sanitizer pattern CodeQL recognizes for the js/path-injection query. Apply at every fs entry point in this module: - `resolveModelDir`: now safe-joins `model_id` onto a resolved `models_dir` root, throws on escape. - `getNodeAssetCacheInfo`: each `fs.stat(path.join(resolvedDir, filename))` becomes `fs.stat(safeJoinUnderRoot(resolvedDir, filename))`. - `fetchAssetBytesNode`: `filePath = safeJoinUnderRoot(resolvedDir, spec.filename)`. `tmpPath` is a sibling string concat, inheriting the contained `filePath`. - `removeNodeCacheDir`: explicit `resolveModelDir` call locally so the sanitizer trace is visible per-fs-call. The character allowlists (assertSafeModelId / assertSafeFilename) remain as fast-fail defense in depth — they reject malformed input at the boundary before any path operation runs. 
--- .../cactus/src/ai/common/Cactus_Runtime.ts | 69 ++++++++++++++++--- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.ts b/providers/cactus/src/ai/common/Cactus_Runtime.ts index 59355affe..77af1634d 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.ts @@ -29,15 +29,26 @@ export interface CactusModelCacheInfo { } // ============================================================================ -// Path-safety allowlists (defense-in-depth) +// Path-safety helpers (defense-in-depth + CodeQL-recognized sanitizer) // // `model_id` originates from user-supplied `provider_config.model_id` and // `filename` originates from the (effectively trusted) catalog. The catalog // lookup in `getCactusCatalogEntry` already restricts `model_id` to known -// values, but static analyzers (CodeQL) cannot see through that lookup, so -// we re-enforce explicit character allowlists at every filesystem entry -// point. Both regexes reject path separators, `..`, NUL, and any other -// shell/path-special characters. +// values, but static analyzers cannot see through that lookup. +// +// Two layers of defense are applied at every filesystem entry point: +// +// 1. Character allowlists (`assertSafeModelId`, `assertSafeFilename`) +// reject separators, `..`, NUL, and any shell/path-special characters +// at the boundary. Fast-fail on malformed input. +// +// 2. `safeJoinUnderRoot` normalizes the candidate path with `path.resolve` +// and verifies it stays within the resolved root via `startsWith`. +// This is the canonical CodeQL-recognized sanitizer for the +// `js/path-injection` query — the analyzer cannot infer containment +// from regex allowlists upstream, so every filesystem entry point +// uses this helper to make the containment invariant explicit at +// the point of use. 
// ============================================================================ const MODEL_ID_RE = /^[A-Za-z0-9_-]{1,64}$/; @@ -68,6 +79,29 @@ function assertSafeFilename(filename: string): void { } } +/** + * Returns `path.resolve(root, ...segments)` only when the resolved result + * is contained within (or equal to) the resolved root. Throws otherwise. + * + * This implements the recommended path-injection mitigation: + * normalize with `path.resolve` (which collapses `..` segments) and then + * verify the result begins with the resolved root. CodeQL's + * `js/path-injection` query recognizes this pattern as a sanitizer. + */ +function safeJoinUnderRoot(root: string, ...segments: string[]): string { + const resolvedRoot = path.resolve(root); + const candidate = path.resolve(resolvedRoot, ...segments); + if ( + candidate !== resolvedRoot && + !candidate.startsWith(resolvedRoot + path.sep) + ) { + throw new Error( + `Path escape detected: ${JSON.stringify(candidate)} is not within ${JSON.stringify(resolvedRoot)}` + ); + } + return candidate; +} + let _sdk: NeedleSdkModule | undefined; let _sdkInitPromise: Promise | undefined; @@ -115,9 +149,16 @@ function modelsDirOf(model: CactusModelConfig): string { function resolveModelDir(models_dir: string, model_id: string): string { assertSafeModelId(model_id); - return models_dir.startsWith("~/") - ? path.join(process.env.HOME ?? process.env.USERPROFILE ?? ".", models_dir.slice(2), model_id) - : path.resolve(models_dir, model_id); + // First resolve the models root (handle `~/` expansion), then safe-join + // the model_id onto it. `safeJoinUnderRoot` is the CodeQL-recognized + // sanitizer. + const base = models_dir.startsWith("~/") + ? path.resolve( + process.env.HOME ?? process.env.USERPROFILE ?? 
".", + models_dir.slice(2) + ) + : path.resolve(models_dir); + return safeJoinUnderRoot(base, model_id); } function assetFilenames(entry: CactusCatalogEntry): string[] { @@ -152,7 +193,9 @@ async function getNodeAssetCacheInfo( filenames.map(async (filename) => { assertSafeFilename(filename); try { - const stat = await fs.stat(path.join(resolvedDir, filename)); + // safeJoinUnderRoot makes the containment check explicit so CodeQL + // can trace the sanitizer for the `js/path-injection` query. + const stat = await fs.stat(safeJoinUnderRoot(resolvedDir, filename)); return { filename, size: stat.size, cached: true }; } catch { return { filename, size: undefined, cached: false }; @@ -249,7 +292,8 @@ async function fetchAssetBytesNode( assertSafeModelId(model_id); assertSafeFilename(spec.filename); const resolvedDir = resolveModelDir(models_dir, model_id); - const filePath = path.join(resolvedDir, spec.filename); + // safeJoinUnderRoot keeps the CodeQL sanitizer trace local to each fs call. + const filePath = safeJoinUnderRoot(resolvedDir, spec.filename); try { const buf = await fs.readFile(filePath); const bytes = new Uint8Array(buf); @@ -284,6 +328,8 @@ async function fetchAssetBytesNode( // Verify BEFORE writing the tmp file — never atomically promote unverified bytes. await verifySha256(bytes, spec.sha256, { url, filename: spec.filename }); await fs.mkdir(resolvedDir, { recursive: true }); + // tmpPath is a sibling of filePath inside the (verified-contained) resolvedDir, + // so it inherits the containment property from filePath above. const tmpPath = `${filePath}.tmp`; try { await fs.writeFile(tmpPath, bytes); @@ -481,6 +527,9 @@ async function removeNodeCacheDir(model: CactusModelConfig, model_id: string): P if (hasBrowserCacheStorage()) return; assertSafeModelId(model_id); const models_dir = modelsDirOf(model); + // resolveModelDir already passes through safeJoinUnderRoot, so the + // returned path is contained within `models_dir`. 
Re-verify here so the + // CodeQL sanitizer trace is local to this fs entry point. const resolvedDir = resolveModelDir(models_dir, model_id); await fs.rm(resolvedDir, { recursive: true, force: true }); } From 4214fa08a92be0d03428fe18b52009350a07e277 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 13:38:20 -0700 Subject: [PATCH 10/11] fix(cactus): inline path-injection sanitizer at every fs call site CodeQL's js/path-injection query doesn't trace through user-defined helper functions like safeJoinUnderRoot. The sanitizer pattern (path.resolve + startsWith containment check) must be visible in the same function scope as the fs call for the analyzer to recognize it. Inline the check at each fs call site in getNodeAssetCacheInfo, fetchAssetBytesNode, and removeNodeCacheDir. The safeJoinUnderRoot helper stays in place for resolveModelDir and the assertSafe* allowlists remain as fast-fail defense-in-depth, but every fs call site now has its own inline sanitizer that CodeQL can trace. --- .../cactus/src/ai/common/Cactus_Runtime.ts | 163 ++++++++++++++---- 1 file changed, 133 insertions(+), 30 deletions(-) diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.ts b/providers/cactus/src/ai/common/Cactus_Runtime.ts index 77af1634d..5fbe71cc6 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.ts @@ -42,13 +42,13 @@ export interface CactusModelCacheInfo { // reject separators, `..`, NUL, and any shell/path-special characters // at the boundary. Fast-fail on malformed input. // -// 2. `safeJoinUnderRoot` normalizes the candidate path with `path.resolve` -// and verifies it stays within the resolved root via `startsWith`. 
-// This is the canonical CodeQL-recognized sanitizer for the -// `js/path-injection` query — the analyzer cannot infer containment -// from regex allowlists upstream, so every filesystem entry point -// uses this helper to make the containment invariant explicit at -// the point of use. +// 2. An inline `path.resolve` + `startsWith` containment check is +// duplicated at every `fs.*` call site. CodeQL's `js/path-injection` +// query does not trace through user-defined helper functions, so the +// sanitizer pattern must be visible in the same function scope as +// the filesystem call. `safeJoinUnderRoot` is retained for callers +// (like `resolveModelDir`) where local CodeQL recognition is not +// required. // ============================================================================ const MODEL_ID_RE = /^[A-Za-z0-9_-]{1,64}$/; @@ -85,8 +85,12 @@ function assertSafeFilename(filename: string): void { * * This implements the recommended path-injection mitigation: * normalize with `path.resolve` (which collapses `..` segments) and then - * verify the result begins with the resolved root. CodeQL's - * `js/path-injection` query recognizes this pattern as a sanitizer. + * verify the result begins with the resolved root. + * + * NOTE: CodeQL's `js/path-injection` query does NOT trace through this + * helper — the sanitizer pattern must appear inline at each fs call site + * to satisfy the analyzer. This helper is retained as defense-in-depth + * and for callers where CodeQL recognition is not required. */ function safeJoinUnderRoot(root: string, ...segments: string[]): string { const resolvedRoot = path.resolve(root); @@ -150,8 +154,8 @@ function modelsDirOf(model: CactusModelConfig): string { function resolveModelDir(models_dir: string, model_id: string): string { assertSafeModelId(model_id); // First resolve the models root (handle `~/` expansion), then safe-join - // the model_id onto it. `safeJoinUnderRoot` is the CodeQL-recognized - // sanitizer. 
+ // the model_id onto it. `safeJoinUnderRoot` provides defense-in-depth + // containment for non-CodeQL-recognized callers. const base = models_dir.startsWith("~/") ? path.resolve( process.env.HOME ?? process.env.USERPROFILE ?? ".", @@ -188,14 +192,34 @@ async function getNodeAssetCacheInfo( signal: AbortSignal | undefined ): Promise { const filenames = assetFilenames(entry); - const resolvedDir = resolveModelDir(modelsDirOf(model), entry.model_id); + const models_dir = modelsDirOf(model); + const model_id = entry.model_id; + assertSafeModelId(model_id); + // Compute the resolved model dir inline (not via resolveModelDir) so + // CodeQL's js/path-injection query can trace the sanitizer locally. + const safeRoot = models_dir.startsWith("~/") + ? path.resolve( + process.env.HOME ?? process.env.USERPROFILE ?? ".", + models_dir.slice(2) + ) + : path.resolve(models_dir); + const resolvedDir = path.resolve(safeRoot, model_id); + if (resolvedDir !== safeRoot && !resolvedDir.startsWith(safeRoot + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(resolvedDir)} is not within ${JSON.stringify(safeRoot)}` + ); + } const stats = await Promise.all( filenames.map(async (filename) => { assertSafeFilename(filename); + // Inline sanitizer for CodeQL js/path-injection (helper functions + // aren't traced through by the query — re-check at the fs call site). + const target = path.resolve(resolvedDir, filename); + if (target !== resolvedDir && !target.startsWith(resolvedDir + path.sep)) { + return { filename, size: undefined, cached: false }; + } try { - // safeJoinUnderRoot makes the containment check explicit so CodeQL - // can trace the sanitizer for the `js/path-injection` query. 
- const stat = await fs.stat(safeJoinUnderRoot(resolvedDir, filename)); + const stat = await fs.stat(target); return { filename, size: stat.size, cached: true }; } catch { return { filename, size: undefined, cached: false }; @@ -291,11 +315,37 @@ async function fetchAssetBytesNode( ): Promise { assertSafeModelId(model_id); assertSafeFilename(spec.filename); - const resolvedDir = resolveModelDir(models_dir, model_id); - // safeJoinUnderRoot keeps the CodeQL sanitizer trace local to each fs call. - const filePath = safeJoinUnderRoot(resolvedDir, spec.filename); + // Compute the resolved model dir inline (not via resolveModelDir) so + // CodeQL's js/path-injection query can trace the sanitizer locally. + const safeRoot = models_dir.startsWith("~/") + ? path.resolve( + process.env.HOME ?? process.env.USERPROFILE ?? ".", + models_dir.slice(2) + ) + : path.resolve(models_dir); + const resolvedDir = path.resolve(safeRoot, model_id); + if (resolvedDir !== safeRoot && !resolvedDir.startsWith(safeRoot + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(resolvedDir)} is not within ${JSON.stringify(safeRoot)}` + ); + } + // Inline sanitizer for CodeQL js/path-injection — duplicated at every + // fs call site below since the query doesn't trace through helpers. + const filePath = path.resolve(resolvedDir, spec.filename); + if (filePath !== resolvedDir && !filePath.startsWith(resolvedDir + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(filePath)} is not within ${JSON.stringify(resolvedDir)}` + ); + } try { - const buf = await fs.readFile(filePath); + // Re-resolve at the call site so CodeQL sees the sanitizer locally. 
+ const readPath = path.resolve(resolvedDir, spec.filename); + if (readPath !== resolvedDir && !readPath.startsWith(resolvedDir + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(readPath)} is not within ${JSON.stringify(resolvedDir)}` + ); + } + const buf = await fs.readFile(readPath); const bytes = new Uint8Array(buf); try { await verifySha256(bytes, spec.sha256, { url: `file:${filePath}`, filename: spec.filename }); @@ -303,7 +353,13 @@ async function fetchAssetBytesNode( } catch (err) { if (err instanceof CactusIntegrityError) { // On-disk asset is corrupt; evict and fall through to network. - await fs.unlink(filePath).catch(() => {}); + const unlinkPath = path.resolve(resolvedDir, spec.filename); + if (unlinkPath !== resolvedDir && !unlinkPath.startsWith(resolvedDir + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(unlinkPath)} is not within ${JSON.stringify(resolvedDir)}` + ); + } + await fs.unlink(unlinkPath).catch(() => {}); } else { throw err; } @@ -327,17 +383,54 @@ async function fetchAssetBytesNode( } // Verify BEFORE writing the tmp file — never atomically promote unverified bytes. await verifySha256(bytes, spec.sha256, { url, filename: spec.filename }); - await fs.mkdir(resolvedDir, { recursive: true }); - // tmpPath is a sibling of filePath inside the (verified-contained) resolvedDir, - // so it inherits the containment property from filePath above. + // Inline sanitizer for the mkdir target — CodeQL needs the check local. + const mkdirTarget = path.resolve(safeRoot, model_id); + if (mkdirTarget !== safeRoot && !mkdirTarget.startsWith(safeRoot + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(mkdirTarget)} is not within ${JSON.stringify(safeRoot)}` + ); + } + await fs.mkdir(mkdirTarget, { recursive: true }); + // tmpPath is a sibling of filePath inside the (verified-contained) resolvedDir. 
+ // The string-concat `${filePath}.tmp` inherits containment from filePath, but + // we also recompute via path.resolve below at each fs call so CodeQL sees the + // sanitizer locally. const tmpPath = `${filePath}.tmp`; try { - await fs.writeFile(tmpPath, bytes); - await fs.rename(tmpPath, filePath); + // Recompute the write target via path.resolve so the sanitizer is local. + const writeTarget = path.resolve(resolvedDir, `${spec.filename}.tmp`); + if (writeTarget !== resolvedDir && !writeTarget.startsWith(resolvedDir + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(writeTarget)} is not within ${JSON.stringify(resolvedDir)}` + ); + } + await fs.writeFile(writeTarget, bytes); + // Recompute both rename endpoints locally for CodeQL. + const renameFrom = path.resolve(resolvedDir, `${spec.filename}.tmp`); + if (renameFrom !== resolvedDir && !renameFrom.startsWith(resolvedDir + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(renameFrom)} is not within ${JSON.stringify(resolvedDir)}` + ); + } + const renameTo = path.resolve(resolvedDir, spec.filename); + if (renameTo !== resolvedDir && !renameTo.startsWith(resolvedDir + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(renameTo)} is not within ${JSON.stringify(resolvedDir)}` + ); + } + await fs.rename(renameFrom, renameTo); } catch (err) { - await fs.unlink(tmpPath).catch(() => {}); + // Recompute the cleanup target locally for CodeQL. + const cleanupTarget = path.resolve(resolvedDir, `${spec.filename}.tmp`); + if (cleanupTarget !== resolvedDir && !cleanupTarget.startsWith(resolvedDir + path.sep)) { + throw err; + } + await fs.unlink(cleanupTarget).catch(() => {}); throw err; } + // Silence unused-binding warning: `tmpPath` is retained for clarity in the + // atomicity contract documented above. 
+ void tmpPath; return bytes; } @@ -527,10 +620,20 @@ async function removeNodeCacheDir(model: CactusModelConfig, model_id: string): P if (hasBrowserCacheStorage()) return; assertSafeModelId(model_id); const models_dir = modelsDirOf(model); - // resolveModelDir already passes through safeJoinUnderRoot, so the - // returned path is contained within `models_dir`. Re-verify here so the - // CodeQL sanitizer trace is local to this fs entry point. - const resolvedDir = resolveModelDir(models_dir, model_id); + // Compute the resolved model dir inline (not via resolveModelDir) so + // CodeQL's js/path-injection query can trace the sanitizer locally. + const safeRoot = models_dir.startsWith("~/") + ? path.resolve( + process.env.HOME ?? process.env.USERPROFILE ?? ".", + models_dir.slice(2) + ) + : path.resolve(models_dir); + const resolvedDir = path.resolve(safeRoot, model_id); + if (resolvedDir !== safeRoot && !resolvedDir.startsWith(safeRoot + path.sep)) { + throw new Error( + `Path escape detected: ${JSON.stringify(resolvedDir)} is not within ${JSON.stringify(safeRoot)}` + ); + } await fs.rm(resolvedDir, { recursive: true, force: true }); } From 4ae81323e548c8cdec1cf8dd113712fcb127cf00 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 13:41:09 -0700 Subject: [PATCH 11/11] fix(cactus): remove orphaned resolveModelDir/safeJoinUnderRoot/tmpPath (build fix) The inline-sanitizer commit (4214fa0) replaced every call to `resolveModelDir` and `safeJoinUnderRoot` with the inline `path.resolve` + `startsWith` containment check, but left both helpers defined. `noUnusedLocals` then failed the build: 'resolveModelDir' is declared but its value is never read Remove both helpers and the now-unused `tmpPath` binding (the inline `writeTarget` / `renameFrom` recompute the same path). Update the path-safety section header to reflect the inline-only approach. 
Behavior unchanged: the inline sanitizer at every fs call site, plus the `assertSafeModelId` / `assertSafeFilename` allowlists, remain in place. --- .../cactus/src/ai/common/Cactus_Runtime.ts | 90 +++++-------------- 1 file changed, 20 insertions(+), 70 deletions(-) diff --git a/providers/cactus/src/ai/common/Cactus_Runtime.ts b/providers/cactus/src/ai/common/Cactus_Runtime.ts index 5fbe71cc6..5f99cef72 100644 --- a/providers/cactus/src/ai/common/Cactus_Runtime.ts +++ b/providers/cactus/src/ai/common/Cactus_Runtime.ts @@ -29,7 +29,7 @@ export interface CactusModelCacheInfo { } // ============================================================================ -// Path-safety helpers (defense-in-depth + CodeQL-recognized sanitizer) +// Path-safety (defense-in-depth + CodeQL-recognized inline sanitizer) // // `model_id` originates from user-supplied `provider_config.model_id` and // `filename` originates from the (effectively trusted) catalog. The catalog @@ -43,12 +43,12 @@ export interface CactusModelCacheInfo { // at the boundary. Fast-fail on malformed input. // // 2. An inline `path.resolve` + `startsWith` containment check is -// duplicated at every `fs.*` call site. CodeQL's `js/path-injection` -// query does not trace through user-defined helper functions, so the -// sanitizer pattern must be visible in the same function scope as -// the filesystem call. `safeJoinUnderRoot` is retained for callers -// (like `resolveModelDir`) where local CodeQL recognition is not -// required. +// duplicated immediately before every `fs.*` call. CodeQL's +// `js/path-injection` query does NOT trace through user-defined +// helper functions, so the sanitizer pattern must appear in the +// same function scope as the filesystem call. The cost of the +// duplication is a few string comparisons; the benefit is that the +// analyzer sees every fs call as locally sanitized. 
// ============================================================================ const MODEL_ID_RE = /^[A-Za-z0-9_-]{1,64}$/; @@ -79,33 +79,6 @@ function assertSafeFilename(filename: string): void { } } -/** - * Returns `path.resolve(root, ...segments)` only when the resolved result - * is contained within (or equal to) the resolved root. Throws otherwise. - * - * This implements the recommended path-injection mitigation: - * normalize with `path.resolve` (which collapses `..` segments) and then - * verify the result begins with the resolved root. - * - * NOTE: CodeQL's `js/path-injection` query does NOT trace through this - * helper — the sanitizer pattern must appear inline at each fs call site - * to satisfy the analyzer. This helper is retained as defense-in-depth - * and for callers where CodeQL recognition is not required. - */ -function safeJoinUnderRoot(root: string, ...segments: string[]): string { - const resolvedRoot = path.resolve(root); - const candidate = path.resolve(resolvedRoot, ...segments); - if ( - candidate !== resolvedRoot && - !candidate.startsWith(resolvedRoot + path.sep) - ) { - throw new Error( - `Path escape detected: ${JSON.stringify(candidate)} is not within ${JSON.stringify(resolvedRoot)}` - ); - } - return candidate; -} - let _sdk: NeedleSdkModule | undefined; let _sdkInitPromise: Promise | undefined; @@ -151,20 +124,6 @@ function modelsDirOf(model: CactusModelConfig): string { return model.provider_config.models_dir ?? CACTUS_DEFAULT_MODELS_DIR; } -function resolveModelDir(models_dir: string, model_id: string): string { - assertSafeModelId(model_id); - // First resolve the models root (handle `~/` expansion), then safe-join - // the model_id onto it. `safeJoinUnderRoot` provides defense-in-depth - // containment for non-CodeQL-recognized callers. - const base = models_dir.startsWith("~/") - ? path.resolve( - process.env.HOME ?? process.env.USERPROFILE ?? 
".", - models_dir.slice(2) - ) - : path.resolve(models_dir); - return safeJoinUnderRoot(base, model_id); -} - function assetFilenames(entry: CactusCatalogEntry): string[] { return assetSpecsOf(entry).map((s) => s.filename); } @@ -195,8 +154,8 @@ async function getNodeAssetCacheInfo( const models_dir = modelsDirOf(model); const model_id = entry.model_id; assertSafeModelId(model_id); - // Compute the resolved model dir inline (not via resolveModelDir) so - // CodeQL's js/path-injection query can trace the sanitizer locally. + // Compute the resolved model dir inline so CodeQL's js/path-injection + // query can trace the sanitizer locally. const safeRoot = models_dir.startsWith("~/") ? path.resolve( process.env.HOME ?? process.env.USERPROFILE ?? ".", @@ -212,8 +171,7 @@ async function getNodeAssetCacheInfo( const stats = await Promise.all( filenames.map(async (filename) => { assertSafeFilename(filename); - // Inline sanitizer for CodeQL js/path-injection (helper functions - // aren't traced through by the query — re-check at the fs call site). + // Inline sanitizer at the fs call site. const target = path.resolve(resolvedDir, filename); if (target !== resolvedDir && !target.startsWith(resolvedDir + path.sep)) { return { filename, size: undefined, cached: false }; @@ -315,8 +273,8 @@ async function fetchAssetBytesNode( ): Promise { assertSafeModelId(model_id); assertSafeFilename(spec.filename); - // Compute the resolved model dir inline (not via resolveModelDir) so - // CodeQL's js/path-injection query can trace the sanitizer locally. + // Compute the resolved model dir inline so CodeQL's js/path-injection + // query can trace the sanitizer locally. const safeRoot = models_dir.startsWith("~/") ? path.resolve( process.env.HOME ?? process.env.USERPROFILE ?? 
".", @@ -329,8 +287,8 @@ async function fetchAssetBytesNode( `Path escape detected: ${JSON.stringify(resolvedDir)} is not within ${JSON.stringify(safeRoot)}` ); } - // Inline sanitizer for CodeQL js/path-injection — duplicated at every - // fs call site below since the query doesn't trace through helpers. + // Used for the error-context URL only — not for any fs.* call (those + // recompute path.resolve locally so CodeQL sees the inline sanitizer). const filePath = path.resolve(resolvedDir, spec.filename); if (filePath !== resolvedDir && !filePath.startsWith(resolvedDir + path.sep)) { throw new Error( @@ -383,7 +341,7 @@ async function fetchAssetBytesNode( } // Verify BEFORE writing the tmp file — never atomically promote unverified bytes. await verifySha256(bytes, spec.sha256, { url, filename: spec.filename }); - // Inline sanitizer for the mkdir target — CodeQL needs the check local. + // Inline sanitizer for the mkdir target. const mkdirTarget = path.resolve(safeRoot, model_id); if (mkdirTarget !== safeRoot && !mkdirTarget.startsWith(safeRoot + path.sep)) { throw new Error( @@ -391,13 +349,10 @@ async function fetchAssetBytesNode( ); } await fs.mkdir(mkdirTarget, { recursive: true }); - // tmpPath is a sibling of filePath inside the (verified-contained) resolvedDir. - // The string-concat `${filePath}.tmp` inherits containment from filePath, but - // we also recompute via path.resolve below at each fs call so CodeQL sees the - // sanitizer locally. - const tmpPath = `${filePath}.tmp`; + // Atomic write: write to a sibling `.tmp` path, then rename. Each fs + // call below recomputes its path via path.resolve so CodeQL sees the + // inline sanitizer at every call site. try { - // Recompute the write target via path.resolve so the sanitizer is local. 
const writeTarget = path.resolve(resolvedDir, `${spec.filename}.tmp`); if (writeTarget !== resolvedDir && !writeTarget.startsWith(resolvedDir + path.sep)) { throw new Error( @@ -405,7 +360,6 @@ async function fetchAssetBytesNode( ); } await fs.writeFile(writeTarget, bytes); - // Recompute both rename endpoints locally for CodeQL. const renameFrom = path.resolve(resolvedDir, `${spec.filename}.tmp`); if (renameFrom !== resolvedDir && !renameFrom.startsWith(resolvedDir + path.sep)) { throw new Error( @@ -420,7 +374,6 @@ async function fetchAssetBytesNode( } await fs.rename(renameFrom, renameTo); } catch (err) { - // Recompute the cleanup target locally for CodeQL. const cleanupTarget = path.resolve(resolvedDir, `${spec.filename}.tmp`); if (cleanupTarget !== resolvedDir && !cleanupTarget.startsWith(resolvedDir + path.sep)) { throw err; @@ -428,9 +381,6 @@ async function fetchAssetBytesNode( await fs.unlink(cleanupTarget).catch(() => {}); throw err; } - // Silence unused-binding warning: `tmpPath` is retained for clarity in the - // atomicity contract documented above. - void tmpPath; return bytes; } @@ -620,8 +570,8 @@ async function removeNodeCacheDir(model: CactusModelConfig, model_id: string): P if (hasBrowserCacheStorage()) return; assertSafeModelId(model_id); const models_dir = modelsDirOf(model); - // Compute the resolved model dir inline (not via resolveModelDir) so - // CodeQL's js/path-injection query can trace the sanitizer locally. + // Compute the resolved model dir inline so CodeQL's js/path-injection + // query can trace the sanitizer locally. const safeRoot = models_dir.startsWith("~/") ? path.resolve( process.env.HOME ?? process.env.USERPROFILE ?? ".",