From a911c5467642edc885714f5225baea7a345a6317 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 00:33:44 +0000 Subject: [PATCH 1/8] feat(ai): IBackendsTransport interface + provider package scaffolding --- bun.lock | 54 +++++ packages/ai/src/provider-utils.ts | 1 + .../src/provider-utils/IBackendsTransport.ts | 120 ++++++++++ packages/test/package.json | 3 + .../LocalBackendsProviderContracts.test.ts | 197 ++++++++++++++++ .../test/ai/IBackendsTransport.types.test.ts | 75 +++++++ providers/llamacpp-server/package.json | 69 ++++++ providers/llamacpp-server/src/ai-runtime.ts | 9 + providers/llamacpp-server/src/ai.ts | 9 + .../src/ai/LlamaCppServerProvider.ts | 210 ++++++++++++++++++ .../src/ai/common/LlamaCppServer_Constants.ts | 7 + providers/llamacpp-server/src/ai/index.ts | 11 + .../src/ai/registerLlamaCppServer.ts | 27 +++ providers/llamacpp-server/src/ai/runtime.ts | 9 + providers/llamacpp-server/tsconfig.json | 29 +++ providers/mlx/package.json | 69 ++++++ providers/mlx/src/ai-runtime.ts | 9 + providers/mlx/src/ai.ts | 9 + providers/mlx/src/ai/MlxProvider.ts | 76 +++++++ providers/mlx/src/ai/common/Mlx_Constants.ts | 9 + providers/mlx/src/ai/index.ts | 11 + providers/mlx/src/ai/registerMlx.ts | 15 ++ providers/mlx/src/ai/runtime.ts | 9 + providers/mlx/tsconfig.json | 29 +++ .../stable-diffusion-server/package.json | 69 ++++++ .../stable-diffusion-server/src/ai-runtime.ts | 9 + providers/stable-diffusion-server/src/ai.ts | 9 + .../src/ai/StableDiffusionCppProvider.ts | 156 +++++++++++++ .../ai/common/StableDiffusionCpp_Constants.ts | 7 + .../stable-diffusion-server/src/ai/index.ts | 11 + .../src/ai/registerStableDiffusionCpp.ts | 28 +++ .../stable-diffusion-server/src/ai/runtime.ts | 9 + .../stable-diffusion-server/tsconfig.json | 29 +++ 33 files changed, 1393 insertions(+) create mode 100644 packages/ai/src/provider-utils/IBackendsTransport.ts create mode 100644 packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts 
create mode 100644 packages/test/src/test/ai/IBackendsTransport.types.test.ts create mode 100644 providers/llamacpp-server/package.json create mode 100644 providers/llamacpp-server/src/ai-runtime.ts create mode 100644 providers/llamacpp-server/src/ai.ts create mode 100644 providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts create mode 100644 providers/llamacpp-server/src/ai/index.ts create mode 100644 providers/llamacpp-server/src/ai/registerLlamaCppServer.ts create mode 100644 providers/llamacpp-server/src/ai/runtime.ts create mode 100644 providers/llamacpp-server/tsconfig.json create mode 100644 providers/mlx/package.json create mode 100644 providers/mlx/src/ai-runtime.ts create mode 100644 providers/mlx/src/ai.ts create mode 100644 providers/mlx/src/ai/MlxProvider.ts create mode 100644 providers/mlx/src/ai/common/Mlx_Constants.ts create mode 100644 providers/mlx/src/ai/index.ts create mode 100644 providers/mlx/src/ai/registerMlx.ts create mode 100644 providers/mlx/src/ai/runtime.ts create mode 100644 providers/mlx/tsconfig.json create mode 100644 providers/stable-diffusion-server/package.json create mode 100644 providers/stable-diffusion-server/src/ai-runtime.ts create mode 100644 providers/stable-diffusion-server/src/ai.ts create mode 100644 providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts create mode 100644 providers/stable-diffusion-server/src/ai/index.ts create mode 100644 providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts create mode 100644 providers/stable-diffusion-server/src/ai/runtime.ts create mode 100644 providers/stable-diffusion-server/tsconfig.json diff --git a/bun.lock b/bun.lock index bbef45094..0177b1e86 100644 --- a/bun.lock +++ b/bun.lock @@ -301,13 +301,16 @@ "@workglow/javascript": "workspace:*", 
"@workglow/job-queue": "workspace:*", "@workglow/knowledge-base": "workspace:*", + "@workglow/llamacpp-server": "workspace:*", "@workglow/mcp": "workspace:*", + "@workglow/mlx": "workspace:*", "@workglow/node-llama-cpp": "workspace:*", "@workglow/ollama": "workspace:*", "@workglow/openai": "workspace:*", "@workglow/playwright": "workspace:*", "@workglow/postgres": "workspace:*", "@workglow/sqlite": "workspace:*", + "@workglow/stable-diffusion-server": "workspace:*", "@workglow/storage": "workspace:*", "@workglow/supabase": "workspace:*", "@workglow/task-graph": "workspace:*", @@ -541,6 +544,36 @@ "@workglow/util": "workspace:*", }, }, + "providers/llamacpp-server": { + "name": "@workglow/llamacpp-server", + "version": "0.0.1", + "devDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/util": "workspace:*", + }, + "peerDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/job-queue": "workspace:*", + "@workglow/storage": "workspace:*", + "@workglow/task-graph": "workspace:*", + "@workglow/util": "workspace:*", + }, + }, + "providers/mlx": { + "name": "@workglow/mlx", + "version": "0.0.1", + "devDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/util": "workspace:*", + }, + "peerDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/job-queue": "workspace:*", + "@workglow/storage": "workspace:*", + "@workglow/task-graph": "workspace:*", + "@workglow/util": "workspace:*", + }, + }, "providers/node-llama-cpp": { "name": "@workglow/node-llama-cpp", "version": "0.3.5", @@ -666,6 +699,21 @@ "better-sqlite3", ], }, + "providers/stable-diffusion-server": { + "name": "@workglow/stable-diffusion-server", + "version": "0.0.1", + "devDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/util": "workspace:*", + }, + "peerDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/job-queue": "workspace:*", + "@workglow/storage": "workspace:*", + "@workglow/task-graph": "workspace:*", + "@workglow/util": "workspace:*", + }, + }, 
"providers/supabase": { "name": "@workglow/supabase", "version": "0.3.5", @@ -1405,8 +1453,12 @@ "@workglow/knowledge-base": ["@workglow/knowledge-base@workspace:packages/knowledge-base"], + "@workglow/llamacpp-server": ["@workglow/llamacpp-server@workspace:providers/llamacpp-server"], + "@workglow/mcp": ["@workglow/mcp@workspace:packages/mcp"], + "@workglow/mlx": ["@workglow/mlx@workspace:providers/mlx"], + "@workglow/node-llama-cpp": ["@workglow/node-llama-cpp@workspace:providers/node-llama-cpp"], "@workglow/ollama": ["@workglow/ollama@workspace:providers/ollama"], @@ -1419,6 +1471,8 @@ "@workglow/sqlite": ["@workglow/sqlite@workspace:providers/sqlite"], + "@workglow/stable-diffusion-server": ["@workglow/stable-diffusion-server@workspace:providers/stable-diffusion-server"], + "@workglow/storage": ["@workglow/storage@workspace:packages/storage"], "@workglow/supabase": ["@workglow/supabase@workspace:providers/supabase"], diff --git a/packages/ai/src/provider-utils.ts b/packages/ai/src/provider-utils.ts index ba076f02c..832ec1970 100644 --- a/packages/ai/src/provider-utils.ts +++ b/packages/ai/src/provider-utils.ts @@ -23,3 +23,4 @@ export * from "./provider-utils/imageOutputHelpers"; export * from "./provider-utils/BaseCloudProvider"; export * from "./provider-utils/CloudProviderClient"; export * from "./provider-utils/OpenAIShapedChat"; +export * from "./provider-utils/IBackendsTransport"; diff --git a/packages/ai/src/provider-utils/IBackendsTransport.ts b/packages/ai/src/provider-utils/IBackendsTransport.ts new file mode 100644 index 000000000..f4833482a --- /dev/null +++ b/packages/ai/src/provider-utils/IBackendsTransport.ts @@ -0,0 +1,120 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// ──────────────────────────────────────────────────────────────────────────── +// IBackendsTransport — renderer-side abstraction over backend transport +// +// Provider packages (libs) consume ONLY this interface. 
No platform-specific +// imports are permitted here; concrete implementations live elsewhere. +// ──────────────────────────────────────────────────────────────────────────── + +/** + * Request payload for `IBackendsTransport.ensureRunning`. + * + * `backend` is a plain string (not the BackendName union) so that provider + * packages do not need to know the closed set of backend identifiers. The + * transport implementation resolves it to the host's concrete backend identifier. + */ +export interface IEnsureRunningRequest { + /** Backend identifier, e.g. "llamacpp-server". */ + readonly backend: string; + /** Absolute path to the model file. */ + readonly modelPath: string; + /** + * Backend-specific runtime options forwarded to the broker as opaque JSON. + * llamacpp uses `{ ctx: number }`; sd-cpp passes an empty object `{}`; + * future backends define their own schema. + */ + readonly opts: Readonly>; +} + +/** + * Handle returned by a successful `ensureRunning` call. + * + * Callers MUST call `release()` when done to decrement the broker's refcount. + * After all handles for a backend are released, the broker may shut down the + * backend process after its idle timeout. + */ +export interface IRunningHandle { + /** Base URL of the running backend, e.g. "http://127.0.0.1:8765". */ + readonly url: string; + /** + * Decrements the broker's refcount for this handle. The backend may shut + * down after the broker's idle timeout if refcount reaches zero. + * + * The returned promise resolves once the release message has been posted + * to the port; the broker does not acknowledge. Errors posting (e.g. port + * closed) reject. + */ + readonly release: () => Promise; +} + +/** + * Status snapshot for a backend. + * + * Mirrors the host transport's backend status snapshot shape without coupling + * this shared interface to any package-private implementation path. 
+ */ +export interface IBackendStatus { + readonly state: "not-installed" | "installed" | "running" | "error"; + readonly message: string | undefined; + readonly pinnedVersion: string | undefined; +} + +/** + * Renderer-side transport abstraction for the backends broker. + * + * Concrete implementations obtain a channel from their host environment and + * speak whatever request/response protocol that host defines. + * + * Provider packages import ONLY this interface — no platform-specific imports. + */ +export interface IBackendsTransport { + /** + * Acquire (or share) a running backend. Resolves once the backend is healthy. + * + * Multiple callers requesting the same `(backend, modelPath, opts)` triple + * will share one process via the broker's refcounting. `release()` on the + * returned handle decrements the refcount. + */ + ensureRunning(req: IEnsureRunningRequest): Promise; + + /** + * Subscribe to status updates for a backend. + * + * The callback fires on every subsequent broker `status` event; callers + * wanting an initial snapshot must invoke `list()`. Subscriptions persist + * across port reconnects. Implementations MUST be idempotent: calling the + * returned unsubscribe twice is a no-op; subscribing the same callback + * twice is allowed and de-duplicated. + * + * @returns An unsubscribe function. Call it to stop receiving updates. + */ + subscribeStatus(backend: string, callback: (status: IBackendStatus) => void): () => void; + + /** + * Install a backend (download + verify + extract). Resolves when the backend + * reaches the "installed" state. Rejects on download / verification failure. + * + * Progress is reported via the optional `onProgress(bytes, total)` callback + * (bytes received so far, expected total); `total` may be 0 if the content-length is unknown. + */ + install(backend: string, onProgress?: (bytes: number, total: number) => void): Promise; + + /** + * Fire-and-forget request for the broker to broadcast a `status` + * event for every backend in its registry. 
Resolves once the request + * has been posted (the broker does not send a discrete reply). + */ + list(): Promise; + + /** + * Removes the backend's installed binary. In v1 the broker rejects + * this with an error; callers should handle the rejection. Future + * versions may implement teardown semantics. + */ + uninstall(backend: string): Promise; +} diff --git a/packages/test/package.json b/packages/test/package.json index c52ace208..faf78286a 100644 --- a/packages/test/package.json +++ b/packages/test/package.json @@ -47,13 +47,16 @@ "@workglow/javascript": "workspace:*", "@workglow/job-queue": "workspace:*", "@workglow/knowledge-base": "workspace:*", + "@workglow/llamacpp-server": "workspace:*", "@workglow/mcp": "workspace:*", + "@workglow/mlx": "workspace:*", "@workglow/node-llama-cpp": "workspace:*", "@workglow/ollama": "workspace:*", "@workglow/openai": "workspace:*", "@workglow/playwright": "workspace:*", "@workglow/postgres": "workspace:*", "@workglow/sqlite": "workspace:*", + "@workglow/stable-diffusion-server": "workspace:*", "@workglow/storage": "workspace:*", "@workglow/supabase": "workspace:*", "@workglow/task-graph": "workspace:*", diff --git a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts new file mode 100644 index 000000000..7e0f04c5d --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts @@ -0,0 +1,197 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFn } from "@workglow/ai"; +import { + AiProviderRegistry, + createEmitQueue, + getAiProviderRegistry, + setAiProviderRegistry, +} from "@workglow/ai"; +import type { + IBackendStatus, + IBackendsTransport, + IEnsureRunningRequest, + IRunningHandle, +} from "@workglow/ai/provider-utils"; +import { pngBytesToImageValue } from "@workglow/ai/provider-utils"; +import { 
LlamaCppServerProvider } from "@workglow/llamacpp-server/ai"; +import { StableDiffusionCppProvider } from "@workglow/stable-diffusion-server/ai"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +vi.mock("@workglow/ai/provider-utils", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + pngBytesToImageValue: vi.fn(), + }; +}); + +const originalFetch = globalThis.fetch; +const MOCK_IMAGE = { kind: "mock-image" } as const; + +interface ITransportStub { + readonly ensureRunning: ReturnType; + readonly release: ReturnType; + readonly transport: IBackendsTransport; +} + +function createTransportStub(url = "http://127.0.0.1:8765"): ITransportStub { + const release = vi.fn(async (): Promise => undefined); + const ensureRunning = vi.fn( + async (_req: IEnsureRunningRequest): Promise => ({ + url, + release, + }) + ); + const transport: IBackendsTransport = { + ensureRunning, + subscribeStatus: ( + _backend: string, + _callback: (status: IBackendStatus) => void + ): (() => void) => { + return (): void => undefined; + }, + install: async (_backend: string): Promise => undefined, + list: async (): Promise => undefined, + uninstall: async (_backend: string): Promise => undefined, + }; + + return { ensureRunning, release, transport }; +} + +async function runProviderStream( + runFn: AiProviderRunFn, + input: Record, + model: Record, + timeoutMs = 100 +): Promise { + const q = createEmitQueue(); + const events: unknown[] = []; + const controller = new AbortController(); + + const runPromise = runFn(input as any, model as any, controller.signal, (event: unknown) => + q.push(event) + ).then( + () => q.close(), + (error: unknown) => q.fail(error) + ); + const consumePromise = (async (): Promise => { + for await (const event of q.iterable) { + events.push(event); + } + })(); + + await Promise.race([ + Promise.all([runPromise, consumePromise]), + new Promise((_, reject) => { + setTimeout(() => reject(new 
Error(`timed out after ${timeoutMs}ms`)), timeoutMs); + }), + ]); + + return events; +} + +describe("local backend provider stream contracts", () => { + beforeEach(() => { + setAiProviderRegistry(new AiProviderRegistry()); + globalThis.fetch = originalFetch; + vi.clearAllMocks(); + vi.mocked(pngBytesToImageValue).mockResolvedValue(MOCK_IMAGE as any); + }); + + afterEach(() => { + globalThis.fetch = originalFetch; + }); + + it("llama.cpp stops after [DONE] even if the server keeps the stream open", async () => { + const { release, transport } = createTransportStub(); + const provider = new LlamaCppServerProvider({ transport }); + await provider.register(); + + const runFn = getAiProviderRegistry().getRunFnFor(provider.name, ["text.generation"]); + expect(runFn).toBeDefined(); + + const payload = new TextEncoder().encode( + 'data: {"choices":[{"delta":{"content":"hello"}}]}\n\ndata: [DONE]\n\n' + ); + + let resolvePendingRead: + | ((value: { readonly done: boolean; readonly value?: Uint8Array }) => void) + | undefined; + let readCount = 0; + const reader = { + read: vi.fn(() => { + readCount += 1; + if (readCount === 1) { + return Promise.resolve({ done: false, value: payload }); + } + return new Promise<{ readonly done: boolean; readonly value?: Uint8Array }>((resolve) => { + resolvePendingRead = resolve; + }); + }), + cancel: vi.fn(async (): Promise => { + resolvePendingRead?.({ done: true }); + }), + releaseLock: vi.fn((): void => undefined), + }; + + globalThis.fetch = vi.fn( + async () => + ({ + ok: true, + body: { getReader: () => reader }, + }) as unknown as Response + ) as unknown as typeof fetch; + + const events = await runProviderStream( + runFn!, + { prompt: "hello" }, + { model_id: "/models/llama.gguf" } + ); + + expect(events).toEqual([ + { type: "text-delta", port: "text", textDelta: "hello" }, + { type: "finish", data: {} }, + ]); + expect(reader.releaseLock).toHaveBeenCalledTimes(1); + expect(release).toHaveBeenCalledTimes(1); + }); + + 
it("stable-diffusion emits the generated image as a snapshot before finish", async () => { + const { release, transport } = createTransportStub(); + const provider = new StableDiffusionCppProvider({ transport }); + await provider.register(); + + const runFn = getAiProviderRegistry().getRunFnFor(provider.name, ["image.generation"]); + expect(runFn).toBeDefined(); + + globalThis.fetch = vi.fn( + async () => + ({ + ok: true, + json: async (): Promise<{ images: string[] }> => ({ + images: [ + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/aFEAAAAASUVORK5CYII=", + ], + }), + }) as unknown as Response + ) as unknown as typeof fetch; + + const events = await runProviderStream( + runFn!, + { prompt: "draw a cat" }, + { model_id: "/models/stable-diffusion.gguf" } + ); + + expect(events).toEqual([ + { type: "snapshot", data: { image: MOCK_IMAGE } }, + { type: "finish", data: {} }, + ]); + expect(pngBytesToImageValue).toHaveBeenCalledTimes(1); + expect(release).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/test/src/test/ai/IBackendsTransport.types.test.ts b/packages/test/src/test/ai/IBackendsTransport.types.test.ts new file mode 100644 index 000000000..d002ae162 --- /dev/null +++ b/packages/test/src/test/ai/IBackendsTransport.types.test.ts @@ -0,0 +1,75 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// ──────────────────────────────────────────────────────────────────────────── +// Compile-time conformance tests for IBackendsTransport. +// +// These tests run via the test runner but their value is in `tsc` accepting +// (or rejecting) the declarations below. No runtime assertions of substance: +// if the file compiles, the contract holds. +// +// Lives under packages/test/src/test/ai/ so that scripts/test.ts picks it up +// (the runner only scans packages/test/src/test). 
+// ──────────────────────────────────────────────────────────────────────────── + +import type { + IBackendsTransport, + IBackendStatus, + IEnsureRunningRequest, + IRunningHandle, +} from "@workglow/ai/provider-utils"; +import { expect, test } from "vitest"; + +// `opts` is open — accepts the historic llamacpp shape … +const _checkOptsWithCtx: IEnsureRunningRequest["opts"] = { ctx: 4096 }; +// … the empty shape that sd-cpp uses today (no per-run options) … +const _checkOptsEmpty: IEnsureRunningRequest["opts"] = {}; +// … and arbitrary shapes future backends may define. +const _checkOptsArbitrary: IEnsureRunningRequest["opts"] = { foo: "bar", n: 42 }; + +// Interface exposes `list` and `uninstall` alongside the existing methods. +type _Methods = keyof IBackendsTransport; +const _hasList: _Methods = "list"; +const _hasUninstall: _Methods = "uninstall"; + +// Structural conformance using explicit parameter signatures: TypeScript +// allows assigning `() => X` to `(arg: T) => X`, so a zero-arg dummy would +// silently accept a parameter-list change. Spelling each signature out +// forces a typecheck failure on any rename, type change, or return-type +// change to a method of `IBackendsTransport`. +const _conforms: IBackendsTransport = { + ensureRunning: (_req: IEnsureRunningRequest): Promise => { + return Promise.resolve({ + url: "http://127.0.0.1:0", + release: (): Promise => Promise.resolve(), + }); + }, + subscribeStatus: ( + _backend: string, + _callback: (status: IBackendStatus) => void + ): (() => void) => { + return (): void => undefined; + }, + install: ( + _backend: string, + _onProgress?: (bytes: number, total: number) => void + ): Promise => Promise.resolve(), + list: (): Promise => Promise.resolve(), + uninstall: (_backend: string): Promise => Promise.resolve(), +}; + +// Silence `no-unused-vars` / `noUnusedLocals` on the type-only assertions. 
+void _checkOptsWithCtx; +void _checkOptsEmpty; +void _checkOptsArbitrary; +void _hasList; +void _hasUninstall; +void _conforms; + +// Vitest requires at least one runtime test in the file. +test("IBackendsTransport conformance compiles", () => { + expect(true).toBe(true); +}); diff --git a/providers/llamacpp-server/package.json b/providers/llamacpp-server/package.json new file mode 100644 index 000000000..21f794b3a --- /dev/null +++ b/providers/llamacpp-server/package.json @@ -0,0 +1,69 @@ +{ + "name": "@workglow/llamacpp-server", + "type": "module", + "sideEffects": false, + "version": "0.0.1", + "repository": { + "type": "git", + "url": "https://github.com/workglow-dev/libs.git", + "directory": "providers/llamacpp-server" + }, + "description": "OpenAI-compatible HTTP client for an external or embedded llama-server", + "scripts": { + "watch": "concurrently -c 'auto' 'bun:watch-*'", + "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "watch-types": "tsc --watch --preserveWatchOutput", + "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'", + "build-js": "bun run build-code", + "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo", + "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "build-types": "rm -f tsconfig.tsbuildinfo && tsgo", + "lint": "eslint . 
--ext ts,tsx --report-unused-disable-directives --max-warnings 0" + }, + "exports": { + "./ai": { + "types": "./dist/ai.d.ts", + "import": "./dist/ai.js" + }, + "./ai-runtime": { + "types": "./dist/ai-runtime.d.ts", + "import": "./dist/ai-runtime.js" + } + }, + "dependencies": {}, + "peerDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/job-queue": "workspace:*", + "@workglow/storage": "workspace:*", + "@workglow/task-graph": "workspace:*", + "@workglow/util": "workspace:*" + }, + "peerDependenciesMeta": { + "@workglow/ai": { + "optional": false + }, + "@workglow/job-queue": { + "optional": false + }, + "@workglow/storage": { + "optional": false + }, + "@workglow/task-graph": { + "optional": false + }, + "@workglow/util": { + "optional": false + } + }, + "devDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/util": "workspace:*" + }, + "files": [ + "dist", + "src/**/*.md" + ], + "publishConfig": { + "access": "public" + } +} diff --git a/providers/llamacpp-server/src/ai-runtime.ts b/providers/llamacpp-server/src/ai-runtime.ts new file mode 100644 index 000000000..a1fd9b608 --- /dev/null +++ b/providers/llamacpp-server/src/ai-runtime.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/runtime"; diff --git a/providers/llamacpp-server/src/ai.ts b/providers/llamacpp-server/src/ai.ts new file mode 100644 index 000000000..2210c547d --- /dev/null +++ b/providers/llamacpp-server/src/ai.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/index"; diff --git a/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts b/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts new file mode 100644 index 000000000..88c0eb394 --- /dev/null +++ b/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts @@ -0,0 +1,210 @@ 
+/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + AiProviderPreviewRunFn, + AiProviderRunFn, + AiProviderRunFnRegistration, + Capability, + ModelConfig, + ModelRecord, + TextGenerationTaskInput, + TextGenerationTaskOutput, +} from "@workglow/ai"; +import { AiProvider } from "@workglow/ai"; +import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import { LOCAL_LLAMACPP_SERVER } from "./common/LlamaCppServer_Constants"; + +export interface ILlamaCppServerProviderOptions { + readonly transport: IBackendsTransport; + readonly externalUrl?: string; + /** + * Default context length passed to the broker when launching a backend. + * Picked per request; larger values trade RAM for prompt+output budget. + * Defaults to 4096 if unset. + */ + readonly defaultCtx?: number; +} + +/** + * OpenAI-compatible HTTP chat-completion provider that forwards requests to a + * running llama-server instance. If `externalUrl` is provided the server is + * assumed to already be running; otherwise the provider acquires a handle via + * `transport.ensureRunning` before each request and releases it afterwards. + * + * v1 scope: chat completion only, registered under the `text.generation` + * capability; no other capabilities or preview tasks are registered. 
+ */ +export class LlamaCppServerProvider extends AiProvider { + readonly name = LOCAL_LLAMACPP_SERVER; + readonly displayName = "Local llama-server (HTTP)"; + readonly isLocal = true; + readonly supportsBrowser = false; + + constructor(options: ILlamaCppServerProviderOptions) { + const runFns: readonly AiProviderRunFnRegistration< + TextGenerationTaskInput, + TextGenerationTaskOutput, + ModelConfig + >[] = [ + { + serves: ["text.generation"] as readonly Capability[], + runFn: createLlamaCppServerTextGenerationStream(options) as AiProviderRunFn< + TextGenerationTaskInput, + TextGenerationTaskOutput, + ModelConfig + >, + }, + ]; + + const previewTasks: Record< + string, + AiProviderPreviewRunFn + > = {}; + + super(runFns, previewTasks); + } + + override inferCapabilities(model: ModelRecord): readonly Capability[] { + return (model.capabilities as readonly Capability[] | undefined) ?? ["text.generation"]; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Chat-completion run-fn +// ───────────────────────────────────────────────────────────────────────────── + +interface UnifiedTextGenerationInput extends TextGenerationTaskInput { + readonly messages?: readonly { readonly role: string; readonly content: string }[]; + readonly systemPrompt?: string; +} + +/** + * Build and stream a chat-completion request against a llama-server + * `/v1/chat/completions` endpoint. + * + * Discriminates on `Array.isArray(input.messages) && input.messages.length > 0` + * so {@link AiChatTask} (chat path) and {@link TextGenerationTask} + * (prompt-only path) share the same registered run-fn, consistent with + * the pattern used across workglow providers. 
+ */ +function createLlamaCppServerTextGenerationStream( + options: ILlamaCppServerProviderOptions +): AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + + const unified = input as UnifiedTextGenerationInput; + const hasMessages = Array.isArray(unified.messages) && unified.messages.length > 0; + + const messages = hasMessages + ? [ + ...(unified.systemPrompt ? [{ role: "system", content: unified.systemPrompt }] : []), + ...unified.messages!.map((m) => ({ role: m.role, content: m.content })), + ] + : [{ role: "user", content: input.prompt }]; + + const body = JSON.stringify({ + model: model?.model_id ?? "", + messages, + stream: true, + ...(input.maxTokens !== undefined ? { max_tokens: input.maxTokens } : {}), + ...(input.temperature !== undefined ? { temperature: input.temperature } : {}), + ...(input.topP !== undefined ? { top_p: input.topP } : {}), + ...(input.frequencyPenalty !== undefined + ? { frequency_penalty: input.frequencyPenalty } + : {}), + ...(input.presencePenalty !== undefined ? { presence_penalty: input.presencePenalty } : {}), + }); + + // Acquire base URL — either from external override or via transport. + let baseUrl: string; + let handle: IRunningHandle | undefined; + + if (options.externalUrl) { + baseUrl = options.externalUrl.replace(/\/$/, ""); + } else { + if (!model?.model_id) { + throw new Error("LlamaCppServerProvider: model.model_id is required to acquire a backend"); + } + handle = await options.transport.ensureRunning({ + backend: "llamacpp-server", + modelPath: model.model_id, + opts: { ctx: options.defaultCtx ?? 
4096 }, + }); + baseUrl = handle.url.replace(/\/$/, ""); + } + + try { + signal?.throwIfAborted?.(); + + const response = await fetch(`${baseUrl}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `LlamaCppServerProvider: HTTP ${response.status} from /v1/chat/completions — ${text}` + ); + } + + const reader = response.body?.getReader(); + if (!reader) { + throw new Error("LlamaCppServerProvider: response body is null"); + } + + const decoder = new TextDecoder(); + let buffer = ""; + + try { + let sawDone = false; + while (!sawDone) { + signal?.throwIfAborted?.(); + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() ?? ""; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed.startsWith("data:")) continue; + const data = trimmed.slice(5).trim(); + if (data === "[DONE]") { + sawDone = true; + await reader.cancel().catch(() => undefined); + break; + } + if (!data) continue; + + let chunk: { choices?: { delta?: { content?: string } }[] }; + try { + chunk = JSON.parse(data) as typeof chunk; + } catch { + continue; + } + + const delta = chunk.choices?.[0]?.delta?.content; + if (delta) { + emit({ type: "text-delta", port: "text", textDelta: delta }); + } + } + } + } finally { + reader.releaseLock(); + } + + emit({ type: "finish", data: {} as TextGenerationTaskOutput }); + } finally { + await handle?.release(); + } + }; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts new file mode 100644 index 000000000..53cc7a8ee --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts @@ -0,0 +1,7 @@ +/** + * @license + * Copyright 
2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +export const LOCAL_LLAMACPP_SERVER = "LOCAL_LLAMACPP_SERVER"; diff --git a/providers/llamacpp-server/src/ai/index.ts b/providers/llamacpp-server/src/ai/index.ts new file mode 100644 index 000000000..4f3d7f42a --- /dev/null +++ b/providers/llamacpp-server/src/ai/index.ts @@ -0,0 +1,11 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./common/LlamaCppServer_Constants"; +export * from "./LlamaCppServerProvider"; +export * from "./registerLlamaCppServer"; diff --git a/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts b/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts new file mode 100644 index 000000000..b0f4406aa --- /dev/null +++ b/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts @@ -0,0 +1,27 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRegisterOptions } from "@workglow/ai"; +import type { IBackendsTransport } from "@workglow/ai/provider-utils"; +import { registerProviderInline } from "@workglow/ai/provider-utils"; +import { LlamaCppServerProvider } from "./LlamaCppServerProvider"; + +export interface IRegisterLlamaCppServerOptions extends AiProviderRegisterOptions { + readonly transport: IBackendsTransport; + readonly externalUrl?: string; + readonly defaultCtx?: number; +} + +export async function registerLlamaCppServer( + options: IRegisterLlamaCppServerOptions +): Promise { + const { transport, externalUrl, defaultCtx, ...registerOptions } = options; + await registerProviderInline( + new LlamaCppServerProvider({ transport, externalUrl, defaultCtx }), + "LlamaCppServer", + registerOptions + ); +} diff --git a/providers/llamacpp-server/src/ai/runtime.ts b/providers/llamacpp-server/src/ai/runtime.ts new file mode 100644 index 000000000..5a1f42e73 --- /dev/null +++ 
b/providers/llamacpp-server/src/ai/runtime.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./registerLlamaCppServer"; diff --git a/providers/llamacpp-server/tsconfig.json b/providers/llamacpp-server/tsconfig.json new file mode 100644 index 000000000..4d9631e9b --- /dev/null +++ b/providers/llamacpp-server/tsconfig.json @@ -0,0 +1,29 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "composite": true, + "rootDir": "src", + "outDir": "dist", + "tsBuildInfoFile": "tsconfig.tsbuildinfo" + }, + "include": [ + "src/**/*" + ], + "references": [ + { + "path": "../../packages/util" + }, + { + "path": "../../packages/task-graph" + }, + { + "path": "../../packages/storage" + }, + { + "path": "../../packages/job-queue" + }, + { + "path": "../../packages/ai" + } + ] +} diff --git a/providers/mlx/package.json b/providers/mlx/package.json new file mode 100644 index 000000000..0c490acdd --- /dev/null +++ b/providers/mlx/package.json @@ -0,0 +1,69 @@ +{ + "name": "@workglow/mlx", + "type": "module", + "sideEffects": false, + "version": "0.0.1", + "repository": { + "type": "git", + "url": "https://github.com/workglow-dev/libs.git", + "directory": "providers/mlx" + }, + "description": "MLX provider stub — Python runtime not bundled in v1; see roadmap", + "scripts": { + "watch": "concurrently -c 'auto' 'bun:watch-*'", + "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "watch-types": "tsc --watch --preserveWatchOutput", + "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'", + "build-js": "bun run build-code", + "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo", + "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + 
"build-types": "rm -f tsconfig.tsbuildinfo && tsgo", + "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0" + }, + "exports": { + "./ai": { + "types": "./dist/ai.d.ts", + "import": "./dist/ai.js" + }, + "./ai-runtime": { + "types": "./dist/ai-runtime.d.ts", + "import": "./dist/ai-runtime.js" + } + }, + "dependencies": {}, + "peerDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/job-queue": "workspace:*", + "@workglow/storage": "workspace:*", + "@workglow/task-graph": "workspace:*", + "@workglow/util": "workspace:*" + }, + "peerDependenciesMeta": { + "@workglow/ai": { + "optional": false + }, + "@workglow/job-queue": { + "optional": false + }, + "@workglow/storage": { + "optional": false + }, + "@workglow/task-graph": { + "optional": false + }, + "@workglow/util": { + "optional": false + } + }, + "devDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/util": "workspace:*" + }, + "files": [ + "dist", + "src/**/*.md" + ], + "publishConfig": { + "access": "public" + } +} diff --git a/providers/mlx/src/ai-runtime.ts b/providers/mlx/src/ai-runtime.ts new file mode 100644 index 000000000..a1fd9b608 --- /dev/null +++ b/providers/mlx/src/ai-runtime.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/runtime"; diff --git a/providers/mlx/src/ai.ts b/providers/mlx/src/ai.ts new file mode 100644 index 000000000..2210c547d --- /dev/null +++ b/providers/mlx/src/ai.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/index"; diff --git a/providers/mlx/src/ai/MlxProvider.ts b/providers/mlx/src/ai/MlxProvider.ts new file mode 100644 index 000000000..a73a87aac --- /dev/null +++ b/providers/mlx/src/ai/MlxProvider.ts @@ -0,0 +1,76 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * 
SPDX-License-Identifier: Apache-2.0 + */ + +// TODO(roadmap): when Python runtime bundling lands, replace this stub with a real MLX provider. + +import type { + AiProviderPreviewRunFn, + AiProviderRunFn, + AiProviderRunFnRegistration, + Capability, + ModelConfig, + ModelRecord, + TextGenerationTaskInput, + TextGenerationTaskOutput, +} from "@workglow/ai"; +import { AiProvider } from "@workglow/ai"; +import { LOCAL_MLX } from "./common/Mlx_Constants"; + +/** + * MLX provider stub. + * + * The MLX runtime requires a Python environment which is not bundled in v1. + * The provider registers cleanly so the UI can list it, but all inference + * calls throw immediately. See roadmap for Python runtime bundling plans. + */ +export class MlxProvider extends AiProvider { + readonly name = LOCAL_MLX; + readonly displayName = "Local MLX (Apple Silicon)"; + readonly isLocal = true; + readonly supportsBrowser = false; + + constructor() { + const runFns: readonly AiProviderRunFnRegistration< + TextGenerationTaskInput, + TextGenerationTaskOutput, + ModelConfig + >[] = [ + { + serves: ["text.generation"] as readonly Capability[], + runFn: mlxNotAvailableRunFn as AiProviderRunFn< + TextGenerationTaskInput, + TextGenerationTaskOutput, + ModelConfig + >, + }, + ]; + + const previewTasks: Record< + string, + AiProviderPreviewRunFn + > = {}; + + super(runFns, previewTasks); + } + + override inferCapabilities(model: ModelRecord): readonly Capability[] { + return (model.capabilities as readonly Capability[] | undefined) ?? 
["text.generation"]; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Stub run-fn — always throws +// ───────────────────────────────────────────────────────────────────────────── + +const mlxNotAvailableRunFn: AiProviderRunFn< + TextGenerationTaskInput, + TextGenerationTaskOutput, + ModelConfig +> = async (_input, _model, _signal, _emit) => { + throw new Error( + "MLX provider not available: Python runtime not bundled in v1; see roadmap." + ); +}; diff --git a/providers/mlx/src/ai/common/Mlx_Constants.ts b/providers/mlx/src/ai/common/Mlx_Constants.ts new file mode 100644 index 000000000..0c8268ac3 --- /dev/null +++ b/providers/mlx/src/ai/common/Mlx_Constants.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// TODO(roadmap): when Python runtime bundling lands, replace this stub with a real MLX provider. + +export const LOCAL_MLX = "LOCAL_MLX"; diff --git a/providers/mlx/src/ai/index.ts b/providers/mlx/src/ai/index.ts new file mode 100644 index 000000000..c5e3ca5f3 --- /dev/null +++ b/providers/mlx/src/ai/index.ts @@ -0,0 +1,11 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./common/Mlx_Constants"; +export * from "./MlxProvider"; +export * from "./registerMlx"; diff --git a/providers/mlx/src/ai/registerMlx.ts b/providers/mlx/src/ai/registerMlx.ts new file mode 100644 index 000000000..ce0cbd3bc --- /dev/null +++ b/providers/mlx/src/ai/registerMlx.ts @@ -0,0 +1,15 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRegisterOptions } from "@workglow/ai"; +import { registerProviderInline } from "@workglow/ai/provider-utils"; +import { MlxProvider } from "./MlxProvider"; + +export interface IRegisterMlxOptions extends AiProviderRegisterOptions {} + +export async function 
registerMlx(options: IRegisterMlxOptions): Promise { + await registerProviderInline(new MlxProvider(), "Mlx", options); +} diff --git a/providers/mlx/src/ai/runtime.ts b/providers/mlx/src/ai/runtime.ts new file mode 100644 index 000000000..78cd855e6 --- /dev/null +++ b/providers/mlx/src/ai/runtime.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./registerMlx"; diff --git a/providers/mlx/tsconfig.json b/providers/mlx/tsconfig.json new file mode 100644 index 000000000..4d9631e9b --- /dev/null +++ b/providers/mlx/tsconfig.json @@ -0,0 +1,29 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "composite": true, + "rootDir": "src", + "outDir": "dist", + "tsBuildInfoFile": "tsconfig.tsbuildinfo" + }, + "include": [ + "src/**/*" + ], + "references": [ + { + "path": "../../packages/util" + }, + { + "path": "../../packages/task-graph" + }, + { + "path": "../../packages/storage" + }, + { + "path": "../../packages/job-queue" + }, + { + "path": "../../packages/ai" + } + ] +} diff --git a/providers/stable-diffusion-server/package.json b/providers/stable-diffusion-server/package.json new file mode 100644 index 000000000..b3718e830 --- /dev/null +++ b/providers/stable-diffusion-server/package.json @@ -0,0 +1,69 @@ +{ + "name": "@workglow/stable-diffusion-server", + "type": "module", + "sideEffects": false, + "version": "0.0.1", + "repository": { + "type": "git", + "url": "https://github.com/workglow-dev/libs.git", + "directory": "providers/stable-diffusion-server" + }, + "description": "Local sd.cpp HTTP client for an external or embedded stable-diffusion.cpp server", + "scripts": { + "watch": "concurrently -c 'auto' 'bun:watch-*'", + "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "watch-types": "tsc --watch --preserveWatchOutput", + 
"build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'", + "build-js": "bun run build-code", + "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo", + "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "build-types": "rm -f tsconfig.tsbuildinfo && tsgo", + "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0" + }, + "exports": { + "./ai": { + "types": "./dist/ai.d.ts", + "import": "./dist/ai.js" + }, + "./ai-runtime": { + "types": "./dist/ai-runtime.d.ts", + "import": "./dist/ai-runtime.js" + } + }, + "dependencies": {}, + "peerDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/job-queue": "workspace:*", + "@workglow/storage": "workspace:*", + "@workglow/task-graph": "workspace:*", + "@workglow/util": "workspace:*" + }, + "peerDependenciesMeta": { + "@workglow/ai": { + "optional": false + }, + "@workglow/job-queue": { + "optional": false + }, + "@workglow/storage": { + "optional": false + }, + "@workglow/task-graph": { + "optional": false + }, + "@workglow/util": { + "optional": false + } + }, + "devDependencies": { + "@workglow/ai": "workspace:*", + "@workglow/util": "workspace:*" + }, + "files": [ + "dist", + "src/**/*.md" + ], + "publishConfig": { + "access": "public" + } +} \ No newline at end of file diff --git a/providers/stable-diffusion-server/src/ai-runtime.ts b/providers/stable-diffusion-server/src/ai-runtime.ts new file mode 100644 index 000000000..a1fd9b608 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai-runtime.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/runtime"; diff --git a/providers/stable-diffusion-server/src/ai.ts b/providers/stable-diffusion-server/src/ai.ts new file mode 100644 index 000000000..2210c547d --- /dev/null +++ 
b/providers/stable-diffusion-server/src/ai.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/index"; diff --git a/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts b/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts new file mode 100644 index 000000000..2d38c2e99 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts @@ -0,0 +1,156 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + AiProviderPreviewRunFn, + AiProviderRunFn, + AiProviderRunFnRegistration, + Capability, + ImageGenerateTaskInput, + ImageGenerateTaskOutput, + ModelConfig, + ModelRecord, +} from "@workglow/ai"; +import { AiProvider } from "@workglow/ai"; +import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import { pngBytesToImageValue } from "@workglow/ai/provider-utils"; +import { LOCAL_STABLE_DIFFUSION_CPP } from "./common/StableDiffusionCpp_Constants"; + +/** + * Endpoint variants for stable-diffusion.cpp HTTP servers. Default `/txt2img` + * matches the conventional sd.cpp HTTP API; `/v1/images/generations` is used + * by OpenAI-compatible builds. Configurable so callers can switch without + * forking the provider while the Phase-8 integration spike is pending. + */ +export type StableDiffusionCppEndpoint = "/txt2img" | "/v1/images/generations"; + +export interface IStableDiffusionCppProviderOptions { + readonly transport: IBackendsTransport; + readonly externalUrl?: string; + readonly endpoint?: StableDiffusionCppEndpoint; +} + +/** + * HTTP client for a local stable-diffusion.cpp server. If `externalUrl` is + * provided the server is assumed to already be running; otherwise the provider + * acquires a handle via `transport.ensureRunning` before each request and + * releases it afterwards. 
+ * + * v1 scope: text-to-image only. Other capabilities are not registered; the + * provider serves only image generation in v1. + */ +export class StableDiffusionCppProvider extends AiProvider { + readonly name = LOCAL_STABLE_DIFFUSION_CPP; + readonly displayName = "Local stable-diffusion.cpp (HTTP)"; + readonly isLocal = true; + readonly supportsBrowser = false; + + constructor(options: IStableDiffusionCppProviderOptions) { + const runFns: readonly AiProviderRunFnRegistration< + ImageGenerateTaskInput, + ImageGenerateTaskOutput, + ModelConfig + >[] = [ + { + serves: ["image.generation"] as readonly Capability[], + runFn: createStableDiffusionCppImageGenerateRunFn(options) as AiProviderRunFn< + ImageGenerateTaskInput, + ImageGenerateTaskOutput, + ModelConfig + >, + }, + ]; + + const previewTasks: Record< + string, + AiProviderPreviewRunFn + > = {}; + + super(runFns, previewTasks); + } + + override inferCapabilities(model: ModelRecord): readonly Capability[] { + return (model.capabilities as readonly Capability[] | undefined) ?? ["image.generation"]; + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Image-generation run-fn +// ───────────────────────────────────────────────────────────────────────────── + +/** + * One-shot run-fn for text-to-image generation via stable-diffusion.cpp HTTP server. + * + * Endpoint is selected via {@link IStableDiffusionCppProviderOptions.endpoint} + * (defaults to `/txt2img`). Request: `POST ` with `{ "prompt": "..." }`. + * Response: `{ "images": ["", ...] }` — the first image is used. + */ +function createStableDiffusionCppImageGenerateRunFn( + options: IStableDiffusionCppProviderOptions +): AiProviderRunFn { + const endpoint = options.endpoint ?? "/txt2img"; + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + + const body = JSON.stringify({ prompt: input.prompt }); + + // Acquire base URL — either from external override or via transport. 
+ let baseUrl: string; + let handle: IRunningHandle | undefined; + + if (options.externalUrl) { + baseUrl = options.externalUrl.replace(/\/$/, ""); + } else { + if (!model?.model_id) { + throw new Error( + "StableDiffusionCppProvider: model.model_id is required to acquire a backend" + ); + } + handle = await options.transport.ensureRunning({ + backend: "stable-diffusion-server", + modelPath: model.model_id, + opts: {}, + }); + baseUrl = handle.url.replace(/\/$/, ""); + } + + try { + signal?.throwIfAborted?.(); + + const response = await fetch(`${baseUrl}${endpoint}`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `StableDiffusionCppProvider: HTTP ${response.status} from ${endpoint} — ${text}` + ); + } + + const json = (await response.json()) as { images?: string[] }; + const base64 = json.images?.[0]; + if (!base64) { + throw new Error("StableDiffusionCppProvider: response contained no images"); + } + + // Decode base64 PNG bytes platform-neutrally and wrap in an ImageValue. + // Avoids Node-only `Buffer.from(...)` so the provider stays runtime-agnostic. 
+ const binary = atob(base64); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i); + const image = await pngBytesToImageValue(bytes, "png"); + + emit({ type: "snapshot", data: { image } }); + emit({ type: "finish", data: {} as ImageGenerateTaskOutput }); + } finally { + await handle?.release(); + } + }; +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts new file mode 100644 index 000000000..56c976ccd --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts @@ -0,0 +1,7 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +export const LOCAL_STABLE_DIFFUSION_CPP = "LOCAL_STABLE_DIFFUSION_CPP"; diff --git a/providers/stable-diffusion-server/src/ai/index.ts b/providers/stable-diffusion-server/src/ai/index.ts new file mode 100644 index 000000000..5df0a99bc --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/index.ts @@ -0,0 +1,11 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./common/StableDiffusionCpp_Constants"; +export * from "./StableDiffusionCppProvider"; +export * from "./registerStableDiffusionCpp"; diff --git a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts new file mode 100644 index 000000000..4b5db64dc --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts @@ -0,0 +1,28 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRegisterOptions } from "@workglow/ai"; +import type { IBackendsTransport } from "@workglow/ai/provider-utils"; +import { registerProviderInline 
} from "@workglow/ai/provider-utils"; +import type { StableDiffusionCppEndpoint } from "./StableDiffusionCppProvider"; +import { StableDiffusionCppProvider } from "./StableDiffusionCppProvider"; + +export interface IRegisterStableDiffusionCppOptions extends AiProviderRegisterOptions { + readonly transport: IBackendsTransport; + readonly externalUrl?: string; + readonly endpoint?: StableDiffusionCppEndpoint; +} + +export async function registerStableDiffusionCpp( + options: IRegisterStableDiffusionCppOptions +): Promise { + const { transport, externalUrl, endpoint, ...registerOptions } = options; + await registerProviderInline( + new StableDiffusionCppProvider({ transport, externalUrl, endpoint }), + "StableDiffusionCpp", + registerOptions + ); +} diff --git a/providers/stable-diffusion-server/src/ai/runtime.ts b/providers/stable-diffusion-server/src/ai/runtime.ts new file mode 100644 index 000000000..094645c24 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/runtime.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./registerStableDiffusionCpp"; diff --git a/providers/stable-diffusion-server/tsconfig.json b/providers/stable-diffusion-server/tsconfig.json new file mode 100644 index 000000000..4d9631e9b --- /dev/null +++ b/providers/stable-diffusion-server/tsconfig.json @@ -0,0 +1,29 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "composite": true, + "rootDir": "src", + "outDir": "dist", + "tsBuildInfoFile": "tsconfig.tsbuildinfo" + }, + "include": [ + "src/**/*" + ], + "references": [ + { + "path": "../../packages/util" + }, + { + "path": "../../packages/task-graph" + }, + { + "path": "../../packages/storage" + }, + { + "path": "../../packages/job-queue" + }, + { + "path": "../../packages/ai" + } + ] +} From 962dfc98f2cfb31475e409cec3c0d883d857a4fa Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Fri, 22 May 2026 
22:55:32 +0000 Subject: [PATCH 2/8] ci: remove dependabot --- .github/dependabot.yml | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index e4a93835d..000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,14 +0,0 @@ -# To get started with Dependabot version updates, you'll need to specify which -# package ecosystems to update and where the package manifests are located. -# Please see the documentation for all configuration options: -# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file - -version: 2 -updates: - - package-ecosystem: "bun" - directory: "/" - schedule: - interval: "weekly" - ignore: - - dependency-name: "@types/node" - - dependency-name: "@typescript/native-preview" From c5d5254aefe165674a887c86379ef96ff5b20f12 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Sat, 23 May 2026 21:12:35 +0000 Subject: [PATCH 3/8] =?UTF-8?q?feat(llamacpp-server):=20@workglow/llamacpp?= =?UTF-8?q?-server=20provider=20=E2=80=94=20run-fns,=20registration,=20and?= =?UTF-8?q?=20barrels?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- providers/llamacpp-server/package.json | 14 +- .../llamacpp-server/src/ai-runtime.browser.ts | 9 + providers/llamacpp-server/src/ai.browser.ts | 9 + .../src/ai/LlamaCppServerProvider.ts | 222 +++--------------- .../src/ai/LlamaCppServerQueuedProvider.ts | 34 +++ .../ai/common/LlamaCppServer_Capabilities.ts | 80 +++++++ .../common/LlamaCppServer_CapabilitySets.ts | 38 +++ .../src/ai/common/LlamaCppServer_Client.ts | 161 +++++++++++++ .../src/ai/common/LlamaCppServer_Constants.ts | 3 + .../src/ai/common/LlamaCppServer_JobRunFns.ts | 57 +++++ .../src/ai/common/LlamaCppServer_ModelInfo.ts | 93 ++++++++ .../ai/common/LlamaCppServer_ModelSchema.ts | 85 +++++++ 
.../ai/common/LlamaCppServer_ModelSearch.ts | 58 +++++ .../src/ai/common/LlamaCppServer_ModelUtil.ts | 31 +++ .../ai/common/LlamaCppServer_TextEmbedding.ts | 64 +++++ .../common/LlamaCppServer_TextGeneration.ts | 103 ++++++++ .../ai/common/LlamaCppServer_TextRewriter.ts | 57 +++++ .../ai/common/LlamaCppServer_TextSummary.ts | 57 +++++ .../ai/common/LlamaCppServer_ToolCalling.ts | 138 +++++++++++ providers/llamacpp-server/src/ai/index.ts | 19 +- .../src/ai/registerLlamaCppServer.ts | 29 +-- .../src/ai/registerLlamaCppServerInline.ts | 28 +++ .../src/ai/registerLlamaCppServerWorker.ts | 29 +++ providers/llamacpp-server/src/ai/runtime.ts | 12 +- 24 files changed, 1216 insertions(+), 214 deletions(-) create mode 100644 providers/llamacpp-server/src/ai-runtime.browser.ts create mode 100644 providers/llamacpp-server/src/ai.browser.ts create mode 100644 providers/llamacpp-server/src/ai/LlamaCppServerQueuedProvider.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_Capabilities.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_CapabilitySets.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_JobRunFns.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSchema.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelUtil.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts create 
mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts create mode 100644 providers/llamacpp-server/src/ai/registerLlamaCppServerInline.ts create mode 100644 providers/llamacpp-server/src/ai/registerLlamaCppServerWorker.ts diff --git a/providers/llamacpp-server/package.json b/providers/llamacpp-server/package.json index 21f794b3a..e90c5ca42 100644 --- a/providers/llamacpp-server/package.json +++ b/providers/llamacpp-server/package.json @@ -12,20 +12,30 @@ "scripts": { "watch": "concurrently -c 'auto' 'bun:watch-*'", "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "watch-browser": "bun build --watch --no-clear-screen --target=browser --sourcemap=external --packages=external --outdir ./dist ./src/ai.browser.ts ./src/ai-runtime.browser.ts", "watch-types": "tsc --watch --preserveWatchOutput", - "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'", - "build-js": "bun run build-code", + "build-package": "concurrently -c 'auto' -n 'code,browser,types' 'bun run build-code' 'bun run build-browser' 'bun run build-types'", + "build-js": "concurrently -c 'auto' -n 'code,browser' 'bun run build-code' 'bun run build-browser'", "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo", "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "build-browser": "bun build --target=browser --sourcemap=external --packages=external --outdir ./dist ./src/ai.browser.ts ./src/ai-runtime.browser.ts", "build-types": "rm -f tsconfig.tsbuildinfo && tsgo", "lint": "eslint . 
--ext ts,tsx --report-unused-disable-directives --max-warnings 0" }, "exports": { "./ai": { + "browser": { + "types": "./dist/ai.d.ts", + "import": "./dist/ai.browser.js" + }, "types": "./dist/ai.d.ts", "import": "./dist/ai.js" }, "./ai-runtime": { + "browser": { + "types": "./dist/ai-runtime.d.ts", + "import": "./dist/ai-runtime.browser.js" + }, "types": "./dist/ai-runtime.d.ts", "import": "./dist/ai-runtime.js" } diff --git a/providers/llamacpp-server/src/ai-runtime.browser.ts b/providers/llamacpp-server/src/ai-runtime.browser.ts new file mode 100644 index 000000000..a1fd9b608 --- /dev/null +++ b/providers/llamacpp-server/src/ai-runtime.browser.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/runtime"; diff --git a/providers/llamacpp-server/src/ai.browser.ts b/providers/llamacpp-server/src/ai.browser.ts new file mode 100644 index 000000000..2210c547d --- /dev/null +++ b/providers/llamacpp-server/src/ai.browser.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/index"; diff --git a/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts b/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts index 88c0eb394..8bc41a5e7 100644 --- a/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts +++ b/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts @@ -4,207 +4,41 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { - AiProviderPreviewRunFn, - AiProviderRunFn, - AiProviderRunFnRegistration, - Capability, - ModelConfig, - ModelRecord, - TextGenerationTaskInput, - TextGenerationTaskOutput, -} from "@workglow/ai"; -import { AiProvider } from "@workglow/ai"; -import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import { createCloudProviderClass } from "@workglow/ai/provider-utils"; 
+import type { Capability, ModelRecord } from "@workglow/ai/worker"; +import { AiProvider } from "@workglow/ai/worker"; +import { + inferLlamaCppServerCapabilities, + llamaCppServerWorkerRunFnSpecs, +} from "./common/LlamaCppServer_Capabilities"; import { LOCAL_LLAMACPP_SERVER } from "./common/LlamaCppServer_Constants"; - -export interface ILlamaCppServerProviderOptions { - readonly transport: IBackendsTransport; - readonly externalUrl?: string; - /** - * Default context length passed to the broker when launching a backend. - * Picked per request; larger values trade RAM for prompt+output budget. - * Defaults to 4096 if unset. - */ - readonly defaultCtx?: number; -} +import type { LlamaCppServerModelConfig } from "./common/LlamaCppServer_ModelSchema"; /** - * OpenAI-compatible HTTP chat-completion provider that forwards requests to a - * running llama-server instance. If `externalUrl` is provided the server is - * assumed to already be running; otherwise the provider acquires a handle via - * `transport.ensureRunning` before each request and releases it afterwards. + * Worker-server registration shell for llamacpp-server. Imports `AiProvider` + * from `@workglow/ai/worker` so the worker module graph stays self-contained. * - * v1 scope: chat completion only. Other capabilities are not registered; the - * provider serves only chat completion in v1. + * Both transport and externalUrl modes are supported. The `IBackendsTransport` + * is constructed inside the worker runtime by the caller (e.g., + * `MessagePortBackendsTransport` in the Builder's worker renderer) and held + * by closure inside the run-fns — no port transfer across the worker + * boundary. Worker registration is the primary production path; inline + * registration (`LlamaCppServerQueuedProvider`) is primarily a testing seam. 
*/ -export class LlamaCppServerProvider extends AiProvider { - readonly name = LOCAL_LLAMACPP_SERVER; - readonly displayName = "Local llama-server (HTTP)"; - readonly isLocal = true; - readonly supportsBrowser = false; - - constructor(options: ILlamaCppServerProviderOptions) { - const runFns: readonly AiProviderRunFnRegistration< - TextGenerationTaskInput, - TextGenerationTaskOutput, - ModelConfig - >[] = [ - { - serves: ["text.generation"] as readonly Capability[], - runFn: createLlamaCppServerTextGenerationStream(options) as AiProviderRunFn< - TextGenerationTaskInput, - TextGenerationTaskOutput, - ModelConfig - >, - }, - ]; - - const previewTasks: Record< - string, - AiProviderPreviewRunFn - > = {}; - - super(runFns, previewTasks); +export class LlamaCppServerProvider extends createCloudProviderClass( + AiProvider, + { + name: LOCAL_LLAMACPP_SERVER, + displayName: "Local llama-server (HTTP)", + isLocal: true, + supportsBrowser: true, } - +) { override inferCapabilities(model: ModelRecord): readonly Capability[] { - return (model.capabilities as readonly Capability[] | undefined) ?? ["text.generation"]; + return inferLlamaCppServerCapabilities(model); } -} - -// ───────────────────────────────────────────────────────────────────────────── -// Chat-completion run-fn -// ───────────────────────────────────────────────────────────────────────────── - -interface UnifiedTextGenerationInput extends TextGenerationTaskInput { - readonly messages?: readonly { readonly role: string; readonly content: string }[]; - readonly systemPrompt?: string; -} - -/** - * Build and stream a chat-completion request against a llama-server - * `/v1/chat/completions` endpoint. - * - * Discriminates on `Array.isArray(input.messages) && input.messages.length > 0` - * so {@link AiChatTask} (chat path) and {@link TextGenerationTask} - * (prompt-only path) share the same registered run-fn, consistent with - * the pattern used across workglow providers. 
- */ -function createLlamaCppServerTextGenerationStream( - options: ILlamaCppServerProviderOptions -): AiProviderRunFn { - return async (input, model, signal, emit) => { - signal?.throwIfAborted?.(); - - const unified = input as UnifiedTextGenerationInput; - const hasMessages = Array.isArray(unified.messages) && unified.messages.length > 0; - const messages = hasMessages - ? [ - ...(unified.systemPrompt ? [{ role: "system", content: unified.systemPrompt }] : []), - ...unified.messages!.map((m) => ({ role: m.role, content: m.content })), - ] - : [{ role: "user", content: input.prompt }]; - - const body = JSON.stringify({ - model: model?.model_id ?? "", - messages, - stream: true, - ...(input.maxTokens !== undefined ? { max_tokens: input.maxTokens } : {}), - ...(input.temperature !== undefined ? { temperature: input.temperature } : {}), - ...(input.topP !== undefined ? { top_p: input.topP } : {}), - ...(input.frequencyPenalty !== undefined - ? { frequency_penalty: input.frequencyPenalty } - : {}), - ...(input.presencePenalty !== undefined ? { presence_penalty: input.presencePenalty } : {}), - }); - - // Acquire base URL — either from external override or via transport. - let baseUrl: string; - let handle: IRunningHandle | undefined; - - if (options.externalUrl) { - baseUrl = options.externalUrl.replace(/\/$/, ""); - } else { - if (!model?.model_id) { - throw new Error("LlamaCppServerProvider: model.model_id is required to acquire a backend"); - } - handle = await options.transport.ensureRunning({ - backend: "llamacpp-server", - modelPath: model.model_id, - opts: { ctx: options.defaultCtx ?? 
4096 }, - }); - baseUrl = handle.url.replace(/\/$/, ""); - } - - try { - signal?.throwIfAborted?.(); - - const response = await fetch(`${baseUrl}/v1/chat/completions`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body, - signal, - }); - - if (!response.ok) { - const text = await response.text().catch(() => "(no body)"); - throw new Error( - `LlamaCppServerProvider: HTTP ${response.status} from /v1/chat/completions — ${text}` - ); - } - - const reader = response.body?.getReader(); - if (!reader) { - throw new Error("LlamaCppServerProvider: response body is null"); - } - - const decoder = new TextDecoder(); - let buffer = ""; - - try { - let sawDone = false; - while (!sawDone) { - signal?.throwIfAborted?.(); - const { done, value } = await reader.read(); - if (done) break; - - buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split("\n"); - buffer = lines.pop() ?? ""; - - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed.startsWith("data:")) continue; - const data = trimmed.slice(5).trim(); - if (data === "[DONE]") { - sawDone = true; - await reader.cancel().catch(() => undefined); - break; - } - if (!data) continue; - - let chunk: { choices?: { delta?: { content?: string } }[] }; - try { - chunk = JSON.parse(data) as typeof chunk; - } catch { - continue; - } - - const delta = chunk.choices?.[0]?.delta?.content; - if (delta) { - emit({ type: "text-delta", port: "text", textDelta: delta }); - } - } - } - } finally { - reader.releaseLock(); - } - - emit({ type: "finish", data: {} as TextGenerationTaskOutput }); - } finally { - await handle?.release(); - } - }; + protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] { + return llamaCppServerWorkerRunFnSpecs(); + } } diff --git a/providers/llamacpp-server/src/ai/LlamaCppServerQueuedProvider.ts b/providers/llamacpp-server/src/ai/LlamaCppServerQueuedProvider.ts new file mode 100644 index 000000000..70521c6ab --- 
/dev/null +++ b/providers/llamacpp-server/src/ai/LlamaCppServerQueuedProvider.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Capability, ModelRecord } from "@workglow/ai"; +import { AiProvider } from "@workglow/ai"; +import { createCloudProviderClass } from "@workglow/ai/provider-utils"; +import { + inferLlamaCppServerCapabilities, + llamaCppServerWorkerRunFnSpecs, +} from "./common/LlamaCppServer_Capabilities"; +import { LOCAL_LLAMACPP_SERVER } from "./common/LlamaCppServer_Constants"; +import type { LlamaCppServerModelConfig } from "./common/LlamaCppServer_ModelSchema"; + +/** Main-thread registration (inline or worker-backed). */ +export class LlamaCppServerQueuedProvider extends createCloudProviderClass( + AiProvider, + { + name: LOCAL_LLAMACPP_SERVER, + displayName: "Local llama-server (HTTP)", + isLocal: true, + supportsBrowser: true, + } +) { + override inferCapabilities(model: ModelRecord): readonly Capability[] { + return inferLlamaCppServerCapabilities(model); + } + + protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] { + return llamaCppServerWorkerRunFnSpecs(); + } +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Capabilities.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Capabilities.ts new file mode 100644 index 000000000..14b103184 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Capabilities.ts @@ -0,0 +1,80 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Capability, ModelRecord } from "@workglow/ai/worker"; +import { LLAMACPP_SERVER_CAPABILITY_SETS } from "./LlamaCppServer_CapabilitySets"; + +export const LLAMACPP_SERVER_RUN_FN_SPECS = LLAMACPP_SERVER_CAPABILITY_SETS.map((serves) => ({ + serves, +})); + +export function llamaCppServerWorkerRunFnSpecs(): readonly { + readonly serves: readonly 
Capability[]; +}[] { + return LLAMACPP_SERVER_RUN_FN_SPECS; +} + +const EMBEDDING_NAME_PATTERNS: readonly RegExp[] = [ + /embed/i, + /^nomic-embed/i, + /^mxbai-embed/i, + /^all-minilm/i, + /^snowflake-arctic-embed/i, + /^bge-/i, + /^gte-/i, +]; + +type CapabilityHints = Pick; + +/** + * Heuristic capability inference. Like Ollama, default-permissive: a + * mis-routed model surfaces as a runtime HTTP error rather than a missed + * capability. + * + * 1. `provider_config.native_dimensions` set → embedding model + * 2. Filename matches an embedding pattern → embedding model + * 3. Filename matches llava / bakllava / -vision → vision-capable text-gen + * 4. Any other name → full text-gen + rewriter + summary + tool-use + meta + * 5. No id at all → declared caps OR baseline meta-ops + */ +export function inferLlamaCppServerCapabilities(model: CapabilityHints): readonly Capability[] { + const pc = model.provider_config as + | { model_path?: string; model_name?: string; native_dimensions?: number } + | undefined; + const id = String(pc?.model_path ?? pc?.model_name ?? model.model_id ?? ""); + const base = (id.split(/[\\/]/).pop() ?? "").toLowerCase(); // filename only: POSIX or Windows separators + + if (typeof pc?.native_dimensions === "number") { + return ["text.embedding", "model.info", "model.search"]; + } + if (EMBEDDING_NAME_PATTERNS.some((rx) => rx.test(base))) { + return ["text.embedding", "model.info", "model.search"]; + } + if (/llava|bakllava|-vision\b/.test(base)) { + return [ + "text.generation", + "text.rewriter", + "text.summary", + "tool-use", + "vision-input", + "model.info", + "model.search", + ]; + } + if (base.length > 0) { + return [ + "text.generation", + "text.rewriter", + "text.summary", + "tool-use", + "model.info", + "model.search", + ]; + } + const declared = (model.capabilities as readonly Capability[] | undefined) ?? 
[]; + if (declared.length > 0) return declared; + return ["model.info", "model.search"]; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_CapabilitySets.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_CapabilitySets.ts new file mode 100644 index 000000000..444b92946 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_CapabilitySets.ts @@ -0,0 +1,38 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Capability } from "@workglow/ai/worker"; + +/** + * Single source of truth for llamacpp-server's capability sets. + * + * Both `LLAMACPP_SERVER_RUN_FNS` (worker-side registration) and + * `workerRunFnSpecs()` derive their `serves` arrays from these named exports. + * SDK-free so the main thread can import them without pulling in fetch code. + */ +export const LLAMACPP_SERVER_TEXT_GENERATION = ["text.generation"] as const satisfies Capability[]; + +export const LLAMACPP_SERVER_TOOL_USE = [ + "text.generation", + "tool-use", +] as const satisfies Capability[]; + +export const LLAMACPP_SERVER_TEXT_REWRITER = ["text.rewriter"] as const satisfies Capability[]; +export const LLAMACPP_SERVER_TEXT_SUMMARY = ["text.summary"] as const satisfies Capability[]; +export const LLAMACPP_SERVER_TEXT_EMBEDDING = ["text.embedding"] as const satisfies Capability[]; +export const LLAMACPP_SERVER_MODEL_SEARCH = ["model.search"] as const satisfies Capability[]; +export const LLAMACPP_SERVER_MODEL_INFO = ["model.info"] as const satisfies Capability[]; + +/** Aggregated list — for `workerRunFnSpecs()` derivation. Order MUST match `LLAMACPP_SERVER_RUN_FNS`. 
*/ +export const LLAMACPP_SERVER_CAPABILITY_SETS = [ + LLAMACPP_SERVER_TEXT_GENERATION, + LLAMACPP_SERVER_TOOL_USE, + LLAMACPP_SERVER_TEXT_REWRITER, + LLAMACPP_SERVER_TEXT_SUMMARY, + LLAMACPP_SERVER_TEXT_EMBEDDING, + LLAMACPP_SERVER_MODEL_SEARCH, + LLAMACPP_SERVER_MODEL_INFO, +] as const; diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts new file mode 100644 index 000000000..232b4316c --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts @@ -0,0 +1,161 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import { LLAMACPP_SERVER_DEFAULT_CTX } from "./LlamaCppServer_Constants"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; + +/** + * Provider-construction options shared across registrations. + * + * `transport` and `externalUrl` are both optional, but the resolver throws + * at acquisition time if no URL source resolves for a given request. + */ +export interface ILlamaCppServerProviderOptions { + readonly transport?: IBackendsTransport; + readonly externalUrl?: string; + /** Default context length forwarded to the broker. Falls back to {@link LLAMACPP_SERVER_DEFAULT_CTX}. */ + readonly defaultCtx?: number; +} + +/** Resolved base URL plus a release callback (no-op for externalUrl paths). */ +export interface IAcquiredBaseUrl { + readonly baseUrl: string; + readonly release: () => Promise; +} + +/** + * Resolve a base URL for one request. + * + * Precedence: + * 1. `model.provider_config.base_url` + * 2. `opts.externalUrl` + * 3. `opts.transport.ensureRunning({ ... })` — requires `provider_config.model_path` + * + * Throws with a clear message if none of the three resolves. 
+ */ +export async function acquireBaseUrl( + model: LlamaCppServerModelConfig | undefined, + opts: ILlamaCppServerProviderOptions +): Promise { + const modelBaseUrl = model?.provider_config?.base_url; + if (typeof modelBaseUrl === "string" && modelBaseUrl.length > 0) { + return { baseUrl: stripTrailingSlash(modelBaseUrl), release: noopRelease }; + } + if (typeof opts.externalUrl === "string" && opts.externalUrl.length > 0) { + return { baseUrl: stripTrailingSlash(opts.externalUrl), release: noopRelease }; + } + if (opts.transport) { + const modelPath = model?.provider_config?.model_path; + if (typeof modelPath !== "string" || modelPath.length === 0) { + throw new Error( + "LlamaCppServer: transport-mode acquisition requires provider_config.model_path." + ); + } + const ctx = + typeof model?.provider_config?.ctx === "number" + ? model.provider_config.ctx + : (opts.defaultCtx ?? LLAMACPP_SERVER_DEFAULT_CTX); + const handle: IRunningHandle = await opts.transport.ensureRunning({ + backend: "llamacpp-server", + modelPath, + opts: { ctx }, + }); + return { + baseUrl: stripTrailingSlash(handle.url), + release: () => handle.release(), + }; + } + throw new Error( + "LlamaCppServer: no base URL source — set provider_config.base_url, opts.externalUrl, or opts.transport." + ); +} + +function stripTrailingSlash(url: string): string { + return url.replace(/\/+$/, ""); +} + +const noopRelease = async (): Promise => {}; + +// ── SSE helper ───────────────────────────────────────────────────────────── + +/** One parsed delta from an OpenAI-compatible `/v1/chat/completions` stream. 
*/ +export interface IChatCompletionDelta { + readonly contentDelta?: string; + readonly toolCallDeltas?: ReadonlyArray<{ + readonly index?: number; + readonly id?: string; + readonly type?: string; + readonly function?: { readonly name?: string; readonly arguments?: string }; + }>; + readonly done?: boolean; + readonly finishReason?: string; +} + +/** + * Iterate over `data:` lines from an SSE response body, parsing each into + * an {@link IChatCompletionDelta}. Yields `{ done: true }` on `data: [DONE]`. + * + * The `AbortSignal` is checked before every read, so an abort throws promptly. + * Cancels the reader on `[DONE]` before yielding the terminal delta; the lock is always released. + */ +export async function* readChatCompletionDeltas( + response: Response, + signal: AbortSignal | undefined +): AsyncGenerator { + const reader = response.body?.getReader(); + if (!reader) { + throw new Error("LlamaCppServer: response body is null"); + } + const decoder = new TextDecoder(); + let buffer = ""; + try { + let sawDone = false; + while (!sawDone) { + signal?.throwIfAborted?.(); + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() ?? 
""; + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed.startsWith("data:")) continue; + const data = trimmed.slice(5).trim(); + if (data === "[DONE]") { + sawDone = true; + await reader.cancel().catch(() => undefined); // must precede the yield: consumers break on `done` + yield { done: true }; + break; + } + if (!data) continue; + let chunk: { + choices?: Array<{ + delta?: { + content?: string; + tool_calls?: IChatCompletionDelta["toolCallDeltas"]; + }; + finish_reason?: string; + }>; + }; + try { + chunk = JSON.parse(data) as typeof chunk; + } catch { + continue; + } + const choice = chunk.choices?.[0]; + const contentDelta = choice?.delta?.content; + const toolCallDeltas = choice?.delta?.tool_calls; + const finishReason = choice?.finish_reason; + if (contentDelta !== undefined || toolCallDeltas !== undefined || finishReason) { + yield { contentDelta, toolCallDeltas, finishReason }; + } + } + } + } finally { + reader.releaseLock(); + } +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts index 53cc7a8ee..2c8601d0a 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts @@ -5,3 +5,6 @@ */ export const LOCAL_LLAMACPP_SERVER = "LOCAL_LLAMACPP_SERVER"; + +/** Default llama-server context length when no per-request or per-model override is set. 
*/ +export const LLAMACPP_SERVER_DEFAULT_CTX = 4096; diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_JobRunFns.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_JobRunFns.ts new file mode 100644 index 000000000..1ad09c4b3 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_JobRunFns.ts @@ -0,0 +1,57 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFnRegistration } from "@workglow/ai"; +import { + LLAMACPP_SERVER_MODEL_INFO, + LLAMACPP_SERVER_MODEL_SEARCH, + LLAMACPP_SERVER_TEXT_EMBEDDING, + LLAMACPP_SERVER_TEXT_GENERATION, + LLAMACPP_SERVER_TEXT_REWRITER, + LLAMACPP_SERVER_TEXT_SUMMARY, + LLAMACPP_SERVER_TOOL_USE, +} from "./LlamaCppServer_CapabilitySets"; +import { type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client"; +import { createLlamaCppServerModelInfoStream } from "./LlamaCppServer_ModelInfo"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; +import { createLlamaCppServerModelSearchStream } from "./LlamaCppServer_ModelSearch"; +import { createLlamaCppServerTextEmbeddingStream } from "./LlamaCppServer_TextEmbedding"; +import { createLlamaCppServerTextGenerationStream } from "./LlamaCppServer_TextGeneration"; +import { createLlamaCppServerTextRewriterStream } from "./LlamaCppServer_TextRewriter"; +import { createLlamaCppServerTextSummaryStream } from "./LlamaCppServer_TextSummary"; +import { createLlamaCppServerToolCallingStream } from "./LlamaCppServer_ToolCalling"; + +/** + * Build the full set of capability-set run-fn registrations bound to a + * single set of provider options. Order is significant only as a + * tiebreaker — the dispatcher prefers the smallest `serves` superset of + * the task's `requires`. 
+ */ +export function buildLlamaCppServerRunFns( + opts: ILlamaCppServerProviderOptions +): readonly AiProviderRunFnRegistration< + // eslint-disable-next-line @typescript-eslint/no-explicit-any + any, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + any, + LlamaCppServerModelConfig +>[] { + return [ + { + serves: LLAMACPP_SERVER_TEXT_GENERATION, + runFn: createLlamaCppServerTextGenerationStream(opts), + }, + { serves: LLAMACPP_SERVER_TOOL_USE, runFn: createLlamaCppServerToolCallingStream(opts) }, + { serves: LLAMACPP_SERVER_TEXT_REWRITER, runFn: createLlamaCppServerTextRewriterStream(opts) }, + { serves: LLAMACPP_SERVER_TEXT_SUMMARY, runFn: createLlamaCppServerTextSummaryStream(opts) }, + { + serves: LLAMACPP_SERVER_TEXT_EMBEDDING, + runFn: createLlamaCppServerTextEmbeddingStream(opts), + }, + { serves: LLAMACPP_SERVER_MODEL_SEARCH, runFn: createLlamaCppServerModelSearchStream(opts) }, + { serves: LLAMACPP_SERVER_MODEL_INFO, runFn: createLlamaCppServerModelInfoStream(opts) }, + ]; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts new file mode 100644 index 000000000..f2801948b --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts @@ -0,0 +1,93 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFn, ModelInfoTaskInput, ModelInfoTaskOutput } from "@workglow/ai"; +import { acquireBaseUrl, type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; +import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil"; + +type AcquireFn = typeof acquireBaseUrl; + +export function createLlamaCppServerModelInfoStream( + opts: ILlamaCppServerProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): AiProviderRunFn { + return async (input, model, 
signal, emit) => { + signal?.throwIfAborted?.(); + const pc = model?.provider_config; + + if (input.detail === "dimensions") { + let native_dimensions = + typeof pc?.native_dimensions === "number" ? pc.native_dimensions : undefined; + if (native_dimensions === undefined) { + try { + const { baseUrl, release } = await acquire(model, opts); + try { + const res = await fetch(`${baseUrl}/props`, { signal }); + if (res.ok) { + const props = (await res.json()) as { + default_generation_settings?: { n_embd?: number }; + }; + const n = props.default_generation_settings?.n_embd; + if (typeof n === "number") native_dimensions = n; + } + } finally { + await release(); + } + } catch { + signal?.throwIfAborted?.(); // cancellation propagates; any other failure leaves dimensions unset + } + } + emit({ + type: "finish", + data: { + model: input.model, + is_local: true, + is_remote: false, + supports_browser: true, + supports_node: true, + is_cached: false, + is_loaded: false, + file_sizes: null, + ...(native_dimensions !== undefined ? { native_dimensions } : {}), + } as ModelInfoTaskOutput, + }); + return; + } + + // General info — try /v1/models. is_loaded = the server reports this model name. 
+ let is_loaded = false; + const expectedName = getLlamaCppServerModelName(model); + try { + const { baseUrl, release } = await acquire(model, opts); + try { + const res = await fetch(`${baseUrl}/v1/models`, { signal }); + if (res.ok) { + const body = (await res.json()) as { data?: Array<{ id?: string }> }; + is_loaded = !!body.data?.some((m) => m.id === expectedName); + } + } finally { + await release(); + } + } catch { + signal?.throwIfAborted?.(); // cancellation propagates; an unreachable server leaves is_loaded false
+ } + + emit({ + type: "finish", + data: { + model: input.model, + is_local: true, + is_remote: false, + supports_browser: true, + supports_node: true, + is_cached: false, + is_loaded, + file_sizes: null, + }, + }); + }; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSchema.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSchema.ts new file mode 100644 index 000000000..91a1a3a4a --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSchema.ts @@ -0,0 +1,85 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { ModelConfigSchema, ModelRecordSchema } from "@workglow/ai/worker"; +import type { DataPortSchemaObject, FromSchema } from "@workglow/util/worker"; +import { LOCAL_LLAMACPP_SERVER } from "./LlamaCppServer_Constants"; + +/** + * Provider-config schema for `@workglow/llamacpp-server`. + * + * Required fields: + * - `provider` — discriminator + * - `provider_config.model_path` — absolute path passed to the broker; OR `base_url` if externalUrl-mode + * + * Either `model_path` (transport mode) OR `base_url` (externalUrl mode) must be set + * for a usable record. The provider resolver throws at runtime if neither resolves. + */ +export const LlamaCppServerModelSchema = { + type: "object", + properties: { + provider: { + const: LOCAL_LLAMACPP_SERVER, + description: "Discriminator: local llama-server HTTP provider.", + }, + provider_config: { + type: "object", + description: "llama-server-specific configuration.", + properties: { + model_path: { + type: "string", + description: + "Absolute filesystem path to the .gguf model. Required for transport-mode acquisition.", + }, + model_name: { + type: "string", + description: + "Optional logical model name sent as OpenAI `model` field. llama-server ignores it.", + }, + base_url: { + type: "string", + description: + "Optional per-record base URL override. 
Takes precedence over provider-level externalUrl. Used for records discovered via externalUrl-mode model.search.", + }, + native_dimensions: { + type: "number", + description: "Embedding dimensions for embedding models. Skips /props lookup.", + }, + ctx: { + type: "number", + description: "Per-model llama-server context length override.", + }, + }, + additionalProperties: false, + }, + }, + required: ["provider", "provider_config"], + additionalProperties: true, +} as const satisfies DataPortSchemaObject; + +export const LlamaCppServerModelRecordSchema = { + type: "object", + properties: { + ...ModelRecordSchema.properties, + ...LlamaCppServerModelSchema.properties, + }, + required: [...ModelRecordSchema.required, ...LlamaCppServerModelSchema.required], + additionalProperties: false, +} as const satisfies DataPortSchemaObject; + +export type LlamaCppServerModelRecord = FromSchema; + +export const LlamaCppServerModelConfigSchema = { + type: "object", + properties: { + ...ModelConfigSchema.properties, + ...LlamaCppServerModelSchema.properties, + }, + required: [...ModelConfigSchema.required, ...LlamaCppServerModelSchema.required], + additionalProperties: false, +} as const satisfies DataPortSchemaObject; + +export type LlamaCppServerModelConfig = FromSchema; diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts new file mode 100644 index 000000000..ffd3eca39 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts @@ -0,0 +1,58 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFn, ModelSearchTaskInput, ModelSearchTaskOutput } from "@workglow/ai"; +import { filterModelSearchResultsByQuery } from "@workglow/ai/provider-utils"; +import type { ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client"; +import { LOCAL_LLAMACPP_SERVER } from 
"./LlamaCppServer_Constants"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; + +/** + * Returns the single loaded model when the provider has a usable external URL + * (provider-level `externalUrl`). Otherwise returns `[]` — transport mode + * cannot search because `transport.ensureRunning` itself requires a model path. + */ +export function createLlamaCppServerModelSearchStream( + opts: ILlamaCppServerProviderOptions +): AiProviderRunFn { + return async (input, _model, signal, emit) => { + signal?.throwIfAborted?.(); + if (!opts.externalUrl) { + emit({ type: "finish", data: { results: [] } }); + return; + } + const baseUrl = opts.externalUrl.replace(/\/+$/, ""); + try { + const res = await fetch(`${baseUrl}/v1/models`, { signal }); + if (!res.ok) { + emit({ type: "finish", data: { results: [] } }); + return; + } + const body = (await res.json()) as { data?: Array<{ id: string }> }; + const results = (body.data ?? []).map((m) => ({ + id: m.id, + label: m.id, + description: "llama-server loaded model", + record: { + model_id: m.id, + provider: LOCAL_LLAMACPP_SERVER, + title: m.id, + description: "", + capabilities: [], + provider_config: { model_name: m.id, base_url: baseUrl }, + metadata: {}, + }, + raw: m, + })); + emit({ + type: "finish", + data: { results: filterModelSearchResultsByQuery(results, input.query) }, + }); + } catch { + emit({ type: "finish", data: { results: [] } }); + } + }; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelUtil.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelUtil.ts new file mode 100644 index 000000000..4678be11c --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelUtil.ts @@ -0,0 +1,31 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; + +/** + * Returns the logical model name to send in the OpenAI 
`model` field. + * llama-server ignores this value (it serves one model per process), so we + * fall back to model_path, then model_id, then the empty string. + */ +export function getLlamaCppServerModelName(model: LlamaCppServerModelConfig | undefined): string { + const pc = model?.provider_config; + return String(pc?.model_name ?? pc?.model_path ?? model?.model_id ?? ""); +} + +/** + * Returns the absolute filesystem path used by `transport.ensureRunning`. + * Required for transport-mode acquisition; throws if missing. + */ +export function getLlamaCppServerModelPath(model: LlamaCppServerModelConfig | undefined): string { + const path = model?.provider_config?.model_path; + if (typeof path !== "string" || path.length === 0) { + throw new Error( + "LlamaCppServer: provider_config.model_path is required for transport-mode acquisition." + ); + } + return path; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts new file mode 100644 index 000000000..7f242c632 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts @@ -0,0 +1,64 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + AiProviderRunFn, + TextEmbeddingTaskInput, + TextEmbeddingTaskOutput, +} from "@workglow/ai"; +import { acquireBaseUrl, type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; +import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil"; + +type AcquireFn = typeof acquireBaseUrl; + +/** + * One-shot embedding run-fn. Per the project convention, the run-fn emits + * a single `finish` event whose `data` is the full `TextEmbeddingTaskOutput`. 
+ */ +export function createLlamaCppServerTextEmbeddingStream( + opts: ILlamaCppServerProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + const texts = Array.isArray(input.text) ? input.text : [input.text]; + const body = JSON.stringify({ + model: getLlamaCppServerModelName(model), + input: texts, + }); + const { baseUrl, release } = await acquire(model, opts); + try { + const response = await fetch(`${baseUrl}/v1/embeddings`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `LlamaCppServer: HTTP ${response.status} from /v1/embeddings (embeddings) — ${text}` + ); + } + const json = (await response.json()) as { + data?: Array<{ embedding: number[] }>; + }; + const vectors = (json.data ?? []).map((d) => new Float32Array(d.embedding)); + if (vectors.length !== texts.length) { + throw new Error( + `LlamaCppServer: /v1/embeddings returned ${vectors.length} embeddings for ${texts.length} input(s)` + ); + } + const data: TextEmbeddingTaskOutput = Array.isArray(input.text) + ? 
{ vector: vectors } + : { vector: vectors[0] }; + emit({ type: "finish", data }); + } finally { + await release(); + } + }; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts new file mode 100644 index 000000000..29c674674 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts @@ -0,0 +1,103 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + AiProviderRunFn, + TextGenerationTaskInput, + TextGenerationTaskOutput, +} from "@workglow/ai"; +import { + acquireBaseUrl, + readChatCompletionDeltas, + type ILlamaCppServerProviderOptions, +} from "./LlamaCppServer_Client"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; +import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil"; + +type AcquireFn = typeof acquireBaseUrl; + +interface UnifiedTextGenerationInput extends TextGenerationTaskInput { + readonly messages?: ReadonlyArray<{ + readonly role: string; + readonly content: + | string + | ReadonlyArray< + | { readonly type: "text"; readonly text: string } + | { readonly type: "image_url"; readonly image_url: { readonly url: string } } + >; + }>; + readonly systemPrompt?: string; +} + +/** + * Streaming run-fn factory for `["text.generation"]` (and, when the model has + * `vision-input`, image-bearing chat content too). + * + * Discriminates on `Array.isArray(input.messages) && input.messages.length > 0` + * so {@link AiChatTask} and {@link TextGenerationTask} share the same + * registered run-fn, consistent with the project convention. + * + * Vision-input is folded into this run-fn rather than living separately: + * llava-family chat is still a `/v1/chat/completions` call — only the + * `content` shape changes. The provider's `inferCapabilities` decides + * whether `vision-input` is declared. 
+ */ +export function createLlamaCppServerTextGenerationStream( + opts: ILlamaCppServerProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + const unified = input as UnifiedTextGenerationInput; + const hasMessages = Array.isArray(unified.messages) && unified.messages.length > 0; + + const messages = hasMessages + ? [ + ...(unified.systemPrompt ? [{ role: "system", content: unified.systemPrompt }] : []), + ...unified.messages!.map((m) => ({ role: m.role, content: m.content })), + ] + : [{ role: "user", content: input.prompt }]; + + const body = JSON.stringify({ + model: getLlamaCppServerModelName(model), + messages, + stream: true, + ...(input.maxTokens !== undefined ? { max_tokens: input.maxTokens } : {}), + ...(input.temperature !== undefined ? { temperature: input.temperature } : {}), + ...(input.topP !== undefined ? { top_p: input.topP } : {}), + ...(input.frequencyPenalty !== undefined + ? { frequency_penalty: input.frequencyPenalty } + : {}), + ...(input.presencePenalty !== undefined ? 
{ presence_penalty: input.presencePenalty } : {}), + }); + + const { baseUrl, release } = await acquire(model, opts); + try { + signal?.throwIfAborted?.(); + const response = await fetch(`${baseUrl}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `LlamaCppServer: HTTP ${response.status} from /v1/chat/completions (text-generation) — ${text}` + ); + } + for await (const delta of readChatCompletionDeltas(response, signal)) { + if (delta.done) break; + if (delta.contentDelta) { + emit({ type: "text-delta", port: "text", textDelta: delta.contentDelta }); + } + } + emit({ type: "finish", data: {} as TextGenerationTaskOutput }); + } finally { + await release(); + } + }; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts new file mode 100644 index 000000000..c0266ad82 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts @@ -0,0 +1,57 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFn, TextRewriterTaskInput, TextRewriterTaskOutput } from "@workglow/ai"; +import { + acquireBaseUrl, + readChatCompletionDeltas, + type ILlamaCppServerProviderOptions, +} from "./LlamaCppServer_Client"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; +import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil"; + +type AcquireFn = typeof acquireBaseUrl; + +export function createLlamaCppServerTextRewriterStream( + opts: ILlamaCppServerProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + const body = JSON.stringify({ + model: 
getLlamaCppServerModelName(model), + messages: [ + { role: "system", content: input.prompt }, + { role: "user", content: input.text }, + ], + stream: true, + }); + const { baseUrl, release } = await acquire(model, opts); + try { + const response = await fetch(`${baseUrl}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `LlamaCppServer: HTTP ${response.status} from /v1/chat/completions (rewriter) — ${text}` + ); + } + for await (const delta of readChatCompletionDeltas(response, signal)) { + if (delta.done) break; + if (delta.contentDelta) { + emit({ type: "text-delta", port: "text", textDelta: delta.contentDelta }); + } + } + emit({ type: "finish", data: {} as TextRewriterTaskOutput }); + } finally { + await release(); + } + }; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts new file mode 100644 index 000000000..14c914cd5 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts @@ -0,0 +1,57 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFn, TextSummaryTaskInput, TextSummaryTaskOutput } from "@workglow/ai"; +import { + acquireBaseUrl, + readChatCompletionDeltas, + type ILlamaCppServerProviderOptions, +} from "./LlamaCppServer_Client"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; +import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil"; + +type AcquireFn = typeof acquireBaseUrl; + +export function createLlamaCppServerTextSummaryStream( + opts: ILlamaCppServerProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + const body 
= JSON.stringify({ + model: getLlamaCppServerModelName(model), + messages: [ + { role: "system", content: "Summarize the following text concisely." }, + { role: "user", content: input.text }, + ], + stream: true, + }); + const { baseUrl, release } = await acquire(model, opts); + try { + const response = await fetch(`${baseUrl}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `LlamaCppServer: HTTP ${response.status} from /v1/chat/completions (summary) — ${text}` + ); + } + for await (const delta of readChatCompletionDeltas(response, signal)) { + if (delta.done) break; + if (delta.contentDelta) { + emit({ type: "text-delta", port: "text", textDelta: delta.contentDelta }); + } + } + emit({ type: "finish", data: {} as TextSummaryTaskOutput }); + } finally { + await release(); + } + }; +} diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts new file mode 100644 index 000000000..7f4575234 --- /dev/null +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts @@ -0,0 +1,138 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + AiProviderRunFn, + ToolCallingTaskInput, + ToolCallingTaskOutput, + ToolCalls, + ToolDefinition, +} from "@workglow/ai"; +import { + buildToolDescription, + filterValidToolCalls, + sanitizeToolArgs, + toTextFlatMessages, +} from "@workglow/ai/worker"; +import { parsePartialJson } from "@workglow/util/worker"; +import { + acquireBaseUrl, + readChatCompletionDeltas, + type ILlamaCppServerProviderOptions, +} from "./LlamaCppServer_Client"; +import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; +import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil"; + +type 
AcquireFn = typeof acquireBaseUrl; + +function mapTools(tools: readonly ToolDefinition[]) { + return tools.map((t) => ({ + type: "function" as const, + function: { + name: t.name, + description: buildToolDescription(t), + parameters: t.inputSchema as unknown, + }, + })); +} + +export function createLlamaCppServerToolCallingStream( + opts: ILlamaCppServerProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + const messages = toTextFlatMessages(input); + const tools = input.toolChoice === "none" ? undefined : mapTools(input.tools); + const body = JSON.stringify({ + model: getLlamaCppServerModelName(model), + messages, + ...(tools ? { tools } : {}), + stream: true, + ...(input.temperature !== undefined ? { temperature: input.temperature } : {}), + ...(input.maxTokens !== undefined ? { max_tokens: input.maxTokens } : {}), + }); + const { baseUrl, release } = await acquire(model, opts); + try { + const response = await fetch(`${baseUrl}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `LlamaCppServer: HTTP ${response.status} from /v1/chat/completions (tool-use) — ${text}` + ); + } + + let accumulatedText = ""; + const accumulatedArgs = new Map(); + const callMeta = new Map(); + let nextSyntheticIndex = 0; + let lastEmittedToolCalls: ToolCalls = []; + + for await (const delta of readChatCompletionDeltas(response, signal)) { + if (delta.done) break; + if (delta.contentDelta) { + accumulatedText += delta.contentDelta; + emit({ type: "text-delta", port: "text", textDelta: delta.contentDelta }); + } + if (delta.toolCallDeltas?.length) { + for (const tc of delta.toolCallDeltas) { + const idx = typeof tc.index === "number" ? tc.index : nextSyntheticIndex++; + const meta = callMeta.get(idx) ?? 
{}; + if (tc.id) meta.id = tc.id; + if (tc.function?.name) meta.name = tc.function.name; + callMeta.set(idx, meta); + if (tc.function?.arguments) { + accumulatedArgs.set(idx, (accumulatedArgs.get(idx) ?? "") + tc.function.arguments); + } + } + lastEmittedToolCalls = buildToolCalls(accumulatedArgs, callMeta); + emit({ type: "object-delta", port: "toolCalls", objectDelta: [...lastEmittedToolCalls] }); + } + } + const finalToolCalls = filterValidToolCalls(lastEmittedToolCalls, input.tools); + emit({ + type: "finish", + data: { text: accumulatedText, toolCalls: finalToolCalls } as ToolCallingTaskOutput, + }); + } finally { + await release(); + } + }; +} + +function buildToolCalls( + argsByIndex: Map, + metaByIndex: Map +): ToolCalls { + const result: ToolCalls = []; + const indices = [...argsByIndex.keys(), ...metaByIndex.keys()]; + const unique = Array.from(new Set(indices)).sort((a, b) => a - b); + for (const idx of unique) { + const meta = metaByIndex.get(idx) ?? {}; + if (!meta.name) continue; + const raw = argsByIndex.get(idx) ?? ""; + let parsed: Record = {}; + if (raw.length > 0) { + try { + parsed = JSON.parse(raw) as Record; + } catch { + const partial = parsePartialJson(raw); + parsed = (partial as Record) ?? {}; + } + } + result.push({ + id: meta.id ?? 
`call_${idx}`, + name: meta.name, + input: sanitizeToolArgs(parsed) as Record, + }); + } + return result; +} diff --git a/providers/llamacpp-server/src/ai/index.ts b/providers/llamacpp-server/src/ai/index.ts index 4f3d7f42a..6612079ff 100644 --- a/providers/llamacpp-server/src/ai/index.ts +++ b/providers/llamacpp-server/src/ai/index.ts @@ -7,5 +7,22 @@ // organize-imports-ignore export * from "./common/LlamaCppServer_Constants"; -export * from "./LlamaCppServerProvider"; +export * from "./common/LlamaCppServer_ModelSchema"; +export * from "./common/LlamaCppServer_Capabilities"; +export * from "./common/LlamaCppServer_CapabilitySets"; export * from "./registerLlamaCppServer"; +export * from "./registerLlamaCppServerInline"; +export * from "./registerLlamaCppServerWorker"; + +import { LLAMACPP_SERVER_RUN_FN_SPECS } from "./common/LlamaCppServer_Capabilities"; +import { buildLlamaCppServerRunFns } from "./common/LlamaCppServer_JobRunFns"; +import { LlamaCppServerQueuedProvider } from "./LlamaCppServerQueuedProvider"; + +/** + * @internal Symbols exported only for use by `@workglow/test`. Not part of the stable public API. 
+ */ +export const _testOnly = { + LlamaCppServerQueuedProvider, + LLAMACPP_SERVER_RUN_FN_SPECS, + buildLlamaCppServerRunFns, +} as const; diff --git a/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts b/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts index b0f4406aa..b9b6b32bf 100644 --- a/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts +++ b/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts @@ -5,23 +5,20 @@ */ import type { AiProviderRegisterOptions } from "@workglow/ai"; -import type { IBackendsTransport } from "@workglow/ai/provider-utils"; -import { registerProviderInline } from "@workglow/ai/provider-utils"; -import { LlamaCppServerProvider } from "./LlamaCppServerProvider"; - -export interface IRegisterLlamaCppServerOptions extends AiProviderRegisterOptions { - readonly transport: IBackendsTransport; - readonly externalUrl?: string; - readonly defaultCtx?: number; -} +import { registerProviderWithWorker } from "@workglow/ai/provider-utils"; +import { LlamaCppServerQueuedProvider } from "./LlamaCppServerQueuedProvider"; +/** + * Main-thread worker-backed registration. The provider proxy lives on the + * main thread and forwards jobs to the worker, which holds the real run-fns. + * + * Use {@link registerLlamaCppServerInline} for transport mode (broker + * acquisition). 
+ */ export async function registerLlamaCppServer( - options: IRegisterLlamaCppServerOptions + options: AiProviderRegisterOptions & { + worker: Worker | (() => Worker); + } ): Promise { - const { transport, externalUrl, defaultCtx, ...registerOptions } = options; - await registerProviderInline( - new LlamaCppServerProvider({ transport, externalUrl, defaultCtx }), - "LlamaCppServer", - registerOptions - ); + await registerProviderWithWorker(new LlamaCppServerQueuedProvider(), "LlamaCppServer", options); } diff --git a/providers/llamacpp-server/src/ai/registerLlamaCppServerInline.ts b/providers/llamacpp-server/src/ai/registerLlamaCppServerInline.ts new file mode 100644 index 000000000..ec85b055e --- /dev/null +++ b/providers/llamacpp-server/src/ai/registerLlamaCppServerInline.ts @@ -0,0 +1,28 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRegisterOptions } from "@workglow/ai"; +import { registerProviderInline } from "@workglow/ai/provider-utils"; +import { type ILlamaCppServerProviderOptions } from "./common/LlamaCppServer_Client"; +import { buildLlamaCppServerRunFns } from "./common/LlamaCppServer_JobRunFns"; +import { LlamaCppServerQueuedProvider } from "./LlamaCppServerQueuedProvider"; + +export interface IRegisterLlamaCppServerInlineOptions + extends AiProviderRegisterOptions, ILlamaCppServerProviderOptions {} + +/** Main-thread inline registration. Supports transport mode. 
*/ +export async function registerLlamaCppServerInline( + options: IRegisterLlamaCppServerInlineOptions = {} +): Promise { + const { transport, externalUrl, defaultCtx, ...registerOptions } = options; + await registerProviderInline( + new LlamaCppServerQueuedProvider( + buildLlamaCppServerRunFns({ transport, externalUrl, defaultCtx }) + ), + "LlamaCppServer", + registerOptions + ); +} diff --git a/providers/llamacpp-server/src/ai/registerLlamaCppServerWorker.ts b/providers/llamacpp-server/src/ai/registerLlamaCppServerWorker.ts new file mode 100644 index 000000000..d25f64bd6 --- /dev/null +++ b/providers/llamacpp-server/src/ai/registerLlamaCppServerWorker.ts @@ -0,0 +1,29 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { registerProviderWorker } from "@workglow/ai/provider-utils"; +import type { ILlamaCppServerProviderOptions } from "./common/LlamaCppServer_Client"; +import { buildLlamaCppServerRunFns } from "./common/LlamaCppServer_JobRunFns"; +import { LlamaCppServerProvider } from "./LlamaCppServerProvider"; + +/** + * Worker-server-side registration. Supports both transport and externalUrl + * modes — the transport object is constructed inside this worker runtime + * by the caller and held by closure in the run-fns. No port transfer. + * + * This is the primary registration path in production. Callers in the + * Builder construct `MessagePortBackendsTransport` locally in the worker + * renderer and pass it straight here. 
+ */ +export async function registerLlamaCppServerWorker( + options: ILlamaCppServerProviderOptions = {} +): Promise { + await registerProviderWorker( + (ws) => + new LlamaCppServerProvider(buildLlamaCppServerRunFns(options)).registerOnWorkerServer(ws), + "LlamaCppServer" + ); +} diff --git a/providers/llamacpp-server/src/ai/runtime.ts b/providers/llamacpp-server/src/ai/runtime.ts index 5a1f42e73..5ba10d546 100644 --- a/providers/llamacpp-server/src/ai/runtime.ts +++ b/providers/llamacpp-server/src/ai/runtime.ts @@ -6,4 +6,14 @@ // organize-imports-ignore -export * from "./registerLlamaCppServer"; +export * from "./common/LlamaCppServer_Client"; +export * from "./common/LlamaCppServer_TextGeneration"; +export * from "./common/LlamaCppServer_TextRewriter"; +export * from "./common/LlamaCppServer_TextSummary"; +export * from "./common/LlamaCppServer_TextEmbedding"; +export * from "./common/LlamaCppServer_ToolCalling"; +export * from "./common/LlamaCppServer_ModelInfo"; +export * from "./common/LlamaCppServer_ModelSearch"; +export * from "./common/LlamaCppServer_JobRunFns"; +export * from "./registerLlamaCppServerInline"; +export * from "./registerLlamaCppServerWorker"; From 0f0e4566fa708ca8b4579d094bdb4c2dceacc148 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Sat, 23 May 2026 21:48:44 +0000 Subject: [PATCH 4/8] test(llamacpp-server): full unit and integration test suite --- .../LlamaCppServerProvider.test.ts | 173 ++++++++++++++++++ .../LlamaCppServer_Client.test.ts | 96 ++++++++++ ...LlamaCppServer_Generic.integration.test.ts | 71 +++++++ .../LlamaCppServer_ModelInfo.test.ts | 78 ++++++++ .../LlamaCppServer_ModelSearch.test.ts | 60 ++++++ .../LlamaCppServer_TextEmbedding.test.ts | 74 ++++++++ ...lamaCppServer_TextGenerationStream.test.ts | 94 ++++++++++ .../LlamaCppServer_ToolCalling.test.ts | 81 ++++++++ .../LocalBackendsProviderContracts.test.ts | 14 +- .../ai/common/LlamaCppServer_ModelSearch.ts | 2 +- 10 files changed, 737 insertions(+), 6 deletions(-) 
create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_Generic.integration.test.ts create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts new file mode 100644 index 000000000..9150c1585 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts @@ -0,0 +1,173 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { ModelRecord } from "@workglow/ai"; +import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import { _testOnly } from "@workglow/llamacpp-server/ai"; +import { describe, expect, it, vi } from "vitest"; + +const { LlamaCppServerQueuedProvider, LLAMACPP_SERVER_RUN_FN_SPECS, buildLlamaCppServerRunFns } = + _testOnly; + +function model( + model_id: string, + provider_config: Record = { model_path: `/models/${model_id}` }, + capabilities: readonly string[] = [] +): ModelRecord { + return { + model_id, + title: model_id, + description: "", + provider: "LOCAL_LLAMACPP_SERVER", + provider_config, + capabilities: [...capabilities], + metadata: {}, + } as ModelRecord; +} + +describe("LlamaCppServerQueuedProvider.inferCapabilities", () => { + const provider = new 
LlamaCppServerQueuedProvider(buildLlamaCppServerRunFns({})); + + it("infers full text-gen set for a generic .gguf", () => { + const caps = provider.inferCapabilities(model("llama-3-8b-q4_k_m.gguf")); + expect(caps).toContain("text.generation"); + expect(caps).toContain("tool-use"); + expect(caps).toContain("text.rewriter"); + expect(caps).toContain("text.summary"); + expect(caps).toContain("model.info"); + expect(caps).toContain("model.search"); + expect(caps).not.toContain("vision-input"); + }); + + it("infers vision-input for llava-family", () => { + const caps = provider.inferCapabilities( + model("llava-7b-v1.6-q4_k_m.gguf", { model_path: "/models/llava-7b-v1.6-q4_k_m.gguf" }) + ); + expect(caps).toContain("vision-input"); + expect(caps).toContain("text.generation"); + }); + + it("infers vision-input for bakllava", () => { + const caps = provider.inferCapabilities( + model("bakllava-q5.gguf", { model_path: "/models/bakllava-q5.gguf" }) + ); + expect(caps).toContain("vision-input"); + }); + + it("infers text.embedding for nomic-embed gguf", () => { + const caps = provider.inferCapabilities( + model("nomic-embed-text.gguf", { model_path: "/models/nomic-embed-text.gguf" }) + ); + expect(caps).toContain("text.embedding"); + expect(caps).not.toContain("text.generation"); + }); + + it("infers text.embedding when native_dimensions is set explicitly", () => { + const caps = provider.inferCapabilities( + model("custom.gguf", { model_path: "/models/custom.gguf", native_dimensions: 768 }) + ); + expect(caps).toEqual(["text.embedding", "model.info", "model.search"]); + }); + + it("falls back to declared caps when id is empty", () => { + const caps = provider.inferCapabilities(model("", {}, ["text.classification"])); + expect(caps).toEqual(["text.classification"]); + }); + + it("falls back to baseline meta-ops when nothing matches and nothing is declared", () => { + const caps = provider.inferCapabilities(model("", {})); + expect(caps).toEqual(["model.info", 
"model.search"]); + }); +}); + +describe("LlamaCppServer capability-set parity", () => { + it("LLAMACPP_SERVER_RUN_FN_SPECS matches buildLlamaCppServerRunFns({}) serves shapes", () => { + const fns = buildLlamaCppServerRunFns({}); + const fnsServes = fns.map((r) => [...r.serves].sort().join(",")); + const specsServes = LLAMACPP_SERVER_RUN_FN_SPECS.map((s) => [...s.serves].sort().join(",")); + expect(specsServes).toEqual(fnsServes); + }); +}); + +describe("LlamaCppServer run-fn shape", () => { + it("registers a runFn for every canonical capability set", () => { + const sets = buildLlamaCppServerRunFns({}).map((r) => [...r.serves].sort().join(",")); + expect(sets).toContain("text.generation"); + expect(sets).toContain("text.generation,tool-use"); + expect(sets).toContain("text.rewriter"); + expect(sets).toContain("text.summary"); + expect(sets).toContain("text.embedding"); + expect(sets).toContain("model.search"); + expect(sets).toContain("model.info"); + }); + + it("tiebreaks text.generation to the smallest serves entry", () => { + const candidates = buildLlamaCppServerRunFns({}).filter((r) => + r.serves.includes("text.generation") + ); + expect(candidates.some((r) => r.serves.length === 1)).toBe(true); + }); +}); + +function fakeTransport(): IBackendsTransport & { + ensureRunning: ReturnType; +} { + return { + ensureRunning: vi.fn(), + subscribeStatus: vi.fn(() => () => undefined), + install: vi.fn(), + list: vi.fn(), + uninstall: vi.fn(), + } as unknown as IBackendsTransport & { ensureRunning: ReturnType }; +} + +describe("LlamaCppServer transport-mode run-fn (parity across inline + worker)", () => { + it("acquires URL via transport and releases the handle (text.generation)", async () => { + const release = vi.fn().mockResolvedValue(undefined); + const transport = fakeTransport(); + transport.ensureRunning.mockResolvedValue({ + url: "http://broker:9999", + release, + } as IRunningHandle); + + const enc = new TextEncoder(); + const fetchSpy = vi.spyOn(globalThis, 
"fetch").mockResolvedValue( + new Response( + new ReadableStream({ + start(controller) { + controller.enqueue( + enc.encode(`data: ${JSON.stringify({ choices: [{ delta: { content: "ok" } }] })}\n`) + ); + controller.enqueue(enc.encode("data: [DONE]\n")); + controller.close(); + }, + }), + { status: 200 } + ) + ); + + const fns = buildLlamaCppServerRunFns({ transport }); + const textGen = fns.find((r) => r.serves.join(",") === "text.generation")!; + const events: any[] = []; + const emit = (e: any) => events.push(e); + await textGen.runFn( + { prompt: "hi" } as any, + { provider_config: { model_path: "/abs/m.gguf", ctx: 4096 } } as any, + undefined as any, + emit + ); + + expect(transport.ensureRunning).toHaveBeenCalledWith({ + backend: "llamacpp-server", + modelPath: "/abs/m.gguf", + opts: { ctx: 4096 }, + }); + const fetchedUrl = String(fetchSpy.mock.calls[0]![0]); + expect(fetchedUrl).toBe("http://broker:9999/v1/chat/completions"); + expect(release).toHaveBeenCalledTimes(1); + fetchSpy.mockRestore(); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts new file mode 100644 index 000000000..fecfa6516 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts @@ -0,0 +1,96 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import { acquireBaseUrl } from "@workglow/llamacpp-server/ai-runtime"; +import { describe, expect, it, vi } from "vitest"; + +function fakeTransport(): IBackendsTransport & { + ensureRunning: ReturnType; +} { + return { + ensureRunning: vi.fn(), + subscribeStatus: vi.fn(() => () => undefined), + install: vi.fn(), + list: vi.fn(), + uninstall: vi.fn(), + } as unknown as IBackendsTransport & { ensureRunning: ReturnType }; +} + +describe("acquireBaseUrl precedence", () => { 
+ it("prefers model.provider_config.base_url over everything", async () => { + const transport = fakeTransport(); + const result = await acquireBaseUrl( + { provider_config: { base_url: "http://from-model:8080/" } } as any, + { externalUrl: "http://from-opts:8080", transport } + ); + expect(result.baseUrl).toBe("http://from-model:8080"); + expect(transport.ensureRunning).not.toHaveBeenCalled(); + await result.release(); // no-op + }); + + it("prefers opts.externalUrl over transport when no model.base_url", async () => { + const transport = fakeTransport(); + const result = await acquireBaseUrl({ provider_config: { model_path: "/x.gguf" } } as any, { + externalUrl: "http://from-opts:8080", + transport, + }); + expect(result.baseUrl).toBe("http://from-opts:8080"); + expect(transport.ensureRunning).not.toHaveBeenCalled(); + await result.release(); // no-op + }); + + it("falls back to transport.ensureRunning when neither URL is set", async () => { + const release = vi.fn().mockResolvedValue(undefined); + const transport = fakeTransport(); + transport.ensureRunning.mockResolvedValue({ + url: "http://broker:9999/", + release, + } as IRunningHandle); + const result = await acquireBaseUrl( + { provider_config: { model_path: "/abs/m.gguf", ctx: 8192 } } as any, + { transport, defaultCtx: 4096 } + ); + expect(transport.ensureRunning).toHaveBeenCalledWith({ + backend: "llamacpp-server", + modelPath: "/abs/m.gguf", + opts: { ctx: 8192 }, + }); + expect(result.baseUrl).toBe("http://broker:9999"); + await result.release(); + expect(release).toHaveBeenCalledTimes(1); + }); + + it("uses defaultCtx when model has no ctx override", async () => { + const transport = fakeTransport(); + transport.ensureRunning.mockResolvedValue({ + url: "http://broker:9999", + release: vi.fn(), + } as IRunningHandle); + await acquireBaseUrl({ provider_config: { model_path: "/abs/m.gguf" } } as any, { + transport, + defaultCtx: 12345, + }); + expect(transport.ensureRunning).toHaveBeenCalledWith({ + 
backend: "llamacpp-server", + modelPath: "/abs/m.gguf", + opts: { ctx: 12345 }, + }); + }); + + it("throws when transport mode is selected but model_path is missing", async () => { + const transport = fakeTransport(); + await expect(acquireBaseUrl({ provider_config: {} } as any, { transport })).rejects.toThrow( + /model_path/ + ); + }); + + it("throws when no source resolves", async () => { + await expect(acquireBaseUrl({ provider_config: {} } as any, {})).rejects.toThrow( + /no base URL source/ + ); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_Generic.integration.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_Generic.integration.test.ts new file mode 100644 index 000000000..2eeeba10d --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_Generic.integration.test.ts @@ -0,0 +1,71 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + createLlamaCppServerModelInfoStream, + createLlamaCppServerModelSearchStream, + createLlamaCppServerTextEmbeddingStream, + createLlamaCppServerTextGenerationStream, +} from "@workglow/llamacpp-server/ai-runtime"; +import { describe, expect, it } from "vitest"; + +const RUN = process.env.RUN_LLAMACPP_SERVER_TESTS === "1"; +const BASE_URL = process.env.LLAMACPP_SERVER_URL ?? 
"http://localhost:8080"; + +describe.skipIf(!RUN)("LlamaCppServer integration (real server)", () => { + const model = { + provider_config: { base_url: BASE_URL, model_name: "model" }, + } as any; + + it("text.generation streams non-empty content", async () => { + const fn = createLlamaCppServerTextGenerationStream({ externalUrl: BASE_URL }); + let text = ""; + const emit = (e: any) => { + if (e.type === "text-delta") text += e.textDelta; + }; + await fn({ prompt: "Say hi.", maxTokens: 16 } as any, model, undefined as any, emit); + expect(text.length).toBeGreaterThan(0); + }); + + it("model.search returns at least one entry via /v1/models", async () => { + const fn = createLlamaCppServerModelSearchStream({ externalUrl: BASE_URL }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + expect(events.at(-1)!.data.results.length).toBeGreaterThanOrEqual(1); + }); + + it("model.info reports is_loaded=true for the running model", async () => { + const search = createLlamaCppServerModelSearchStream({ externalUrl: BASE_URL }); + const searchEvents: any[] = []; + const searchEmit = (e: any) => searchEvents.push(e); + await search({ query: "" } as any, undefined as any, undefined as any, searchEmit); + const loaded = searchEvents.at(-1)!.data.results[0]!; + const fn = createLlamaCppServerModelInfoStream({ externalUrl: BASE_URL }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { model: loaded.id } as any, + { provider_config: { base_url: BASE_URL, model_name: loaded.id } } as any, + undefined as any, + emit + ); + expect(events.at(-1)!.data.is_loaded).toBe(true); + }); + + it("text.embedding returns a Float32Array (skipped if /v1/embeddings 404s)", async () => { + const fn = createLlamaCppServerTextEmbeddingStream({ externalUrl: BASE_URL }); + try { + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ text: "hello" 
} as any, model, undefined as any, emit); + expect(events.at(-1)!.data.vector).toBeInstanceOf(Float32Array); + } catch (err) { + if (/HTTP 404/.test(String(err))) return; // server not started with --embedding + throw err; + } + }); +}); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts new file mode 100644 index 000000000..654f75f8e --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts @@ -0,0 +1,78 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createLlamaCppServerModelInfoStream } from "@workglow/llamacpp-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +afterEach(() => vi.restoreAllMocks()); + +describe("createLlamaCppServerModelInfoStream", () => { + it("trusts native_dimensions when set", async () => { + const fetchSpy = vi.spyOn(globalThis, "fetch"); + const fn = createLlamaCppServerModelInfoStream({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { detail: "dimensions", model: "m" } as any, + { provider_config: { base_url: "http://x:8080", native_dimensions: 768 } } as any, + undefined as any, + emit + ); + expect(fetchSpy).not.toHaveBeenCalled(); + expect(events.at(-1)!.data.native_dimensions).toBe(768); + }); + + it("falls back to /props for embedding dimensions", async () => { + vi.spyOn(globalThis, "fetch").mockImplementation(async (url) => { + if (String(url).endsWith("/props")) { + return new Response(JSON.stringify({ default_generation_settings: { n_embd: 1024 } }), { + status: 200, + }); + } + return new Response("", { status: 404 }); + }); + const fn = createLlamaCppServerModelInfoStream({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { detail: "dimensions", model: "m" } as any, + { provider_config: { base_url: 
"http://x:8080" } } as any, + undefined as any, + emit + ); + expect(events.at(-1)!.data.native_dimensions).toBe(1024); + }); + + it("reports is_loaded=true when /v1/models includes the model name", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [{ id: "m" }, { id: "other" }] }), { status: 200 }) + ); + const fn = createLlamaCppServerModelInfoStream({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { model: "m" } as any, + { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any, + undefined as any, + emit + ); + expect(events.at(-1)!.data.is_loaded).toBe(true); + }); + + it("reports is_loaded=false when server unreachable", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED")); + const fn = createLlamaCppServerModelInfoStream({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { model: "m" } as any, + { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any, + undefined as any, + emit + ); + expect(events.at(-1)!.data.is_loaded).toBe(false); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts new file mode 100644 index 000000000..4c934d408 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts @@ -0,0 +1,60 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createLlamaCppServerModelSearchStream } from "@workglow/llamacpp-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +afterEach(() => vi.restoreAllMocks()); + +describe("createLlamaCppServerModelSearchStream", () => { + it("returns [] when no externalUrl set", async () => { + const fetchSpy = vi.spyOn(globalThis, "fetch"); + const fn = 
createLlamaCppServerModelSearchStream({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + expect(fetchSpy).not.toHaveBeenCalled(); + expect(events.at(-1)!.data.results).toEqual([]); + }); + + it("returns mapped results from /v1/models when externalUrl set", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [{ id: "loaded-model" }] }), { status: 200 }) + ); + const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + const results = events.at(-1)!.data.results; + expect(results).toHaveLength(1); + expect(results[0].id).toBe("loaded-model"); + expect(results[0].record.provider).toBe("LOCAL_LLAMACPP_SERVER"); + expect(results[0].record.provider_config.base_url).toBe("http://x:8080"); + }); + + it("returns [] when fetch fails", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED")); + const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + expect(events.at(-1)!.data.results).toEqual([]); + }); + + it("filters by query case-insensitively", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [{ id: "Llama-3" }, { id: "Mistral" }] }), { + status: 200, + }) + ); + const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "llama" } as any, undefined as any, undefined as any, emit); + const results = events.at(-1)!.data.results; + expect(results.map((r: any) => 
r.id)).toEqual(["Llama-3"]); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts new file mode 100644 index 000000000..3f991b362 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts @@ -0,0 +1,74 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createLlamaCppServerTextEmbeddingStream } from "@workglow/llamacpp-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +afterEach(() => vi.restoreAllMocks()); + +const model = { provider_config: { base_url: "http://x:8080", model_name: "emb" } } as any; + +describe("createLlamaCppServerTextEmbeddingStream", () => { + it("returns a single Float32Array for string input", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [{ embedding: [0.1, 0.2, 0.3] }] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }) + ); + const fn = createLlamaCppServerTextEmbeddingStream({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ text: "hello" } as any, model, undefined as any, emit); + const finish = events.find((e) => e.type === "finish")!; + expect(finish.data.vector).toBeInstanceOf(Float32Array); + const arr = Array.from(finish.data.vector as Float32Array); + expect(arr).toHaveLength(3); + expect(arr[0]).toBeCloseTo(0.1, 5); + expect(arr[1]).toBeCloseTo(0.2, 5); + expect(arr[2]).toBeCloseTo(0.3, 5); + }); + + it("returns an array of Float32Arrays for string[] input", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [{ embedding: [1, 2] }, { embedding: [3, 4] }] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }) + ); + const fn = createLlamaCppServerTextEmbeddingStream({}); + const events: 
any[] = []; + const emit = (e: any) => events.push(e); + await fn({ text: ["a", "b"] } as any, model, undefined as any, emit); + const finish = events.find((e) => e.type === "finish")!; + expect(Array.isArray(finish.data.vector)).toBe(true); + expect(finish.data.vector).toHaveLength(2); + expect(finish.data.vector[0]).toBeInstanceOf(Float32Array); + }); + + it("throws on HTTP error", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("oops", { status: 500 })); + const fn = createLlamaCppServerTextEmbeddingStream({}); + const emit = (_e: any) => undefined; + await expect(fn({ text: "x" } as any, model, undefined as any, emit)).rejects.toThrow( + /embeddings/ + ); + }); + + it("throws when /v1/embeddings returns fewer embeddings than inputs", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }) + ); + const fn = createLlamaCppServerTextEmbeddingStream({}); + const emit = (_e: any) => undefined; + await expect(fn({ text: "x" } as any, model, undefined as any, emit)).rejects.toThrow( + /returned 0 embeddings for 1 input/ + ); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts new file mode 100644 index 000000000..5efa1ced7 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts @@ -0,0 +1,94 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createLlamaCppServerTextGenerationStream } from "@workglow/llamacpp-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +function sseResponse(chunks: string[]): Response { + const stream = new ReadableStream({ + start(controller) { + const enc = new TextEncoder(); + for (const c of chunks) 
controller.enqueue(enc.encode(c)); + controller.close(); + }, + }); + return new Response(stream, { status: 200, headers: { "Content-Type": "text/event-stream" } }); +} + +function dataLine(delta: string): string { + return `data: ${JSON.stringify({ choices: [{ delta: { content: delta } }] })}\n`; +} + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("createLlamaCppServerTextGenerationStream", () => { + const model = { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any; + + it("yields text-delta events for each delta line and a final finish", async () => { + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockResolvedValue(sseResponse([dataLine("Hel"), dataLine("lo"), "data: [DONE]\n"])); + const fn = createLlamaCppServerTextGenerationStream({}); + + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ prompt: "hi" } as any, model, undefined as any, emit); + + expect(fetchSpy).toHaveBeenCalledTimes(1); + const [url] = fetchSpy.mock.calls[0]!; + expect(String(url)).toBe("http://x:8080/v1/chat/completions"); + expect(events.filter((e) => e.type === "text-delta").map((e) => e.textDelta)).toEqual([ + "Hel", + "lo", + ]); + expect(events[events.length - 1].type).toBe("finish"); + }); + + it("uses chat messages when input.messages is non-empty", async () => { + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockResolvedValue(sseResponse([dataLine("ok"), "data: [DONE]\n"])); + const fn = createLlamaCppServerTextGenerationStream({}); + const emit = (_e: any) => undefined; + await fn( + { + prompt: "ignored", + messages: [{ role: "user", content: "hi" }], + systemPrompt: "be helpful", + } as any, + model, + undefined as any, + emit + ); + const body = JSON.parse(String((fetchSpy.mock.calls[0]![1] as RequestInit).body)); + expect(body.messages).toEqual([ + { role: "system", content: "be helpful" }, + { role: "user", content: "hi" }, + ]); + }); + + it("throws on non-2xx with informative message", async 
() => { + vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("nope", { status: 500 })); + const fn = createLlamaCppServerTextGenerationStream({}); + const emit = (_e: any) => undefined; + await expect(fn({ prompt: "x" } as any, model, undefined as any, emit)).rejects.toThrow( + /HTTP 500/ + ); + }); + + it("aborts pending fetch when signal aborts before request", async () => { + const controller = new AbortController(); + controller.abort(); + vi.spyOn(globalThis, "fetch").mockResolvedValue( + sseResponse([dataLine("ok"), "data: [DONE]\n"]) + ); + const fn = createLlamaCppServerTextGenerationStream({}); + const emit = (_e: any) => undefined; + await expect(fn({ prompt: "x" } as any, model, controller.signal, emit)).rejects.toThrow(); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts new file mode 100644 index 000000000..7030fe8a5 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts @@ -0,0 +1,81 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createLlamaCppServerToolCallingStream } from "@workglow/llamacpp-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +function sseChunks(chunks: object[]): Response { + const enc = new TextEncoder(); + const lines = chunks.map((c) => `data: ${JSON.stringify(c)}\n`).concat("data: [DONE]\n"); + const stream = new ReadableStream({ + start(controller) { + for (const l of lines) controller.enqueue(enc.encode(l)); + controller.close(); + }, + }); + return new Response(stream, { status: 200 }); +} + +afterEach(() => vi.restoreAllMocks()); + +const model = { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any; +const TOOLS = [ + { + name: "add", + description: "add", + inputSchema: { type: "object", properties: { a: { type: "number" }, b: { type: 
"number" } } }, + }, +]; + +describe("createLlamaCppServerToolCallingStream", () => { + it("accumulates partial-JSON args across deltas and emits final tool calls", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + sseChunks([ + { + choices: [ + { + delta: { + tool_calls: [ + { index: 0, id: "c0", function: { name: "add", arguments: '{"a":1' } }, + ], + }, + }, + ], + }, + { + choices: [{ delta: { tool_calls: [{ index: 0, function: { arguments: ',"b":2}' } }] } }], + }, + ]) + ); + const fn = createLlamaCppServerToolCallingStream({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { prompt: "p", tools: TOOLS, toolChoice: "auto" } as any, + model, + undefined as any, + emit + ); + const finish = events.find((e) => e.type === "finish")!; + expect(finish.data.toolCalls).toEqual([{ id: "c0", name: "add", input: { a: 1, b: 2 } }]); + }); + + it("omits tools[] when toolChoice='none'", async () => { + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockResolvedValue(sseChunks([{ choices: [{ delta: { content: "hi" } }] }])); + const fn = createLlamaCppServerToolCallingStream({}); + const emit = (_e: any) => undefined; + await fn( + { prompt: "p", tools: TOOLS, toolChoice: "none" } as any, + model, + undefined as any, + emit + ); + const body = JSON.parse(String((fetchSpy.mock.calls[0]![1] as RequestInit).body)); + expect(body.tools).toBeUndefined(); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts index 7e0f04c5d..d7d3db18c 100644 --- a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts +++ b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts @@ -18,7 +18,7 @@ import type { IRunningHandle, } from "@workglow/ai/provider-utils"; import { pngBytesToImageValue } from "@workglow/ai/provider-utils"; -import { LlamaCppServerProvider } 
from "@workglow/llamacpp-server/ai"; +import { LOCAL_LLAMACPP_SERVER, registerLlamaCppServerInline } from "@workglow/llamacpp-server/ai"; import { StableDiffusionCppProvider } from "@workglow/stable-diffusion-server/ai"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; @@ -109,10 +109,9 @@ describe("local backend provider stream contracts", () => { it("llama.cpp stops after [DONE] even if the server keeps the stream open", async () => { const { release, transport } = createTransportStub(); - const provider = new LlamaCppServerProvider({ transport }); - await provider.register(); + await registerLlamaCppServerInline({ transport }); - const runFn = getAiProviderRegistry().getRunFnFor(provider.name, ["text.generation"]); + const runFn = getAiProviderRegistry().getRunFnFor(LOCAL_LLAMACPP_SERVER, ["text.generation"]); expect(runFn).toBeDefined(); const payload = new TextEncoder().encode( @@ -144,13 +143,18 @@ describe("local backend provider stream contracts", () => { ({ ok: true, body: { getReader: () => reader }, + text: async (): Promise => "", }) as unknown as Response ) as unknown as typeof fetch; const events = await runProviderStream( runFn!, { prompt: "hello" }, - { model_id: "/models/llama.gguf" } + { + model_id: "llama-test", + provider: LOCAL_LLAMACPP_SERVER, + provider_config: { model_path: "/models/llama.gguf" }, + } ); expect(events).toEqual([ diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts index ffd3eca39..5e97e4acb 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts @@ -35,7 +35,7 @@ export function createLlamaCppServerModelSearchStream( const results = (body.data ?? 
[]).map((m) => ({ id: m.id, label: m.id, - description: "llama-server loaded model", + description: m.id, record: { model_id: m.id, provider: LOCAL_LLAMACPP_SERVER, From 1d69434a60126c79bbe5b2b58b88973422edcb43 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Sat, 23 May 2026 22:04:55 +0000 Subject: [PATCH 5/8] =?UTF-8?q?docs(llamacpp-server):=20README=20=E2=80=94?= =?UTF-8?q?=20install,=20quickstart,=20capability=20table,=20browser/worke?= =?UTF-8?q?r=20constraints?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- providers/llamacpp-server/README.md | 121 ++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 providers/llamacpp-server/README.md diff --git a/providers/llamacpp-server/README.md b/providers/llamacpp-server/README.md new file mode 100644 index 000000000..622bd1b5e --- /dev/null +++ b/providers/llamacpp-server/README.md @@ -0,0 +1,121 @@ +# `@workglow/llamacpp-server` + +OpenAI-compatible HTTP client for an upstream +[`llama-server`](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) +instance. + +This package **does not bundle llama.cpp**. It speaks to a running +`llama-server` process — either one you start yourself (`externalUrl` mode) +or one acquired through an `IBackendsTransport` (`transport` mode, used by +the Workglow Builder's broker). + +## Install + +```bash +bun add @workglow/llamacpp-server +``` + +You also need `@workglow/ai`, `@workglow/task-graph`, `@workglow/storage`, +`@workglow/job-queue`, and `@workglow/util` (peer dependencies). 
+ +## Quickstart — `externalUrl` mode + +Start `llama-server` yourself, then point the provider at it: + +```bash +llama-server -m ./models/llama-3-8b-q4_k_m.gguf --port 8080 --embedding +``` + +```ts +import { registerLlamaCppServerInline } from "@workglow/llamacpp-server/ai-runtime"; + +await registerLlamaCppServerInline({ + externalUrl: "http://localhost:8080", +}); +``` + +The provider is now visible to the registry as `LOCAL_LLAMACPP_SERVER`. + +## Quickstart — `transport` mode (Electron + broker) + +```ts +import { registerLlamaCppServerInline } from "@workglow/llamacpp-server/ai-runtime"; + +await registerLlamaCppServerInline({ + transport: backendsTransport, // your IBackendsTransport implementation + defaultCtx: 4096, +}); +``` + +In transport mode each model record must include +`provider_config.model_path` — the absolute path to the `.gguf` file. The +broker spawns one `llama-server` per `(modelPath, ctx)` pair, shared by +refcount. + +## Model record shape + +```ts +{ + model_id: "llama-3-8b", + provider: "LOCAL_LLAMACPP_SERVER", + provider_config: { + model_path: "/abs/path/to/llama-3-8b.gguf", // required for transport mode + model_name: "llama-3-8b", // optional; sent as OpenAI `model` field + base_url: "http://localhost:8080", // optional per-record override + native_dimensions: 768, // optional embedding-dim override + ctx: 8192, // optional ctx override + }, + capabilities: [], + metadata: {}, +} +``` + +The provider's `inferCapabilities` heuristic populates the capability set +at runtime based on the file name (llava → vision, `*embed*` → embedding, +otherwise full text-gen + tool-use).
+ +## Supported capabilities + +| Capability | Endpoint | Notes | +|---|---|---| +| `text.generation` | `POST /v1/chat/completions` | Chat + prompt unified | +| `text.generation` + `tool-use` | `POST /v1/chat/completions` with `tools[]` | OpenAI tool calls | +| `text.rewriter` | `POST /v1/chat/completions` | System=prompt, user=text | +| `text.summary` | `POST /v1/chat/completions` | Fixed summary instruction | +| `text.embedding` | `POST /v1/embeddings` | Requires `--embedding` flag | +| `vision-input` | `POST /v1/chat/completions` with `image_url` parts | llava-family models | +| `model.info` | `GET /v1/models` + `GET /props` | Embedding dims via `n_embd` | +| `model.search` | `GET /v1/models` | externalUrl mode only — see below | + +### Why `model.search` returns `[]` in transport mode + +`transport.ensureRunning` requires a `modelPath`, which is what +`model.search` is meant to help the user pick. The broker's catalog of +installed models is the Builder UI's concern, not the provider's. In +`externalUrl` mode `GET /v1/models` works and returns the one model the +server has loaded. + +## Registration shapes + +Three registration entry points; the inline and worker variants share the same options: + +- **`registerLlamaCppServerInline({ transport?, externalUrl?, defaultCtx? })`** + — main-thread inline. Primarily used in tests and any single-thread + embedding scenario. +- **`registerLlamaCppServerWorker({ transport?, externalUrl?, defaultCtx? })`** + — called inside a worker runtime. This is the primary production path. + The worker constructs its own `IBackendsTransport` (e.g., + `MessagePortBackendsTransport`) and passes it here directly — no port + transfer happens. +- **`registerLlamaCppServer({ worker })`** — main-thread proxy that + forwards jobs to a worker. The actual run-fns and transport live in + the worker; this side only exposes the provider identifier to the + registry.
+ +## Browser + +`@workglow/llamacpp-server/ai` resolves to a browser bundle that uses the +exact same source as the node bundle. Pure `fetch` works the same in +both. In a plain browser there is no broker to construct an +`IBackendsTransport` against, so practical use is `externalUrl` mode; +nothing in the code forbids passing a custom transport if one exists. From 5e5c8d9ca39f542d74cd0d0e4cc39a7391b1f789 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Sat, 23 May 2026 22:41:00 +0000 Subject: [PATCH 6/8] =?UTF-8?q?feat(stable-diffusion-server):=20@workglow/?= =?UTF-8?q?stable-diffusion-server=20provider=20=E2=80=94=20run-fns,=20reg?= =?UTF-8?q?istration,=20and=20barrels?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../stable-diffusion-server/package.json | 16 +- .../src/ai-runtime.browser.ts | 9 + .../stable-diffusion-server/src/ai.browser.ts | 9 + .../src/ai/StableDiffusionCppProvider.ts | 166 +++--------------- .../ai/StableDiffusionCppQueuedProvider.ts | 34 ++++ .../common/StableDiffusionCpp_Capabilities.ts | 37 ++++ .../StableDiffusionCpp_CapabilitySets.ts | 22 +++ .../ai/common/StableDiffusionCpp_Client.ts | 113 ++++++++++++ .../ai/common/StableDiffusionCpp_Constants.ts | 3 + .../ai/common/StableDiffusionCpp_ImageEdit.ts | 75 ++++++++ .../StableDiffusionCpp_ImageGenerate.ts | 77 ++++++++ .../ai/common/StableDiffusionCpp_JobRunFns.ts | 48 +++++ .../ai/common/StableDiffusionCpp_ModelInfo.ts | 55 ++++++ .../common/StableDiffusionCpp_ModelSchema.ts | 82 +++++++++ .../common/StableDiffusionCpp_ModelSearch.ts | 53 ++++++ .../ai/common/StableDiffusionCpp_ModelUtil.ts | 35 ++++ .../stable-diffusion-server/src/ai/index.ts | 17 +- .../src/ai/registerStableDiffusionCpp.ts | 30 ++-- .../ai/registerStableDiffusionCppInline.ts | 28 +++ .../ai/registerStableDiffusionCppWorker.ts | 27 +++ .../stable-diffusion-server/src/ai/runtime.ts | 9 +- 21 files changed, 785 insertions(+), 160 deletions(-) create mode 100644 
providers/stable-diffusion-server/src/ai-runtime.browser.ts create mode 100644 providers/stable-diffusion-server/src/ai.browser.ts create mode 100644 providers/stable-diffusion-server/src/ai/StableDiffusionCppQueuedProvider.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Capabilities.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_CapabilitySets.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_JobRunFns.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSchema.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelUtil.ts create mode 100644 providers/stable-diffusion-server/src/ai/registerStableDiffusionCppInline.ts create mode 100644 providers/stable-diffusion-server/src/ai/registerStableDiffusionCppWorker.ts diff --git a/providers/stable-diffusion-server/package.json b/providers/stable-diffusion-server/package.json index b3718e830..02845c13a 100644 --- a/providers/stable-diffusion-server/package.json +++ b/providers/stable-diffusion-server/package.json @@ -12,20 +12,30 @@ "scripts": { "watch": "concurrently -c 'auto' 'bun:watch-*'", "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "watch-browser": "bun build --watch --no-clear-screen --target=browser 
--sourcemap=external --packages=external --outdir ./dist ./src/ai.browser.ts ./src/ai-runtime.browser.ts", "watch-types": "tsc --watch --preserveWatchOutput", - "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'", - "build-js": "bun run build-code", + "build-package": "concurrently -c 'auto' -n 'code,browser,types' 'bun run build-code' 'bun run build-browser' 'bun run build-types'", + "build-js": "concurrently -c 'auto' -n 'code,browser' 'bun run build-code' 'bun run build-browser'", "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo", "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts", + "build-browser": "bun build --target=browser --sourcemap=external --packages=external --outdir ./dist ./src/ai.browser.ts ./src/ai-runtime.browser.ts", "build-types": "rm -f tsconfig.tsbuildinfo && tsgo", "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0" }, "exports": { "./ai": { + "browser": { + "types": "./dist/ai.d.ts", + "import": "./dist/ai.browser.js" + }, "types": "./dist/ai.d.ts", "import": "./dist/ai.js" }, "./ai-runtime": { + "browser": { + "types": "./dist/ai-runtime.d.ts", + "import": "./dist/ai-runtime.browser.js" + }, "types": "./dist/ai-runtime.d.ts", "import": "./dist/ai-runtime.js" } @@ -66,4 +76,4 @@ "publishConfig": { "access": "public" } -} \ No newline at end of file +} diff --git a/providers/stable-diffusion-server/src/ai-runtime.browser.ts b/providers/stable-diffusion-server/src/ai-runtime.browser.ts new file mode 100644 index 000000000..a1fd9b608 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai-runtime.browser.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/runtime"; diff --git a/providers/stable-diffusion-server/src/ai.browser.ts 
b/providers/stable-diffusion-server/src/ai.browser.ts new file mode 100644 index 000000000..2210c547d --- /dev/null +++ b/providers/stable-diffusion-server/src/ai.browser.ts @@ -0,0 +1,9 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +// organize-imports-ignore + +export * from "./ai/index"; diff --git a/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts b/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts index 2d38c2e99..eb94b10d8 100644 --- a/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts +++ b/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts @@ -4,153 +4,39 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { - AiProviderPreviewRunFn, - AiProviderRunFn, - AiProviderRunFnRegistration, - Capability, - ImageGenerateTaskInput, - ImageGenerateTaskOutput, - ModelConfig, - ModelRecord, -} from "@workglow/ai"; -import { AiProvider } from "@workglow/ai"; -import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; -import { pngBytesToImageValue } from "@workglow/ai/provider-utils"; +import { createCloudProviderClass } from "@workglow/ai/provider-utils"; +import type { Capability, ModelRecord } from "@workglow/ai/worker"; +import { AiProvider } from "@workglow/ai/worker"; +import { + inferStableDiffusionCppCapabilities, + stableDiffusionCppWorkerRunFnSpecs, +} from "./common/StableDiffusionCpp_Capabilities"; import { LOCAL_STABLE_DIFFUSION_CPP } from "./common/StableDiffusionCpp_Constants"; +import type { StableDiffusionCppModelConfig } from "./common/StableDiffusionCpp_ModelSchema"; /** - * Endpoint variants for stable-diffusion.cpp HTTP servers. Default `/txt2img` - * matches the conventional sd.cpp HTTP API; `/v1/images/generations` is used - * by OpenAI-compatible builds. Configurable so callers can switch without - * forking the provider while the Phase-8 integration spike is pending. 
- */ -export type StableDiffusionCppEndpoint = "/txt2img" | "/v1/images/generations"; - -export interface IStableDiffusionCppProviderOptions { - readonly transport: IBackendsTransport; - readonly externalUrl?: string; - readonly endpoint?: StableDiffusionCppEndpoint; -} - -/** - * HTTP client for a local stable-diffusion.cpp server. If `externalUrl` is - * provided the server is assumed to already be running; otherwise the provider - * acquires a handle via `transport.ensureRunning` before each request and - * releases it afterwards. + * Worker-server registration shell for stable-diffusion.cpp. * - * v1 scope: text-to-image only. Other capabilities are not registered; the - * provider serves only image generation in v1. + * Both transport and externalUrl modes are supported. The `IBackendsTransport` + * is constructed inside the worker runtime by the caller and held by closure + * inside the run-fns — no port transfer across the worker boundary. + * Worker registration is the primary production path; inline registration + * (`StableDiffusionCppQueuedProvider`) is primarily a testing seam. 
*/ -export class StableDiffusionCppProvider extends AiProvider { - readonly name = LOCAL_STABLE_DIFFUSION_CPP; - readonly displayName = "Local stable-diffusion.cpp (HTTP)"; - readonly isLocal = true; - readonly supportsBrowser = false; - - constructor(options: IStableDiffusionCppProviderOptions) { - const runFns: readonly AiProviderRunFnRegistration< - ImageGenerateTaskInput, - ImageGenerateTaskOutput, - ModelConfig - >[] = [ - { - serves: ["image.generation"] as readonly Capability[], - runFn: createStableDiffusionCppImageGenerateRunFn(options) as AiProviderRunFn< - ImageGenerateTaskInput, - ImageGenerateTaskOutput, - ModelConfig - >, - }, - ]; - - const previewTasks: Record< - string, - AiProviderPreviewRunFn - > = {}; - - super(runFns, previewTasks); +export class StableDiffusionCppProvider extends createCloudProviderClass( + AiProvider, + { + name: LOCAL_STABLE_DIFFUSION_CPP, + displayName: "Local stable-diffusion.cpp (HTTP)", + isLocal: true, + supportsBrowser: true, } - +) { override inferCapabilities(model: ModelRecord): readonly Capability[] { - return (model.capabilities as readonly Capability[] | undefined) ?? ["image.generation"]; + return inferStableDiffusionCppCapabilities(model); } -} - -// ───────────────────────────────────────────────────────────────────────────── -// Image-generation run-fn -// ───────────────────────────────────────────────────────────────────────────── - -/** - * One-shot run-fn for text-to-image generation via stable-diffusion.cpp HTTP server. - * - * Endpoint is selected via {@link IStableDiffusionCppProviderOptions.endpoint} - * (defaults to `/txt2img`). Request: `POST ` with `{ "prompt": "..." }`. - * Response: `{ "images": ["", ...] }` — the first image is used. - */ -function createStableDiffusionCppImageGenerateRunFn( - options: IStableDiffusionCppProviderOptions -): AiProviderRunFn { - const endpoint = options.endpoint ?? 
"/txt2img"; - return async (input, model, signal, emit) => { - signal?.throwIfAborted?.(); - - const body = JSON.stringify({ prompt: input.prompt }); - - // Acquire base URL — either from external override or via transport. - let baseUrl: string; - let handle: IRunningHandle | undefined; - - if (options.externalUrl) { - baseUrl = options.externalUrl.replace(/\/$/, ""); - } else { - if (!model?.model_id) { - throw new Error( - "StableDiffusionCppProvider: model.model_id is required to acquire a backend" - ); - } - handle = await options.transport.ensureRunning({ - backend: "stable-diffusion-server", - modelPath: model.model_id, - opts: {}, - }); - baseUrl = handle.url.replace(/\/$/, ""); - } - try { - signal?.throwIfAborted?.(); - - const response = await fetch(`${baseUrl}${endpoint}`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body, - signal, - }); - - if (!response.ok) { - const text = await response.text().catch(() => "(no body)"); - throw new Error( - `StableDiffusionCppProvider: HTTP ${response.status} from ${endpoint} — ${text}` - ); - } - - const json = (await response.json()) as { images?: string[] }; - const base64 = json.images?.[0]; - if (!base64) { - throw new Error("StableDiffusionCppProvider: response contained no images"); - } - - // Decode base64 PNG bytes platform-neutrally and wrap in an ImageValue. - // Avoids Node-only `Buffer.from(...)` so the provider stays runtime-agnostic. 
- const binary = atob(base64); - const bytes = new Uint8Array(binary.length); - for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i); - const image = await pngBytesToImageValue(bytes, "png"); - - emit({ type: "snapshot", data: { image } }); - emit({ type: "finish", data: {} as ImageGenerateTaskOutput }); - } finally { - await handle?.release(); - } - }; + protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] { + return stableDiffusionCppWorkerRunFnSpecs(); + } } diff --git a/providers/stable-diffusion-server/src/ai/StableDiffusionCppQueuedProvider.ts b/providers/stable-diffusion-server/src/ai/StableDiffusionCppQueuedProvider.ts new file mode 100644 index 000000000..4574544e1 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/StableDiffusionCppQueuedProvider.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Capability, ModelRecord } from "@workglow/ai"; +import { AiProvider } from "@workglow/ai"; +import { createCloudProviderClass } from "@workglow/ai/provider-utils"; +import { + inferStableDiffusionCppCapabilities, + stableDiffusionCppWorkerRunFnSpecs, +} from "./common/StableDiffusionCpp_Capabilities"; +import { LOCAL_STABLE_DIFFUSION_CPP } from "./common/StableDiffusionCpp_Constants"; +import type { StableDiffusionCppModelConfig } from "./common/StableDiffusionCpp_ModelSchema"; + +/** Main-thread registration (inline or worker-backed). 
*/ +export class StableDiffusionCppQueuedProvider extends createCloudProviderClass( + AiProvider, + { + name: LOCAL_STABLE_DIFFUSION_CPP, + displayName: "Local stable-diffusion.cpp (HTTP)", + isLocal: true, + supportsBrowser: true, + } +) { + override inferCapabilities(model: ModelRecord): readonly Capability[] { + return inferStableDiffusionCppCapabilities(model); + } + + protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] { + return stableDiffusionCppWorkerRunFnSpecs(); + } +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Capabilities.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Capabilities.ts new file mode 100644 index 000000000..2e551d1bf --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Capabilities.ts @@ -0,0 +1,37 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Capability, ModelRecord } from "@workglow/ai/worker"; +import { STABLE_DIFFUSION_CPP_CAPABILITY_SETS } from "./StableDiffusionCpp_CapabilitySets"; + +export const STABLE_DIFFUSION_CPP_RUN_FN_SPECS = STABLE_DIFFUSION_CPP_CAPABILITY_SETS.map( + (serves) => ({ serves }) +); + +export function stableDiffusionCppWorkerRunFnSpecs(): readonly { + readonly serves: readonly Capability[]; +}[] { + return STABLE_DIFFUSION_CPP_RUN_FN_SPECS; +} + +type CapabilityHints = Pick; + +/** + * sd-server hosts generative image models. Every valid record gets the full + * generative set (image.generation + image.editing + meta-ops). If the record + * has explicit capabilities and no identifying fields, declared wins; + * otherwise the baseline is meta-ops only. + */ +export function inferStableDiffusionCppCapabilities(model: CapabilityHints): readonly Capability[] { + const pc = model.provider_config as { model_path?: string; model_name?: string } | undefined; + const id = String(pc?.model_path ?? pc?.model_name ?? 
model.model_id ?? ""); + if (id.length > 0) { + return ["image.generation", "image.editing", "model.info", "model.search"]; + } + const declared = (model.capabilities as readonly Capability[] | undefined) ?? []; + if (declared.length > 0) return declared; + return ["model.info", "model.search"]; +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_CapabilitySets.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_CapabilitySets.ts new file mode 100644 index 000000000..99a5a38a6 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_CapabilitySets.ts @@ -0,0 +1,22 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Capability } from "@workglow/ai/worker"; + +export const STABLE_DIFFUSION_CPP_IMAGE_GENERATION = [ + "image.generation", +] as const satisfies Capability[]; +export const STABLE_DIFFUSION_CPP_IMAGE_EDITING = ["image.editing"] as const satisfies Capability[]; +export const STABLE_DIFFUSION_CPP_MODEL_INFO = ["model.info"] as const satisfies Capability[]; +export const STABLE_DIFFUSION_CPP_MODEL_SEARCH = ["model.search"] as const satisfies Capability[]; + +/** Order MUST match STABLE_DIFFUSION_CPP_RUN_FNS in JobRunFns. 
*/ +export const STABLE_DIFFUSION_CPP_CAPABILITY_SETS = [ + STABLE_DIFFUSION_CPP_IMAGE_GENERATION, + STABLE_DIFFUSION_CPP_IMAGE_EDITING, + STABLE_DIFFUSION_CPP_MODEL_SEARCH, + STABLE_DIFFUSION_CPP_MODEL_INFO, +] as const; diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts new file mode 100644 index 000000000..205be3448 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts @@ -0,0 +1,113 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; + +/** + * Endpoint variants for stable-diffusion.cpp HTTP servers. Default `/txt2img` + * matches the conventional sd.cpp HTTP API; `/v1/images/generations` is used + * by OpenAI-compatible builds. + */ +export type StableDiffusionCppEndpoint = "/txt2img" | "/v1/images/generations"; + +/** + * Provider-construction options shared across registrations. + * + * `transport` and `externalUrl` are both optional, but the resolver throws + * at acquisition time if no URL source resolves for a given request. + */ +export interface IStableDiffusionCppProviderOptions { + readonly transport?: IBackendsTransport; + readonly externalUrl?: string; + /** Default endpoint used when neither the model nor the request overrides it. */ + readonly endpoint?: StableDiffusionCppEndpoint; +} + +/** Resolved base URL plus a release callback (no-op for externalUrl paths). */ +export interface IAcquiredBaseUrl { + readonly baseUrl: string; + readonly release: () => Promise; +} + +/** + * Resolve a base URL for one request. + * + * Precedence: + * 1. `model.provider_config.base_url` + * 2. `opts.externalUrl` + * 3. `opts.transport.ensureRunning({ ... 
})` — requires `provider_config.model_path` + * + * Throws with a clear message if none of the three resolves. + */ +export async function acquireBaseUrl( + model: StableDiffusionCppModelConfig | undefined, + opts: IStableDiffusionCppProviderOptions +): Promise { + const modelBaseUrl = model?.provider_config?.base_url; + if (typeof modelBaseUrl === "string" && modelBaseUrl.length > 0) { + return { baseUrl: stripTrailingSlash(modelBaseUrl), release: noopRelease }; + } + if (typeof opts.externalUrl === "string" && opts.externalUrl.length > 0) { + return { baseUrl: stripTrailingSlash(opts.externalUrl), release: noopRelease }; + } + if (opts.transport) { + const modelPath = model?.provider_config?.model_path; + if (typeof modelPath !== "string" || modelPath.length === 0) { + throw new Error( + "StableDiffusionCpp: transport-mode acquisition requires provider_config.model_path." + ); + } + const handle: IRunningHandle = await opts.transport.ensureRunning({ + backend: "stable-diffusion-server", + modelPath, + opts: {}, + }); + return { + baseUrl: stripTrailingSlash(handle.url), + release: () => handle.release(), + }; + } + throw new Error( + "StableDiffusionCpp: no base URL source — set provider_config.base_url, opts.externalUrl, or opts.transport." + ); +} + +function stripTrailingSlash(url: string): string { + return url.replace(/\/+$/, ""); +} + +const noopRelease = async (): Promise => {}; + +// ── Base64 PNG helpers ───────────────────────────────────────────────────── + +/** + * Decodes a base64-encoded PNG string to bytes platform-neutrally. + * Avoids Node-only `Buffer.from(...)` so the provider stays runtime-agnostic. + */ +export function decodeBase64Png(b64: string): Uint8Array { + const binary = atob(b64); + const bytes = new Uint8Array(binary.length); + for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i); + return bytes; +} + +/** + * Encodes raw bytes to a base64 string platform-neutrally. 
+ * Used for `image.editing` to send the source image as a base64 PNG. + */ +export function encodeBytesToBase64(bytes: Uint8Array): string { + let binary = ""; + // Process in chunks to avoid blowing the call stack for large images. + const CHUNK = 0x8000; + for (let i = 0; i < bytes.length; i += CHUNK) { + binary += String.fromCharCode.apply( + null, + Array.from(bytes.subarray(i, Math.min(i + CHUNK, bytes.length))) + ); + } + return btoa(binary); +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts index 56c976ccd..97dc9c9e0 100644 --- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts @@ -5,3 +5,6 @@ */ export const LOCAL_STABLE_DIFFUSION_CPP = "LOCAL_STABLE_DIFFUSION_CPP"; + +/** Default sd.cpp HTTP endpoint when no per-request or per-model override is set. 
*/ +export const STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT = "/txt2img" as const; diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts new file mode 100644 index 000000000..cc4dbac24 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts @@ -0,0 +1,75 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFn, ImageEditTaskInput, ImageEditTaskOutput } from "@workglow/ai"; +import { imageValueToPngBytes, pngBytesToImageValue } from "@workglow/ai/provider-utils"; +import { + acquireBaseUrl, + decodeBase64Png, + encodeBytesToBase64, + type IStableDiffusionCppProviderOptions, +} from "./StableDiffusionCpp_Client"; +import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; +import { getStableDiffusionCppModelName } from "./StableDiffusionCpp_ModelUtil"; + +type AcquireFn = typeof acquireBaseUrl; + +/** + * One-shot run-fn for image + prompt -> image (img2img) via stable-diffusion.cpp. + * + * Request: `POST /img2img` with `{ prompt, init_image: base64Png, model? }`. + * Response: `{ images: [base64Png, ...] }` — first image used. + * Emits `snapshot` then `finish`. + * + * Always uses `/img2img` — no OpenAI-compat alternative because + * `/v1/images/edits` is multipart and sd.cpp doesn't speak that shape. 
+ */ +export function createStableDiffusionCppImageEditRunFn( + opts: IStableDiffusionCppProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + const modelName = getStableDiffusionCppModelName(model); + + const inputBytes = await imageValueToPngBytes(input.image); + const initImageB64 = encodeBytesToBase64(inputBytes); + + const body = JSON.stringify({ + prompt: input.prompt, + init_image: initImageB64, + ...(modelName ? { model: modelName } : {}), + }); + + const { baseUrl, release } = await acquire(model, opts); + try { + signal?.throwIfAborted?.(); + const response = await fetch(`${baseUrl}/img2img`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `StableDiffusionCpp: HTTP ${response.status} from /img2img (image-editing) — ${text}` + ); + } + const json = (await response.json()) as { images?: string[] }; + const base64 = json.images?.[0]; + if (!base64) { + throw new Error("StableDiffusionCpp: response contained no images"); + } + const bytes = decodeBase64Png(base64); + const image = await pngBytesToImageValue(bytes, "png"); + emit({ type: "snapshot", data: { image } }); + emit({ type: "finish", data: {} as ImageEditTaskOutput }); + } finally { + await release(); + } + }; +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts new file mode 100644 index 000000000..fa2666069 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts @@ -0,0 +1,77 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { + AiProviderRunFn, + ImageGenerateTaskInput, + ImageGenerateTaskOutput, +} from 
"@workglow/ai"; +import { pngBytesToImageValue } from "@workglow/ai/provider-utils"; +import { + acquireBaseUrl, + decodeBase64Png, + type IStableDiffusionCppProviderOptions, +} from "./StableDiffusionCpp_Client"; +import { STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT } from "./StableDiffusionCpp_Constants"; +import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; +import { getStableDiffusionCppModelName } from "./StableDiffusionCpp_ModelUtil"; + +type AcquireFn = typeof acquireBaseUrl; + +/** + * One-shot run-fn for text -> image via stable-diffusion.cpp HTTP server. + * Endpoint resolution: model.provider_config.endpoint > opts.endpoint > + * STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT (`/txt2img`). + * + * Request: `POST ` with `{ prompt, model?, ...optional params }`. + * Response: `{ images: [base64Png, ...] }` — first image used. + * Emits a `snapshot` with the decoded image, then `finish`. + */ +export function createStableDiffusionCppImageGenerateRunFn( + opts: IStableDiffusionCppProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + + const endpoint = + model?.provider_config?.endpoint ?? opts.endpoint ?? STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT; + const modelName = getStableDiffusionCppModelName(model); + + const body = JSON.stringify({ + prompt: input.prompt, + ...(modelName ? 
{ model: modelName } : {}), + }); + + const { baseUrl, release } = await acquire(model, opts); + try { + signal?.throwIfAborted?.(); + const response = await fetch(`${baseUrl}${endpoint}`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + signal, + }); + if (!response.ok) { + const text = await response.text().catch(() => "(no body)"); + throw new Error( + `StableDiffusionCpp: HTTP ${response.status} from ${endpoint} (image-generation) — ${text}` + ); + } + const json = (await response.json()) as { images?: string[] }; + const base64 = json.images?.[0]; + if (!base64) { + throw new Error("StableDiffusionCpp: response contained no images"); + } + const bytes = decodeBase64Png(base64); + const image = await pngBytesToImageValue(bytes, "png"); + emit({ type: "snapshot", data: { image } }); + emit({ type: "finish", data: {} as ImageGenerateTaskOutput }); + } finally { + await release(); + } + }; +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_JobRunFns.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_JobRunFns.ts new file mode 100644 index 000000000..9256058ca --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_JobRunFns.ts @@ -0,0 +1,48 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFnRegistration } from "@workglow/ai"; +import { + STABLE_DIFFUSION_CPP_IMAGE_EDITING, + STABLE_DIFFUSION_CPP_IMAGE_GENERATION, + STABLE_DIFFUSION_CPP_MODEL_INFO, + STABLE_DIFFUSION_CPP_MODEL_SEARCH, +} from "./StableDiffusionCpp_CapabilitySets"; +import { type IStableDiffusionCppProviderOptions } from "./StableDiffusionCpp_Client"; +import { createStableDiffusionCppImageEditRunFn } from "./StableDiffusionCpp_ImageEdit"; +import { createStableDiffusionCppImageGenerateRunFn } from "./StableDiffusionCpp_ImageGenerate"; +import { createStableDiffusionCppModelInfoRunFn } from 
"./StableDiffusionCpp_ModelInfo"; +import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; +import { createStableDiffusionCppModelSearchRunFn } from "./StableDiffusionCpp_ModelSearch"; + +export function buildStableDiffusionCppRunFns( + opts: IStableDiffusionCppProviderOptions +): readonly AiProviderRunFnRegistration< + // eslint-disable-next-line @typescript-eslint/no-explicit-any + any, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + any, + StableDiffusionCppModelConfig +>[] { + return [ + { + serves: STABLE_DIFFUSION_CPP_IMAGE_GENERATION, + runFn: createStableDiffusionCppImageGenerateRunFn(opts), + }, + { + serves: STABLE_DIFFUSION_CPP_IMAGE_EDITING, + runFn: createStableDiffusionCppImageEditRunFn(opts), + }, + { + serves: STABLE_DIFFUSION_CPP_MODEL_SEARCH, + runFn: createStableDiffusionCppModelSearchRunFn(opts), + }, + { + serves: STABLE_DIFFUSION_CPP_MODEL_INFO, + runFn: createStableDiffusionCppModelInfoRunFn(opts), + }, + ]; +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts new file mode 100644 index 000000000..03726b6ad --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts @@ -0,0 +1,55 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFn, ModelInfoTaskInput, ModelInfoTaskOutput } from "@workglow/ai"; +import { + acquireBaseUrl, + type IStableDiffusionCppProviderOptions, +} from "./StableDiffusionCpp_Client"; +import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; +import { getStableDiffusionCppModelName } from "./StableDiffusionCpp_ModelUtil"; + +type AcquireFn = typeof acquireBaseUrl; + +export function createStableDiffusionCppModelInfoRunFn( + opts: IStableDiffusionCppProviderOptions, + acquire: AcquireFn = acquireBaseUrl +): 
AiProviderRunFn { + return async (input, model, signal, emit) => { + signal?.throwIfAborted?.(); + let is_loaded = false; + const expectedName = getStableDiffusionCppModelName(model); + + try { + const { baseUrl, release } = await acquire(model, opts); + try { + const res = await fetch(`${baseUrl}/v1/models`, { signal }); + if (res.ok) { + const body = (await res.json()) as { data?: Array<{ id?: string }> }; + is_loaded = !!body.data?.some((m) => m.id === expectedName); + } + } finally { + await release(); + } + } catch { + // Server unreachable or /v1/models not implemented — leave is_loaded false. + } + + emit({ + type: "finish", + data: { + model: input.model, + is_local: true, + is_remote: false, + supports_browser: true, + supports_node: true, + is_cached: false, + is_loaded, + file_sizes: null, + }, + }); + }; +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSchema.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSchema.ts new file mode 100644 index 000000000..4090723c2 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSchema.ts @@ -0,0 +1,82 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { ModelConfigSchema, ModelRecordSchema } from "@workglow/ai/worker"; +import type { DataPortSchemaObject, FromSchema } from "@workglow/util/worker"; +import { LOCAL_STABLE_DIFFUSION_CPP } from "./StableDiffusionCpp_Constants"; + +/** + * Provider-config schema for `@workglow/stable-diffusion-server`. + * + * Required fields: + * - `provider` — discriminator + * - `provider_config.model_path` — absolute path passed to the broker; OR `base_url` if externalUrl-mode + * + * Either `model_path` (transport mode) OR `base_url` (externalUrl mode) must be set + * for a usable record. The provider resolver throws at runtime if neither resolves. 
+ */ +export const StableDiffusionCppModelSchema = { + type: "object", + properties: { + provider: { + const: LOCAL_STABLE_DIFFUSION_CPP, + description: "Discriminator: local stable-diffusion.cpp HTTP provider.", + }, + provider_config: { + type: "object", + description: "stable-diffusion.cpp-specific configuration.", + properties: { + model_path: { + type: "string", + description: + "Absolute filesystem path to the .gguf or .safetensors model. Required for transport-mode acquisition.", + }, + model_name: { + type: "string", + description: + "Optional logical model name sent as OpenAI `model` field when using the `/v1/images/generations` endpoint.", + }, + base_url: { + type: "string", + description: + "Optional per-record base URL override. Takes precedence over provider-level externalUrl. Used for records discovered via externalUrl-mode model.search.", + }, + endpoint: { + type: "string", + description: + "Optional per-record endpoint override. Either `/txt2img` (default sd.cpp HTTP API) or `/v1/images/generations` (OpenAI-compatible builds). 
Overrides provider-level default.", + }, + }, + additionalProperties: false, + }, + }, + required: ["provider", "provider_config"], + additionalProperties: true, +} as const satisfies DataPortSchemaObject; + +export const StableDiffusionCppModelRecordSchema = { + type: "object", + properties: { + ...ModelRecordSchema.properties, + ...StableDiffusionCppModelSchema.properties, + }, + required: [...ModelRecordSchema.required, ...StableDiffusionCppModelSchema.required], + additionalProperties: false, +} as const satisfies DataPortSchemaObject; + +export type StableDiffusionCppModelRecord = FromSchema; + +export const StableDiffusionCppModelConfigSchema = { + type: "object", + properties: { + ...ModelConfigSchema.properties, + ...StableDiffusionCppModelSchema.properties, + }, + required: [...ModelConfigSchema.required, ...StableDiffusionCppModelSchema.required], + additionalProperties: false, +} as const satisfies DataPortSchemaObject; + +export type StableDiffusionCppModelConfig = FromSchema; diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts new file mode 100644 index 000000000..bba4b0e4f --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts @@ -0,0 +1,53 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRunFn, ModelSearchTaskInput, ModelSearchTaskOutput } from "@workglow/ai"; +import { filterModelSearchResultsByQuery } from "@workglow/ai/provider-utils"; +import type { IStableDiffusionCppProviderOptions } from "./StableDiffusionCpp_Client"; +import { LOCAL_STABLE_DIFFUSION_CPP } from "./StableDiffusionCpp_Constants"; +import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; + +export function createStableDiffusionCppModelSearchRunFn( + opts: IStableDiffusionCppProviderOptions +): 
AiProviderRunFn { + return async (input, _model, signal, emit) => { + signal?.throwIfAborted?.(); + if (!opts.externalUrl) { + emit({ type: "finish", data: { results: [] } }); + return; + } + const baseUrl = opts.externalUrl.replace(/\/+$/, ""); + try { + const res = await fetch(`${baseUrl}/v1/models`, { signal }); + if (!res.ok) { + emit({ type: "finish", data: { results: [] } }); + return; + } + const body = (await res.json()) as { data?: Array<{ id: string }> }; + const results = (body.data ?? []).map((m) => ({ + id: m.id, + label: m.id, + description: m.id, + record: { + model_id: m.id, + provider: LOCAL_STABLE_DIFFUSION_CPP, + title: m.id, + description: "", + capabilities: [], + provider_config: { model_name: m.id, base_url: baseUrl }, + metadata: {}, + }, + raw: m, + })); + emit({ + type: "finish", + data: { results: filterModelSearchResultsByQuery(results, input.query) }, + }); + } catch { + emit({ type: "finish", data: { results: [] } }); + } + }; +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelUtil.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelUtil.ts new file mode 100644 index 000000000..2d2190c65 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelUtil.ts @@ -0,0 +1,35 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; + +/** + * Returns the logical model name to send in the OpenAI `model` field for + * `/v1/images/generations`-shape requests. sd.cpp's native `/txt2img` ignores + * this value, so we fall back to model_path, then model_id, then the empty string. + */ +export function getStableDiffusionCppModelName( + model: StableDiffusionCppModelConfig | undefined +): string { + const pc = model?.provider_config; + return String(pc?.model_name ?? pc?.model_path ?? model?.model_id ?? 
""); +} + +/** + * Returns the absolute filesystem path used by `transport.ensureRunning`. + * Required for transport-mode acquisition; throws if missing. + */ +export function getStableDiffusionCppModelPath( + model: StableDiffusionCppModelConfig | undefined +): string { + const path = model?.provider_config?.model_path; + if (typeof path !== "string" || path.length === 0) { + throw new Error( + "StableDiffusionCpp: provider_config.model_path is required for transport-mode acquisition." + ); + } + return path; +} diff --git a/providers/stable-diffusion-server/src/ai/index.ts b/providers/stable-diffusion-server/src/ai/index.ts index 5df0a99bc..a511a3750 100644 --- a/providers/stable-diffusion-server/src/ai/index.ts +++ b/providers/stable-diffusion-server/src/ai/index.ts @@ -7,5 +7,20 @@ // organize-imports-ignore export * from "./common/StableDiffusionCpp_Constants"; -export * from "./StableDiffusionCppProvider"; +export * from "./common/StableDiffusionCpp_ModelSchema"; +export * from "./common/StableDiffusionCpp_Capabilities"; +export * from "./common/StableDiffusionCpp_CapabilitySets"; export * from "./registerStableDiffusionCpp"; +export * from "./registerStableDiffusionCppInline"; +export * from "./registerStableDiffusionCppWorker"; + +import { STABLE_DIFFUSION_CPP_RUN_FN_SPECS } from "./common/StableDiffusionCpp_Capabilities"; +import { buildStableDiffusionCppRunFns } from "./common/StableDiffusionCpp_JobRunFns"; +import { StableDiffusionCppQueuedProvider } from "./StableDiffusionCppQueuedProvider"; + +/** @internal */ +export const _testOnly = { + StableDiffusionCppQueuedProvider, + STABLE_DIFFUSION_CPP_RUN_FN_SPECS, + buildStableDiffusionCppRunFns, +} as const; diff --git a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts index 4b5db64dc..7e8d12fd0 100644 --- a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts +++ 
b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts @@ -5,24 +5,24 @@ */ import type { AiProviderRegisterOptions } from "@workglow/ai"; -import type { IBackendsTransport } from "@workglow/ai/provider-utils"; -import { registerProviderInline } from "@workglow/ai/provider-utils"; -import type { StableDiffusionCppEndpoint } from "./StableDiffusionCppProvider"; -import { StableDiffusionCppProvider } from "./StableDiffusionCppProvider"; - -export interface IRegisterStableDiffusionCppOptions extends AiProviderRegisterOptions { - readonly transport: IBackendsTransport; - readonly externalUrl?: string; - readonly endpoint?: StableDiffusionCppEndpoint; -} +import { registerProviderWithWorker } from "@workglow/ai/provider-utils"; +import { StableDiffusionCppQueuedProvider } from "./StableDiffusionCppQueuedProvider"; +/** + * Main-thread worker-backed registration. The provider proxy lives on the + * main thread and forwards jobs to the worker, which holds the real run-fns. + * + * Use {@link registerStableDiffusionCppInline} for transport mode within a + * single thread. 
+ */ export async function registerStableDiffusionCpp( - options: IRegisterStableDiffusionCppOptions + options: AiProviderRegisterOptions & { + worker: Worker | (() => Worker); + } ): Promise { - const { transport, externalUrl, endpoint, ...registerOptions } = options; - await registerProviderInline( - new StableDiffusionCppProvider({ transport, externalUrl, endpoint }), + await registerProviderWithWorker( + new StableDiffusionCppQueuedProvider(), "StableDiffusionCpp", - registerOptions + options ); } diff --git a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppInline.ts b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppInline.ts new file mode 100644 index 000000000..c2b8271fa --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppInline.ts @@ -0,0 +1,28 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { AiProviderRegisterOptions } from "@workglow/ai"; +import { registerProviderInline } from "@workglow/ai/provider-utils"; +import { type IStableDiffusionCppProviderOptions } from "./common/StableDiffusionCpp_Client"; +import { buildStableDiffusionCppRunFns } from "./common/StableDiffusionCpp_JobRunFns"; +import { StableDiffusionCppQueuedProvider } from "./StableDiffusionCppQueuedProvider"; + +export interface IRegisterStableDiffusionCppInlineOptions + extends AiProviderRegisterOptions, IStableDiffusionCppProviderOptions {} + +/** Main-thread inline registration. Supports transport mode. 
*/ +export async function registerStableDiffusionCppInline( + options: IRegisterStableDiffusionCppInlineOptions = {} +): Promise { + const { transport, externalUrl, endpoint, ...registerOptions } = options; + await registerProviderInline( + new StableDiffusionCppQueuedProvider( + buildStableDiffusionCppRunFns({ transport, externalUrl, endpoint }) + ), + "StableDiffusionCpp", + registerOptions + ); +} diff --git a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppWorker.ts b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppWorker.ts new file mode 100644 index 000000000..6c9dfa166 --- /dev/null +++ b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppWorker.ts @@ -0,0 +1,27 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { registerProviderWorker } from "@workglow/ai/provider-utils"; +import type { IStableDiffusionCppProviderOptions } from "./common/StableDiffusionCpp_Client"; +import { buildStableDiffusionCppRunFns } from "./common/StableDiffusionCpp_JobRunFns"; +import { StableDiffusionCppProvider } from "./StableDiffusionCppProvider"; + +/** + * Worker-server-side registration. Supports both transport and externalUrl modes. + * Transport is constructed inside this worker runtime by the caller and held + * by closure in the run-fns. Primary production path. 
+ */ +export async function registerStableDiffusionCppWorker( + options: IStableDiffusionCppProviderOptions = {} +): Promise { + await registerProviderWorker( + (ws) => + new StableDiffusionCppProvider(buildStableDiffusionCppRunFns(options)).registerOnWorkerServer( + ws + ), + "StableDiffusionCpp" + ); +} diff --git a/providers/stable-diffusion-server/src/ai/runtime.ts b/providers/stable-diffusion-server/src/ai/runtime.ts index 094645c24..4fea72a1a 100644 --- a/providers/stable-diffusion-server/src/ai/runtime.ts +++ b/providers/stable-diffusion-server/src/ai/runtime.ts @@ -6,4 +6,11 @@ // organize-imports-ignore -export * from "./registerStableDiffusionCpp"; +export * from "./common/StableDiffusionCpp_Client"; +export * from "./common/StableDiffusionCpp_ImageGenerate"; +export * from "./common/StableDiffusionCpp_ImageEdit"; +export * from "./common/StableDiffusionCpp_ModelInfo"; +export * from "./common/StableDiffusionCpp_ModelSearch"; +export * from "./common/StableDiffusionCpp_JobRunFns"; +export * from "./registerStableDiffusionCppInline"; +export * from "./registerStableDiffusionCppWorker"; From 4638a83ab45290cdbde3d20a087720f5edd2f238 Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Sat, 23 May 2026 22:58:13 +0000 Subject: [PATCH 7/8] test(stable-diffusion-server): full unit and integration test suite --- .../LlamaCppServerProvider.test.ts | 4 +- .../LlamaCppServer_Client.test.ts | 30 ++-- .../LlamaCppServer_ModelInfo.test.ts | 8 +- .../LlamaCppServer_ModelSearch.test.ts | 18 ++- .../LlamaCppServer_TextEmbedding.test.ts | 2 +- ...lamaCppServer_TextGenerationStream.test.ts | 4 +- .../LlamaCppServer_ToolCalling.test.ts | 2 +- .../LocalBackendsProviderContracts.test.ts | 18 ++- .../StableDiffusionCppProvider.test.ts | 134 ++++++++++++++++++ .../StableDiffusionCpp_Client.test.ts | 111 +++++++++++++++ ...leDiffusionCpp_Generic.integration.test.ts | 37 +++++ .../StableDiffusionCpp_ImageEdit.test.ts | 77 ++++++++++ .../StableDiffusionCpp_ImageGenerate.test.ts | 79 
+++++++++++ .../StableDiffusionCpp_ModelInfo.test.ts | 56 ++++++++ .../StableDiffusionCpp_ModelSearch.test.ts | 72 ++++++++++ .../src/ai/common/LlamaCppServer_Client.ts | 96 ++++++++++++- .../src/ai/common/LlamaCppServer_ModelInfo.ts | 10 +- .../ai/common/LlamaCppServer_ModelSearch.ts | 10 +- .../ai/common/LlamaCppServer_TextEmbedding.ts | 8 +- .../common/LlamaCppServer_TextGeneration.ts | 3 +- .../ai/common/LlamaCppServer_TextRewriter.ts | 3 +- .../ai/common/LlamaCppServer_TextSummary.ts | 3 +- .../ai/common/LlamaCppServer_ToolCalling.ts | 3 +- .../ai/common/StableDiffusionCpp_Client.ts | 96 ++++++++++++- .../ai/common/StableDiffusionCpp_ImageEdit.ts | 3 +- .../StableDiffusionCpp_ImageGenerate.ts | 15 +- .../ai/common/StableDiffusionCpp_ModelInfo.ts | 3 +- .../common/StableDiffusionCpp_ModelSearch.ts | 10 +- 28 files changed, 858 insertions(+), 57 deletions(-) create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCppProvider.test.ts create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_Client.test.ts create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_Generic.integration.test.ts create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageEdit.test.ts create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageGenerate.test.ts create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelInfo.test.ts create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelSearch.test.ts diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts index 9150c1585..b55f1ede1 100644 --- a/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts +++ b/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts @@ -129,7 +129,7 @@ describe("LlamaCppServer transport-mode run-fn (parity across inline + worker)", const 
release = vi.fn().mockResolvedValue(undefined); const transport = fakeTransport(); transport.ensureRunning.mockResolvedValue({ - url: "http://broker:9999", + url: "http://127.0.0.1:9999", release, } as IRunningHandle); @@ -166,7 +166,7 @@ describe("LlamaCppServer transport-mode run-fn (parity across inline + worker)", opts: { ctx: 4096 }, }); const fetchedUrl = String(fetchSpy.mock.calls[0]![0]); - expect(fetchedUrl).toBe("http://broker:9999/v1/chat/completions"); + expect(fetchedUrl).toBe("http://127.0.0.1:9999/v1/chat/completions"); expect(release).toHaveBeenCalledTimes(1); fetchSpy.mockRestore(); }); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts index fecfa6516..721819f62 100644 --- a/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts @@ -24,10 +24,10 @@ describe("acquireBaseUrl precedence", () => { it("prefers model.provider_config.base_url over everything", async () => { const transport = fakeTransport(); const result = await acquireBaseUrl( - { provider_config: { base_url: "http://from-model:8080/" } } as any, - { externalUrl: "http://from-opts:8080", transport } + { provider_config: { base_url: "http://localhost:8080/" } } as any, + { externalUrl: "http://127.0.0.1:8081", transport } ); - expect(result.baseUrl).toBe("http://from-model:8080"); + expect(result.baseUrl).toBe("http://localhost:8080"); expect(transport.ensureRunning).not.toHaveBeenCalled(); await result.release(); // no-op }); @@ -35,10 +35,10 @@ describe("acquireBaseUrl precedence", () => { it("prefers opts.externalUrl over transport when no model.base_url", async () => { const transport = fakeTransport(); const result = await acquireBaseUrl({ provider_config: { model_path: "/x.gguf" } } as any, { - externalUrl: "http://from-opts:8080", + externalUrl: "http://127.0.0.1:8081", transport, }); - 
expect(result.baseUrl).toBe("http://from-opts:8080"); + expect(result.baseUrl).toBe("http://127.0.0.1:8081"); expect(transport.ensureRunning).not.toHaveBeenCalled(); await result.release(); // no-op }); @@ -47,7 +47,7 @@ describe("acquireBaseUrl precedence", () => { const release = vi.fn().mockResolvedValue(undefined); const transport = fakeTransport(); transport.ensureRunning.mockResolvedValue({ - url: "http://broker:9999/", + url: "http://127.0.0.1:9999/", release, } as IRunningHandle); const result = await acquireBaseUrl( @@ -59,7 +59,7 @@ describe("acquireBaseUrl precedence", () => { modelPath: "/abs/m.gguf", opts: { ctx: 8192 }, }); - expect(result.baseUrl).toBe("http://broker:9999"); + expect(result.baseUrl).toBe("http://127.0.0.1:9999"); await result.release(); expect(release).toHaveBeenCalledTimes(1); }); @@ -67,7 +67,7 @@ describe("acquireBaseUrl precedence", () => { it("uses defaultCtx when model has no ctx override", async () => { const transport = fakeTransport(); transport.ensureRunning.mockResolvedValue({ - url: "http://broker:9999", + url: "http://127.0.0.1:9999", release: vi.fn(), } as IRunningHandle); await acquireBaseUrl({ provider_config: { model_path: "/abs/m.gguf" } } as any, { @@ -88,6 +88,20 @@ describe("acquireBaseUrl precedence", () => { ); }); + it("rejects public model URLs before requests can use them", async () => { + await expect( + acquireBaseUrl({ provider_config: { base_url: "https://example.com:8080/" } } as any, {}) + ).rejects.toThrow(/local HTTP/); + }); + + it("normalizes slash-heavy local URLs", async () => { + const result = await acquireBaseUrl( + { provider_config: { base_url: `http://127.0.0.1:8080${"/".repeat(1_000)}` } } as any, + {} + ); + expect(result.baseUrl).toBe("http://127.0.0.1:8080"); + }); + it("throws when no source resolves", async () => { await expect(acquireBaseUrl({ provider_config: {} } as any, {})).rejects.toThrow( /no base URL source/ diff --git 
a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts index 654f75f8e..2ba9e3aa2 100644 --- a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts @@ -17,7 +17,7 @@ describe("createLlamaCppServerModelInfoStream", () => { const emit = (e: any) => events.push(e); await fn( { detail: "dimensions", model: "m" } as any, - { provider_config: { base_url: "http://x:8080", native_dimensions: 768 } } as any, + { provider_config: { base_url: "http://localhost:8080", native_dimensions: 768 } } as any, undefined as any, emit ); @@ -39,7 +39,7 @@ describe("createLlamaCppServerModelInfoStream", () => { const emit = (e: any) => events.push(e); await fn( { detail: "dimensions", model: "m" } as any, - { provider_config: { base_url: "http://x:8080" } } as any, + { provider_config: { base_url: "http://localhost:8080" } } as any, undefined as any, emit ); @@ -55,7 +55,7 @@ describe("createLlamaCppServerModelInfoStream", () => { const emit = (e: any) => events.push(e); await fn( { model: "m" } as any, - { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any, + { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any, undefined as any, emit ); @@ -69,7 +69,7 @@ describe("createLlamaCppServerModelInfoStream", () => { const emit = (e: any) => events.push(e); await fn( { model: "m" } as any, - { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any, + { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any, undefined as any, emit ); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts index 4c934d408..41248708d 100644 --- a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts +++ 
b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts @@ -24,7 +24,7 @@ describe("createLlamaCppServerModelSearchStream", () => { vi.spyOn(globalThis, "fetch").mockResolvedValue( new Response(JSON.stringify({ data: [{ id: "loaded-model" }] }), { status: 200 }) ); - const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" }); + const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://localhost:8080" }); const events: any[] = []; const emit = (e: any) => events.push(e); await fn({ query: "" } as any, undefined as any, undefined as any, emit); @@ -32,25 +32,35 @@ describe("createLlamaCppServerModelSearchStream", () => { expect(results).toHaveLength(1); expect(results[0].id).toBe("loaded-model"); expect(results[0].record.provider).toBe("LOCAL_LLAMACPP_SERVER"); - expect(results[0].record.provider_config.base_url).toBe("http://x:8080"); + expect(results[0].record.provider_config.base_url).toBe("http://localhost:8080"); }); it("returns [] when fetch fails", async () => { vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED")); - const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" }); + const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://localhost:8080" }); const events: any[] = []; const emit = (e: any) => events.push(e); await fn({ query: "" } as any, undefined as any, undefined as any, emit); expect(events.at(-1)!.data.results).toEqual([]); }); + it("does not fetch public externalUrl values", async () => { + const fetchSpy = vi.spyOn(globalThis, "fetch"); + const fn = createLlamaCppServerModelSearchStream({ externalUrl: "https://example.com:8080" }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + expect(fetchSpy).not.toHaveBeenCalled(); + expect(events.at(-1)!.data.results).toEqual([]); + }); + it("filters by query case-insensitively", async () 
=> { vi.spyOn(globalThis, "fetch").mockResolvedValue( new Response(JSON.stringify({ data: [{ id: "Llama-3" }, { id: "Mistral" }] }), { status: 200, }) ); - const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" }); + const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://localhost:8080" }); const events: any[] = []; const emit = (e: any) => events.push(e); await fn({ query: "llama" } as any, undefined as any, undefined as any, emit); diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts index 3f991b362..18fe9dba6 100644 --- a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts @@ -9,7 +9,7 @@ import { afterEach, describe, expect, it, vi } from "vitest"; afterEach(() => vi.restoreAllMocks()); -const model = { provider_config: { base_url: "http://x:8080", model_name: "emb" } } as any; +const model = { provider_config: { base_url: "http://localhost:8080", model_name: "emb" } } as any; describe("createLlamaCppServerTextEmbeddingStream", () => { it("returns a single Float32Array for string input", async () => { diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts index 5efa1ced7..49ad0974b 100644 --- a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts @@ -27,7 +27,7 @@ afterEach(() => { }); describe("createLlamaCppServerTextGenerationStream", () => { - const model = { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any; + const model = { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any; it("yields text-delta events 
for each delta line and a final finish", async () => { const fetchSpy = vi @@ -41,7 +41,7 @@ describe("createLlamaCppServerTextGenerationStream", () => { expect(fetchSpy).toHaveBeenCalledTimes(1); const [url] = fetchSpy.mock.calls[0]!; - expect(String(url)).toBe("http://x:8080/v1/chat/completions"); + expect(String(url)).toBe("http://localhost:8080/v1/chat/completions"); expect(events.filter((e) => e.type === "text-delta").map((e) => e.textDelta)).toEqual([ "Hel", "lo", diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts index 7030fe8a5..9b1279cb2 100644 --- a/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts +++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts @@ -21,7 +21,7 @@ function sseChunks(chunks: object[]): Response { afterEach(() => vi.restoreAllMocks()); -const model = { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any; +const model = { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any; const TOOLS = [ { name: "add", diff --git a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts index d7d3db18c..cbe6cf032 100644 --- a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts +++ b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts @@ -19,7 +19,10 @@ import type { } from "@workglow/ai/provider-utils"; import { pngBytesToImageValue } from "@workglow/ai/provider-utils"; import { LOCAL_LLAMACPP_SERVER, registerLlamaCppServerInline } from "@workglow/llamacpp-server/ai"; -import { StableDiffusionCppProvider } from "@workglow/stable-diffusion-server/ai"; +import { + LOCAL_STABLE_DIFFUSION_CPP, + registerStableDiffusionCppInline, +} from "@workglow/stable-diffusion-server/ai"; import { afterEach, 
beforeEach, describe, expect, it, vi } from "vitest"; vi.mock("@workglow/ai/provider-utils", async (importOriginal) => { @@ -167,10 +170,11 @@ describe("local backend provider stream contracts", () => { it("stable-diffusion emits the generated image as a snapshot before finish", async () => { const { release, transport } = createTransportStub(); - const provider = new StableDiffusionCppProvider({ transport }); - await provider.register(); + await registerStableDiffusionCppInline({ transport }); - const runFn = getAiProviderRegistry().getRunFnFor(provider.name, ["image.generation"]); + const runFn = getAiProviderRegistry().getRunFnFor(LOCAL_STABLE_DIFFUSION_CPP, [ + "image.generation", + ]); expect(runFn).toBeDefined(); globalThis.fetch = vi.fn( @@ -188,7 +192,11 @@ describe("local backend provider stream contracts", () => { const events = await runProviderStream( runFn!, { prompt: "draw a cat" }, - { model_id: "/models/stable-diffusion.gguf" } + { + model_id: "sd-test", + provider: LOCAL_STABLE_DIFFUSION_CPP, + provider_config: { model_path: "/models/stable-diffusion.gguf" }, + } ); expect(events).toEqual([ diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCppProvider.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCppProvider.test.ts new file mode 100644 index 000000000..2d227ce86 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/StableDiffusionCppProvider.test.ts @@ -0,0 +1,134 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { ModelRecord } from "@workglow/ai"; +import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import { _testOnly } from "@workglow/stable-diffusion-server/ai"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +vi.mock("@workglow/ai/provider-utils", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + pngBytesToImageValue: vi.fn(async () => ({ 
kind: "mock-image" })), + }; +}); + +const { + StableDiffusionCppQueuedProvider, + STABLE_DIFFUSION_CPP_RUN_FN_SPECS, + buildStableDiffusionCppRunFns, +} = _testOnly; + +function model( + model_id: string, + provider_config: Record = { model_path: `/models/${model_id}` }, + capabilities: readonly string[] = [] +): ModelRecord { + return { + model_id, + title: model_id, + description: "", + provider: "LOCAL_STABLE_DIFFUSION_CPP", + provider_config, + capabilities: [...capabilities], + metadata: {}, + } as ModelRecord; +} + +describe("StableDiffusionCppQueuedProvider.inferCapabilities", () => { + const provider = new StableDiffusionCppQueuedProvider(buildStableDiffusionCppRunFns({})); + + it("infers full generative set for any non-empty model id", () => { + const caps = provider.inferCapabilities(model("sd-1.5.gguf")); + expect([...caps].sort()).toEqual([ + "image.editing", + "image.generation", + "model.info", + "model.search", + ]); + }); + + it("falls back to declared caps when id is empty", () => { + const caps = provider.inferCapabilities(model("", {}, ["image.generation"])); + expect(caps).toEqual(["image.generation"]); + }); + + it("falls back to baseline meta-ops when nothing declared and nothing matches", () => { + const caps = provider.inferCapabilities(model("", {})); + expect(caps).toEqual(["model.info", "model.search"]); + }); +}); + +describe("StableDiffusionCpp capability-set parity", () => { + it("STABLE_DIFFUSION_CPP_RUN_FN_SPECS matches buildStableDiffusionCppRunFns({}) serves shapes", () => { + const fns = buildStableDiffusionCppRunFns({}); + const fnsServes = fns.map((r) => [...r.serves].sort().join(",")); + const specsServes = STABLE_DIFFUSION_CPP_RUN_FN_SPECS.map((s) => + [...s.serves].sort().join(",") + ); + expect(specsServes).toEqual(fnsServes); + }); +}); + +describe("StableDiffusionCpp run-fn shape", () => { + it("registers a runFn for every canonical capability set", () => { + const sets = buildStableDiffusionCppRunFns({}).map((r) => 
[...r.serves].sort().join(",")); + expect(sets).toContain("image.generation"); + expect(sets).toContain("image.editing"); + expect(sets).toContain("model.search"); + expect(sets).toContain("model.info"); + }); +}); + +function fakeTransport(): IBackendsTransport & { + ensureRunning: ReturnType; +} { + return { + ensureRunning: vi.fn(), + subscribeStatus: vi.fn(() => () => undefined), + install: vi.fn(), + list: vi.fn(), + uninstall: vi.fn(), + } as unknown as IBackendsTransport & { ensureRunning: ReturnType }; +} + +describe("StableDiffusionCpp transport-mode run-fn (parity across inline + worker)", () => { + afterEach(() => vi.restoreAllMocks()); + + it("acquires URL via transport and releases the handle (image.generation)", async () => { + const release = vi.fn().mockResolvedValue(undefined); + const transport = fakeTransport(); + transport.ensureRunning.mockResolvedValue({ + url: "http://127.0.0.1:9999", + release, + } as IRunningHandle); + + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockResolvedValue(new Response(JSON.stringify({ images: ["aGk="] }), { status: 200 })); + + const fns = buildStableDiffusionCppRunFns({ transport }); + const imageGen = fns.find((r) => r.serves.join(",") === "image.generation")!; + const events: any[] = []; + const emit = (e: any) => events.push(e); + await imageGen.runFn( + { prompt: "hi" } as any, + { provider_config: { model_path: "/abs/m.gguf" } } as any, + undefined as any, + emit + ); + + expect(transport.ensureRunning).toHaveBeenCalledWith({ + backend: "stable-diffusion-server", + modelPath: "/abs/m.gguf", + opts: {}, + }); + const fetchedUrl = String(fetchSpy.mock.calls[0]![0]); + expect(fetchedUrl).toBe("http://127.0.0.1:9999/txt2img"); + expect(release).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Client.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Client.test.ts new file mode 100644 index 000000000..68573fa1b --- /dev/null +++ 
b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Client.test.ts @@ -0,0 +1,111 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils"; +import { + acquireBaseUrl, + decodeBase64Png, + encodeBytesToBase64, +} from "@workglow/stable-diffusion-server/ai-runtime"; +import { describe, expect, it, vi } from "vitest"; + +function fakeTransport(): IBackendsTransport & { + ensureRunning: ReturnType; +} { + return { + ensureRunning: vi.fn(), + subscribeStatus: vi.fn(() => () => undefined), + install: vi.fn(), + list: vi.fn(), + uninstall: vi.fn(), + } as unknown as IBackendsTransport & { ensureRunning: ReturnType }; +} + +describe("acquireBaseUrl precedence", () => { + it("prefers model.provider_config.base_url over everything", async () => { + const transport = fakeTransport(); + const result = await acquireBaseUrl( + { provider_config: { base_url: "http://localhost:8080/" } } as any, + { externalUrl: "http://127.0.0.1:8081", transport } + ); + expect(result.baseUrl).toBe("http://localhost:8080"); + expect(transport.ensureRunning).not.toHaveBeenCalled(); + await result.release(); // no-op + }); + + it("prefers opts.externalUrl over transport when no model.base_url", async () => { + const transport = fakeTransport(); + const result = await acquireBaseUrl({ provider_config: { model_path: "/x.gguf" } } as any, { + externalUrl: "http://127.0.0.1:8081", + transport, + }); + expect(result.baseUrl).toBe("http://127.0.0.1:8081"); + expect(transport.ensureRunning).not.toHaveBeenCalled(); + await result.release(); // no-op + }); + + it("falls back to transport.ensureRunning when neither URL is set", async () => { + const release = vi.fn().mockResolvedValue(undefined); + const transport = fakeTransport(); + transport.ensureRunning.mockResolvedValue({ + url: "http://127.0.0.1:9999/", + release, + } as IRunningHandle); + const result = await 
acquireBaseUrl({ provider_config: { model_path: "/abs/m.gguf" } } as any, { + transport, + }); + expect(transport.ensureRunning).toHaveBeenCalledWith({ + backend: "stable-diffusion-server", + modelPath: "/abs/m.gguf", + opts: {}, + }); + expect(result.baseUrl).toBe("http://127.0.0.1:9999"); + await result.release(); + expect(release).toHaveBeenCalledTimes(1); + }); + + it("throws when transport mode is selected but model_path is missing", async () => { + const transport = fakeTransport(); + await expect(acquireBaseUrl({ provider_config: {} } as any, { transport })).rejects.toThrow( + /model_path/ + ); + }); + + it("rejects public model URLs before requests can use them", async () => { + await expect( + acquireBaseUrl({ provider_config: { base_url: "https://example.com:8080/" } } as any, {}) + ).rejects.toThrow(/local HTTP/); + }); + + it("normalizes slash-heavy local URLs", async () => { + const result = await acquireBaseUrl( + { provider_config: { base_url: `http://127.0.0.1:8080${"/".repeat(1_000)}` } } as any, + {} + ); + expect(result.baseUrl).toBe("http://127.0.0.1:8080"); + }); + + it("throws when no source resolves", async () => { + await expect(acquireBaseUrl({ provider_config: {} } as any, {})).rejects.toThrow( + /no base URL source/ + ); + }); +}); + +describe("decodeBase64Png / encodeBytesToBase64 roundtrip", () => { + it("decode then encode produces the same string for small payloads", () => { + const original = btoa("hello PNG bytes"); + const bytes = decodeBase64Png(original); + expect(encodeBytesToBase64(bytes)).toBe(original); + }); + + it("handles binary bytes (high values)", () => { + const bytes = new Uint8Array([0, 1, 254, 255, 128, 64]); + const b64 = encodeBytesToBase64(bytes); + const decoded = decodeBase64Png(b64); + expect(Array.from(decoded)).toEqual(Array.from(bytes)); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Generic.integration.test.ts 
b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Generic.integration.test.ts new file mode 100644 index 000000000..67ab62c6b --- /dev/null +++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Generic.integration.test.ts @@ -0,0 +1,37 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + createStableDiffusionCppImageGenerateRunFn, + createStableDiffusionCppModelSearchRunFn, +} from "@workglow/stable-diffusion-server/ai-runtime"; +import { describe, expect, it } from "vitest"; + +const RUN = process.env.RUN_SD_SERVER_TESTS === "1"; +const BASE_URL = process.env.SD_SERVER_URL ?? "http://localhost:7860"; + +describe.skipIf(!RUN)("StableDiffusionCpp integration (real server)", () => { + const model = { + provider_config: { base_url: BASE_URL, model_name: "model" }, + } as any; + + it("image.generation produces a snapshot with an image", async () => { + const fn = createStableDiffusionCppImageGenerateRunFn({ externalUrl: BASE_URL }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ prompt: "a small red square" } as any, model, undefined as any, emit); + expect(events.some((e) => e.type === "snapshot")).toBe(true); + expect(events.at(-1)!.type).toBe("finish"); + }); + + it("model.search returns at least one entry via /v1/models", async () => { + const fn = createStableDiffusionCppModelSearchRunFn({ externalUrl: BASE_URL }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + expect(events.at(-1)!.data.results.length).toBeGreaterThanOrEqual(1); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageEdit.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageEdit.test.ts new file mode 100644 index 000000000..c1ca66b59 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageEdit.test.ts @@ 
-0,0 +1,77 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createStableDiffusionCppImageEditRunFn } from "@workglow/stable-diffusion-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +vi.mock("@workglow/ai/provider-utils", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + imageValueToPngBytes: vi.fn(async () => new Uint8Array([1, 2, 3, 4])), + pngBytesToImageValue: vi.fn(async () => ({ kind: "mock-image" })), + }; +}); + +afterEach(() => vi.restoreAllMocks()); + +const model = { + provider_config: { base_url: "http://localhost:8080", model_name: "sd1.5" }, +} as any; + +describe("createStableDiffusionCppImageEditRunFn", () => { + it("encodes input image as base64 PNG and POSTs to /img2img", async () => { + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockResolvedValue(new Response(JSON.stringify({ images: ["aGVsbG8="] }), { status: 200 })); + const fn = createStableDiffusionCppImageEditRunFn({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { prompt: "make it blue", image: { kind: "input-image" } } as any, + model, + undefined as any, + emit + ); + const [url, init] = fetchSpy.mock.calls[0]!; + expect(String(url)).toBe("http://localhost:8080/img2img"); + const body = JSON.parse(String((init as RequestInit).body)); + expect(body.prompt).toBe("make it blue"); + expect(typeof body.init_image).toBe("string"); + expect(body.init_image.length).toBeGreaterThan(0); // base64 of [1,2,3,4] + expect(body.model).toBe("sd1.5"); + expect(events.some((e) => e.type === "snapshot")).toBe(true); + expect(events.at(-1)!.type).toBe("finish"); + }); + + it("throws on non-2xx", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("bad", { status: 400 })); + const fn = createStableDiffusionCppImageEditRunFn({}); + await expect( + fn( + { prompt: "x", image: { kind: 
"input-image" } } as any, + model, + undefined as any, + () => undefined + ) + ).rejects.toThrow(/HTTP 400/); + }); + + it("throws when response contains no images", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({}), { status: 200 }) + ); + const fn = createStableDiffusionCppImageEditRunFn({}); + await expect( + fn( + { prompt: "x", image: { kind: "input-image" } } as any, + model, + undefined as any, + () => undefined + ) + ).rejects.toThrow(/no images/); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageGenerate.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageGenerate.test.ts new file mode 100644 index 000000000..bcc141e77 --- /dev/null +++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageGenerate.test.ts @@ -0,0 +1,79 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createStableDiffusionCppImageGenerateRunFn } from "@workglow/stable-diffusion-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +vi.mock("@workglow/ai/provider-utils", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + pngBytesToImageValue: vi.fn(async () => ({ kind: "mock-image" })), + }; +}); + +afterEach(() => vi.restoreAllMocks()); + +const model = { + provider_config: { base_url: "http://localhost:8080", model_name: "sd1.5" }, +} as any; + +describe("createStableDiffusionCppImageGenerateRunFn", () => { + it("POSTs to /txt2img by default and emits snapshot + finish", async () => { + const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ images: ["aGVsbG8="] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }) + ); + const fn = createStableDiffusionCppImageGenerateRunFn({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ prompt: 
"draw a cat" } as any, model, undefined as any, emit); + const [url] = fetchSpy.mock.calls[0]!; + expect(String(url)).toBe("http://localhost:8080/txt2img"); + expect(events.some((e) => e.type === "snapshot")).toBe(true); + expect(events.at(-1)!.type).toBe("finish"); + }); + + it("uses the OpenAI-compat endpoint when configured at the model level", async () => { + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockResolvedValue(new Response(JSON.stringify({ images: ["aGk="] }), { status: 200 })); + const fn = createStableDiffusionCppImageGenerateRunFn({}); + await fn( + { prompt: "x" } as any, + { + provider_config: { + base_url: "http://localhost:8080", + model_name: "sd1.5", + endpoint: "/v1/images/generations", + }, + } as any, + undefined as any, + () => undefined + ); + expect(String(fetchSpy.mock.calls[0]![0])).toBe("http://localhost:8080/v1/images/generations"); + }); + + it("throws on non-2xx with informative message", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("nope", { status: 500 })); + const fn = createStableDiffusionCppImageGenerateRunFn({}); + await expect( + fn({ prompt: "x" } as any, model, undefined as any, () => undefined) + ).rejects.toThrow(/HTTP 500/); + }); + + it("throws when response contains no images", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ images: [] }), { status: 200 }) + ); + const fn = createStableDiffusionCppImageGenerateRunFn({}); + await expect( + fn({ prompt: "x" } as any, model, undefined as any, () => undefined) + ).rejects.toThrow(/no images/); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelInfo.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelInfo.test.ts new file mode 100644 index 000000000..b39c5bf2b --- /dev/null +++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelInfo.test.ts @@ -0,0 +1,56 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * 
SPDX-License-Identifier: Apache-2.0 + */ + +import { createStableDiffusionCppModelInfoRunFn } from "@workglow/stable-diffusion-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +afterEach(() => vi.restoreAllMocks()); + +describe("createStableDiffusionCppModelInfoRunFn", () => { + it("reports is_loaded=true when /v1/models includes the model name", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [{ id: "m" }, { id: "other" }] }), { status: 200 }) + ); + const fn = createStableDiffusionCppModelInfoRunFn({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { model: "m" } as any, + { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any, + undefined as any, + emit + ); + expect(events.at(-1)!.data.is_loaded).toBe(true); + }); + + it("reports is_loaded=false when server unreachable", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED")); + const fn = createStableDiffusionCppModelInfoRunFn({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { model: "m" } as any, + { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any, + undefined as any, + emit + ); + expect(events.at(-1)!.data.is_loaded).toBe(false); + }); + + it("reports is_loaded=false when /v1/models 404s", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("", { status: 404 })); + const fn = createStableDiffusionCppModelInfoRunFn({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn( + { model: "m" } as any, + { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any, + undefined as any, + emit + ); + expect(events.at(-1)!.data.is_loaded).toBe(false); + }); +}); diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelSearch.test.ts 
b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelSearch.test.ts new file mode 100644 index 000000000..eb0a85e4d --- /dev/null +++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelSearch.test.ts @@ -0,0 +1,72 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createStableDiffusionCppModelSearchRunFn } from "@workglow/stable-diffusion-server/ai-runtime"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +afterEach(() => vi.restoreAllMocks()); + +describe("createStableDiffusionCppModelSearchRunFn", () => { + it("returns [] when no externalUrl set", async () => { + const fetchSpy = vi.spyOn(globalThis, "fetch"); + const fn = createStableDiffusionCppModelSearchRunFn({}); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + expect(fetchSpy).not.toHaveBeenCalled(); + expect(events.at(-1)!.data.results).toEqual([]); + }); + + it("returns mapped results from /v1/models when externalUrl set", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [{ id: "loaded-model" }] }), { status: 200 }) + ); + const fn = createStableDiffusionCppModelSearchRunFn({ externalUrl: "http://localhost:8080" }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + const results = events.at(-1)!.data.results; + expect(results).toHaveLength(1); + expect(results[0].id).toBe("loaded-model"); + expect(results[0].record.provider).toBe("LOCAL_STABLE_DIFFUSION_CPP"); + expect(results[0].record.provider_config.base_url).toBe("http://localhost:8080"); + }); + + it("returns [] when fetch fails", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED")); + const fn = createStableDiffusionCppModelSearchRunFn({ externalUrl: 
"http://localhost:8080" }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + expect(events.at(-1)!.data.results).toEqual([]); + }); + + it("does not fetch public externalUrl values", async () => { + const fetchSpy = vi.spyOn(globalThis, "fetch"); + const fn = createStableDiffusionCppModelSearchRunFn({ + externalUrl: "https://example.com:8080", + }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "" } as any, undefined as any, undefined as any, emit); + expect(fetchSpy).not.toHaveBeenCalled(); + expect(events.at(-1)!.data.results).toEqual([]); + }); + + it("filters by query case-insensitively", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({ data: [{ id: "sd-1.5" }, { id: "Flux-1" }] }), { + status: 200, + }) + ); + const fn = createStableDiffusionCppModelSearchRunFn({ externalUrl: "http://localhost:8080" }); + const events: any[] = []; + const emit = (e: any) => events.push(e); + await fn({ query: "flux" } as any, undefined as any, undefined as any, emit); + const results = events.at(-1)!.data.results; + expect(results.map((r: any) => r.id)).toEqual(["Flux-1"]); + }); +}); diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts index 232b4316c..fb3b2f139 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts @@ -43,10 +43,10 @@ export async function acquireBaseUrl( ): Promise { const modelBaseUrl = model?.provider_config?.base_url; if (typeof modelBaseUrl === "string" && modelBaseUrl.length > 0) { - return { baseUrl: stripTrailingSlash(modelBaseUrl), release: noopRelease }; + return { baseUrl: normalizeServerBaseUrl(modelBaseUrl), release: noopRelease }; } if (typeof opts.externalUrl === "string" 
&& opts.externalUrl.length > 0) { - return { baseUrl: stripTrailingSlash(opts.externalUrl), release: noopRelease }; + return { baseUrl: normalizeServerBaseUrl(opts.externalUrl), release: noopRelease }; } if (opts.transport) { const modelPath = model?.provider_config?.model_path; @@ -65,7 +65,7 @@ export async function acquireBaseUrl( opts: { ctx }, }); return { - baseUrl: stripTrailingSlash(handle.url), + baseUrl: normalizeServerBaseUrl(handle.url), release: () => handle.release(), }; } @@ -74,8 +74,94 @@ export async function acquireBaseUrl( ); } -function stripTrailingSlash(url: string): string { - return url.replace(/\/+$/, ""); +export function normalizeServerBaseUrl(rawUrl: string): string { + let url: URL; + try { + url = new URL(rawUrl); + } catch { + throw new Error("LlamaCppServer: base URL must be a valid local HTTP(S) URL."); + } + + if (url.protocol !== "http:" && url.protocol !== "https:") { + throw new Error("LlamaCppServer: base URL must be a valid local HTTP(S) URL."); + } + if (url.username || url.password) { + throw new Error("LlamaCppServer: base URL must not include credentials."); + } + if (!isLocalHostname(url.hostname)) { + throw new Error("LlamaCppServer: base URL must target a local HTTP(S) server."); + } + + url.hash = ""; + url.search = ""; + let pathnameEnd = url.pathname.length; + while (pathnameEnd > 1 && url.pathname.charCodeAt(pathnameEnd - 1) === 47) { + pathnameEnd--; + } + const pathname = url.pathname.slice(0, pathnameEnd); + return pathname === "/" ? url.origin : `${url.origin}${pathname}`; +} + +export function buildServerUrl(baseUrl: string, endpoint: `/${string}`): string { + const base = baseUrl.endsWith("/") ? baseUrl : `${baseUrl}/`; + const path = endpoint.startsWith("/") ? 
endpoint.slice(1) : endpoint; + return new URL(path, base).toString(); +} + +function isLocalHostname(hostname: string): boolean { + const host = removeIpv6Brackets(hostname.toLowerCase()); + if (host === "localhost" || host.endsWith(".localhost")) { + return true; + } + return isLocalIpv4(host) || isLocalIpv6(host); +} + +function removeIpv6Brackets(hostname: string): string { + if (hostname.startsWith("[") && hostname.endsWith("]")) { + return hostname.slice(1, -1); + } + return hostname; +} + +function isLocalIpv4(hostname: string): boolean { + const parts = hostname.split("."); + if (parts.length !== 4) { + return false; + } + const octets: number[] = []; + for (const part of parts) { + if (part.length === 0) { + return false; + } + for (const char of part) { + if (char < "0" || char > "9") { + return false; + } + } + const octet = Number(part); + if (!Number.isInteger(octet) || octet < 0 || octet > 255) { + return false; + } + octets.push(octet); + } + + const [first, second] = octets; + return ( + first === 10 || + first === 127 || + (first === 172 && second >= 16 && second <= 31) || + (first === 192 && second === 168) || + (first === 169 && second === 254) + ); +} + +function isLocalIpv6(hostname: string): boolean { + return ( + hostname === "::1" || + hostname.startsWith("fc") || + hostname.startsWith("fd") || + hostname.startsWith("fe80:") + ); } const noopRelease = async (): Promise => {}; diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts index f2801948b..d732b2c00 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts @@ -5,7 +5,11 @@ */ import type { AiProviderRunFn, ModelInfoTaskInput, ModelInfoTaskOutput } from "@workglow/ai"; -import { acquireBaseUrl, type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client"; +import { + acquireBaseUrl, + 
buildServerUrl, + type ILlamaCppServerProviderOptions, +} from "./LlamaCppServer_Client"; import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil"; @@ -26,7 +30,7 @@ export function createLlamaCppServerModelInfoStream( try { const { baseUrl, release } = await acquire(model, opts); try { - const res = await fetch(`${baseUrl}/props`, { signal }); + const res = await fetch(buildServerUrl(baseUrl, "/props"), { signal }); if (res.ok) { const props = (await res.json()) as { default_generation_settings?: { n_embd?: number }; @@ -64,7 +68,7 @@ export function createLlamaCppServerModelInfoStream( try { const { baseUrl, release } = await acquire(model, opts); try { - const res = await fetch(`${baseUrl}/v1/models`, { signal }); + const res = await fetch(buildServerUrl(baseUrl, "/v1/models"), { signal }); if (res.ok) { const body = (await res.json()) as { data?: Array<{ id?: string }> }; is_loaded = !!body.data?.some((m) => m.id === expectedName); diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts index 5e97e4acb..36517dfd5 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts @@ -6,7 +6,11 @@ import type { AiProviderRunFn, ModelSearchTaskInput, ModelSearchTaskOutput } from "@workglow/ai"; import { filterModelSearchResultsByQuery } from "@workglow/ai/provider-utils"; -import type { ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client"; +import { + buildServerUrl, + normalizeServerBaseUrl, + type ILlamaCppServerProviderOptions, +} from "./LlamaCppServer_Client"; import { LOCAL_LLAMACPP_SERVER } from "./LlamaCppServer_Constants"; import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; @@ -24,9 +28,9 @@ export function 
createLlamaCppServerModelSearchStream( emit({ type: "finish", data: { results: [] } }); return; } - const baseUrl = opts.externalUrl.replace(/\/+$/, ""); try { - const res = await fetch(`${baseUrl}/v1/models`, { signal }); + const baseUrl = normalizeServerBaseUrl(opts.externalUrl); + const res = await fetch(buildServerUrl(baseUrl, "/v1/models"), { signal }); if (!res.ok) { emit({ type: "finish", data: { results: [] } }); return; diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts index 7f242c632..b8004cc5c 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts @@ -9,7 +9,11 @@ import type { TextEmbeddingTaskInput, TextEmbeddingTaskOutput, } from "@workglow/ai"; -import { acquireBaseUrl, type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client"; +import { + acquireBaseUrl, + buildServerUrl, + type ILlamaCppServerProviderOptions, +} from "./LlamaCppServer_Client"; import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema"; import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil"; @@ -32,7 +36,7 @@ export function createLlamaCppServerTextEmbeddingStream( }); const { baseUrl, release } = await acquire(model, opts); try { - const response = await fetch(`${baseUrl}/v1/embeddings`, { + const response = await fetch(buildServerUrl(baseUrl, "/v1/embeddings"), { method: "POST", headers: { "Content-Type": "application/json" }, body, diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts index 29c674674..014685ad4 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts @@ -11,6 +11,7 @@ import type { } from 
"@workglow/ai"; import { acquireBaseUrl, + buildServerUrl, readChatCompletionDeltas, type ILlamaCppServerProviderOptions, } from "./LlamaCppServer_Client"; @@ -77,7 +78,7 @@ export function createLlamaCppServerTextGenerationStream( const { baseUrl, release } = await acquire(model, opts); try { signal?.throwIfAborted?.(); - const response = await fetch(`${baseUrl}/v1/chat/completions`, { + const response = await fetch(buildServerUrl(baseUrl, "/v1/chat/completions"), { method: "POST", headers: { "Content-Type": "application/json" }, body, diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts index c0266ad82..35bab3425 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts @@ -7,6 +7,7 @@ import type { AiProviderRunFn, TextRewriterTaskInput, TextRewriterTaskOutput } from "@workglow/ai"; import { acquireBaseUrl, + buildServerUrl, readChatCompletionDeltas, type ILlamaCppServerProviderOptions, } from "./LlamaCppServer_Client"; @@ -31,7 +32,7 @@ export function createLlamaCppServerTextRewriterStream( }); const { baseUrl, release } = await acquire(model, opts); try { - const response = await fetch(`${baseUrl}/v1/chat/completions`, { + const response = await fetch(buildServerUrl(baseUrl, "/v1/chat/completions"), { method: "POST", headers: { "Content-Type": "application/json" }, body, diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts index 14c914cd5..28286bdbd 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts @@ -7,6 +7,7 @@ import type { AiProviderRunFn, TextSummaryTaskInput, TextSummaryTaskOutput } from "@workglow/ai"; import { acquireBaseUrl, + 
buildServerUrl, readChatCompletionDeltas, type ILlamaCppServerProviderOptions, } from "./LlamaCppServer_Client"; @@ -31,7 +32,7 @@ export function createLlamaCppServerTextSummaryStream( }); const { baseUrl, release } = await acquire(model, opts); try { - const response = await fetch(`${baseUrl}/v1/chat/completions`, { + const response = await fetch(buildServerUrl(baseUrl, "/v1/chat/completions"), { method: "POST", headers: { "Content-Type": "application/json" }, body, diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts index 7f4575234..686275e85 100644 --- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts +++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts @@ -20,6 +20,7 @@ import { import { parsePartialJson } from "@workglow/util/worker"; import { acquireBaseUrl, + buildServerUrl, readChatCompletionDeltas, type ILlamaCppServerProviderOptions, } from "./LlamaCppServer_Client"; @@ -57,7 +58,7 @@ export function createLlamaCppServerToolCallingStream( }); const { baseUrl, release } = await acquire(model, opts); try { - const response = await fetch(`${baseUrl}/v1/chat/completions`, { + const response = await fetch(buildServerUrl(baseUrl, "/v1/chat/completions"), { method: "POST", headers: { "Content-Type": "application/json" }, body, diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts index 205be3448..29bbe9c07 100644 --- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts @@ -49,10 +49,10 @@ export async function acquireBaseUrl( ): Promise { const modelBaseUrl = model?.provider_config?.base_url; if (typeof modelBaseUrl === "string" && modelBaseUrl.length > 0) { - return { baseUrl: 
stripTrailingSlash(modelBaseUrl), release: noopRelease }; + return { baseUrl: normalizeServerBaseUrl(modelBaseUrl), release: noopRelease }; } if (typeof opts.externalUrl === "string" && opts.externalUrl.length > 0) { - return { baseUrl: stripTrailingSlash(opts.externalUrl), release: noopRelease }; + return { baseUrl: normalizeServerBaseUrl(opts.externalUrl), release: noopRelease }; } if (opts.transport) { const modelPath = model?.provider_config?.model_path; @@ -67,7 +67,7 @@ export async function acquireBaseUrl( opts: {}, }); return { - baseUrl: stripTrailingSlash(handle.url), + baseUrl: normalizeServerBaseUrl(handle.url), release: () => handle.release(), }; } @@ -76,8 +76,94 @@ export async function acquireBaseUrl( ); } -function stripTrailingSlash(url: string): string { - return url.replace(/\/+$/, ""); +export function normalizeServerBaseUrl(rawUrl: string): string { + let url: URL; + try { + url = new URL(rawUrl); + } catch { + throw new Error("StableDiffusionCpp: base URL must be a valid local HTTP(S) URL."); + } + + if (url.protocol !== "http:" && url.protocol !== "https:") { + throw new Error("StableDiffusionCpp: base URL must be a valid local HTTP(S) URL."); + } + if (url.username || url.password) { + throw new Error("StableDiffusionCpp: base URL must not include credentials."); + } + if (!isLocalHostname(url.hostname)) { + throw new Error("StableDiffusionCpp: base URL must target a local HTTP(S) server."); + } + + url.hash = ""; + url.search = ""; + let pathnameEnd = url.pathname.length; + while (pathnameEnd > 1 && url.pathname.charCodeAt(pathnameEnd - 1) === 47) { + pathnameEnd--; + } + const pathname = url.pathname.slice(0, pathnameEnd); + return pathname === "/" ? url.origin : `${url.origin}${pathname}`; +} + +export function buildServerUrl(baseUrl: string, endpoint: `/${string}`): string { + const base = baseUrl.endsWith("/") ? baseUrl : `${baseUrl}/`; + const path = endpoint.startsWith("/") ? 
endpoint.slice(1) : endpoint; + return new URL(path, base).toString(); +} + +function isLocalHostname(hostname: string): boolean { + const host = removeIpv6Brackets(hostname.toLowerCase()); + if (host === "localhost" || host.endsWith(".localhost")) { + return true; + } + return isLocalIpv4(host) || isLocalIpv6(host); +} + +function removeIpv6Brackets(hostname: string): string { + if (hostname.startsWith("[") && hostname.endsWith("]")) { + return hostname.slice(1, -1); + } + return hostname; +} + +function isLocalIpv4(hostname: string): boolean { + const parts = hostname.split("."); + if (parts.length !== 4) { + return false; + } + const octets: number[] = []; + for (const part of parts) { + if (part.length === 0) { + return false; + } + for (const char of part) { + if (char < "0" || char > "9") { + return false; + } + } + const octet = Number(part); + if (!Number.isInteger(octet) || octet < 0 || octet > 255) { + return false; + } + octets.push(octet); + } + + const [first, second] = octets; + return ( + first === 10 || + first === 127 || + (first === 172 && second >= 16 && second <= 31) || + (first === 192 && second === 168) || + (first === 169 && second === 254) + ); +} + +function isLocalIpv6(hostname: string): boolean { + return ( + hostname === "::1" || + hostname.startsWith("fc") || + hostname.startsWith("fd") || + hostname.startsWith("fe80:") + ); } const noopRelease = async (): Promise => {}; diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts index cc4dbac24..0e959333d 100644 --- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts @@ -8,6 +8,7 @@ import type { AiProviderRunFn, ImageEditTaskInput, ImageEditTaskOutput } from "@ import { imageValueToPngBytes, pngBytesToImageValue } from "@workglow/ai/provider-utils"; import { 
acquireBaseUrl, + buildServerUrl, decodeBase64Png, encodeBytesToBase64, type IStableDiffusionCppProviderOptions, @@ -47,7 +48,7 @@ export function createStableDiffusionCppImageEditRunFn( const { baseUrl, release } = await acquire(model, opts); try { signal?.throwIfAborted?.(); - const response = await fetch(`${baseUrl}/img2img`, { + const response = await fetch(buildServerUrl(baseUrl, "/img2img"), { method: "POST", headers: { "Content-Type": "application/json" }, body, diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts index fa2666069..52850db64 100644 --- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts @@ -12,6 +12,7 @@ import type { import { pngBytesToImageValue } from "@workglow/ai/provider-utils"; import { acquireBaseUrl, + buildServerUrl, decodeBase64Png, type IStableDiffusionCppProviderOptions, } from "./StableDiffusionCpp_Client"; @@ -37,8 +38,9 @@ export function createStableDiffusionCppImageGenerateRunFn( return async (input, model, signal, emit) => { signal?.throwIfAborted?.(); - const endpoint = - model?.provider_config?.endpoint ?? opts.endpoint ?? STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT; + const endpoint = resolveEndpoint( + model?.provider_config?.endpoint ?? opts.endpoint ?? 
STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT + ); const modelName = getStableDiffusionCppModelName(model); const body = JSON.stringify({ @@ -49,7 +51,7 @@ export function createStableDiffusionCppImageGenerateRunFn( const { baseUrl, release } = await acquire(model, opts); try { signal?.throwIfAborted?.(); - const response = await fetch(`${baseUrl}${endpoint}`, { + const response = await fetch(buildServerUrl(baseUrl, endpoint), { method: "POST", headers: { "Content-Type": "application/json" }, body, @@ -75,3 +77,10 @@ export function createStableDiffusionCppImageGenerateRunFn( } }; } + +function resolveEndpoint(endpoint: string): "/txt2img" | "/v1/images/generations" { + if (endpoint === "/txt2img" || endpoint === "/v1/images/generations") { + return endpoint; + } + throw new Error(`StableDiffusionCpp: unsupported image-generation endpoint ${endpoint}`); +} diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts index 03726b6ad..7460cd973 100644 --- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts @@ -7,6 +7,7 @@ import type { AiProviderRunFn, ModelInfoTaskInput, ModelInfoTaskOutput } from "@workglow/ai"; import { acquireBaseUrl, + buildServerUrl, type IStableDiffusionCppProviderOptions, } from "./StableDiffusionCpp_Client"; import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; @@ -26,7 +27,7 @@ export function createStableDiffusionCppModelInfoRunFn( try { const { baseUrl, release } = await acquire(model, opts); try { - const res = await fetch(`${baseUrl}/v1/models`, { signal }); + const res = await fetch(buildServerUrl(baseUrl, "/v1/models"), { signal }); if (res.ok) { const body = (await res.json()) as { data?: Array<{ id?: string }> }; is_loaded = !!body.data?.some((m) => m.id === expectedName); diff 
--git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts index bba4b0e4f..e27e14402 100644 --- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts +++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts @@ -6,7 +6,11 @@ import type { AiProviderRunFn, ModelSearchTaskInput, ModelSearchTaskOutput } from "@workglow/ai"; import { filterModelSearchResultsByQuery } from "@workglow/ai/provider-utils"; -import type { IStableDiffusionCppProviderOptions } from "./StableDiffusionCpp_Client"; +import { + buildServerUrl, + normalizeServerBaseUrl, + type IStableDiffusionCppProviderOptions, +} from "./StableDiffusionCpp_Client"; import { LOCAL_STABLE_DIFFUSION_CPP } from "./StableDiffusionCpp_Constants"; import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema"; @@ -19,9 +23,9 @@ export function createStableDiffusionCppModelSearchRunFn( emit({ type: "finish", data: { results: [] } }); return; } - const baseUrl = opts.externalUrl.replace(/\/+$/, ""); try { - const res = await fetch(`${baseUrl}/v1/models`, { signal }); + const baseUrl = normalizeServerBaseUrl(opts.externalUrl); + const res = await fetch(buildServerUrl(baseUrl, "/v1/models"), { signal }); if (!res.ok) { emit({ type: "finish", data: { results: [] } }); return; From ec5cc74d3e28c7ebad60a8cbebfb46153a3e4b3d Mon Sep 17 00:00:00 2001 From: Steven Roussey Date: Sat, 23 May 2026 23:07:36 +0000 Subject: [PATCH 8/8] =?UTF-8?q?docs(stable-diffusion-server):=20README=20?= =?UTF-8?q?=E2=80=94=20install,=20quickstart,=20capability=20table,=20brow?= =?UTF-8?q?ser=20note?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- providers/stable-diffusion-server/README.md | 125 ++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 
providers/stable-diffusion-server/README.md diff --git a/providers/stable-diffusion-server/README.md b/providers/stable-diffusion-server/README.md new file mode 100644 index 000000000..5861874c9 --- /dev/null +++ b/providers/stable-diffusion-server/README.md @@ -0,0 +1,125 @@ +# `@workglow/stable-diffusion-server` + +OpenAI-compatible HTTP client for an upstream +[`stable-diffusion.cpp`](https://github.com/leejet/stable-diffusion.cpp) server. + +This package **does not bundle stable-diffusion.cpp**. It speaks to a +running `sd-server` process — either one you start yourself +(`externalUrl` mode) or one acquired through an `IBackendsTransport` +(`transport` mode, used by the Workglow Builder's broker). + +## Install + +```bash +bun add @workglow/stable-diffusion-server +``` + +You also need `@workglow/ai`, `@workglow/task-graph`, `@workglow/storage`, +`@workglow/job-queue`, and `@workglow/util` (peer dependencies). + +## Quickstart — `externalUrl` mode + +Start `sd-server` yourself, then point the provider at it: + +```bash +sd-server -m ./models/sd-1.5.gguf --port 7860 --listen +``` + +```ts +import { registerStableDiffusionCppInline } from "@workglow/stable-diffusion-server/ai-runtime"; + +await registerStableDiffusionCppInline({ + externalUrl: "http://localhost:7860", +}); +``` + +The provider is now visible to the registry as `LOCAL_STABLE_DIFFUSION_CPP`. + +## Quickstart — `transport` mode (Electron + broker) + +```ts +import { registerStableDiffusionCppInline } from "@workglow/stable-diffusion-server/ai-runtime"; + +await registerStableDiffusionCppInline({ + transport: backendsTransport, // your IBackendsTransport implementation + endpoint: "/txt2img", +}); +``` + +In transport mode each model record must include +`provider_config.model_path` — the absolute path to the model file. The +broker spawns one `sd-server` per `modelPath`, shared by refcount. 
+ +## Model record shape + +```ts +{ + model_id: "sd-1.5", + provider: "LOCAL_STABLE_DIFFUSION_CPP", + provider_config: { + model_path: "/abs/path/to/sd-1.5.gguf", // required for transport mode + model_name: "sd-1.5", // optional; sent as OpenAI `model` field + base_url: "http://localhost:7860", // optional per-record override; must be a local HTTP(S) URL without credentials + endpoint: "/txt2img", // optional per-record endpoint override + }, + capabilities: [], + metadata: {}, +} +``` + +## Supported capabilities + +| Capability | Endpoint | Notes | +|---|---|---| +| `image.generation` | `POST /txt2img` (or `POST /v1/images/generations`) | txt2img — endpoint flavor configurable, see below | +| `image.editing` | `POST /img2img` | img2img with base64-encoded init image | +| `model.info` | `GET /v1/models` | Reports `is_loaded: true` when the server lists the model name; `false` if the server is unreachable or responds with a non-2xx status | +| `model.search` | `GET /v1/models` | externalUrl mode only — see below | + +### Endpoint flavor: `/txt2img` vs `/v1/images/generations` + +`image.generation` supports two request shapes, selectable per record +(via `provider_config.endpoint`) or per provider (via the +`registerStableDiffusionCpp*({ endpoint })` option): + +- **`/txt2img`** — the conventional stable-diffusion.cpp HTTP API. + Defaults to this if neither model nor provider sets one. +- **`/v1/images/generations`** — used by OpenAI-compatible sd.cpp + builds. Sends `model`, `prompt`, `n`, `size` in the OpenAI request + shape; response is parsed as `data[].b64_json`. + +`image.editing` always uses `/img2img` regardless of the txt2img +endpoint flavor. + +### Why `model.search` returns `[]` in transport mode + +`transport.ensureRunning` requires a `modelPath`, which is what +`model.search` is meant to help the user pick. The broker's catalog of +installed models is the Builder UI's concern, not the provider's. In +`externalUrl` mode `GET /v1/models` works and returns whatever the +server enumerates. 
+ +## Registration shapes + +Three registration entry points, all sharing the same options +(`{ transport?, externalUrl?, endpoint? }`): + +- **`registerStableDiffusionCppInline(options)`** — main-thread inline. + Primarily used in tests and any single-thread scenario. +- **`registerStableDiffusionCppWorker(options)`** — called inside a + worker runtime. This is the primary production path. The worker + constructs its own `IBackendsTransport` (e.g., + `MessagePortBackendsTransport`) and passes it here directly — no port + transfer happens. +- **`registerStableDiffusionCpp({ worker })`** — main-thread proxy that + forwards jobs to a worker. The actual run-fns and transport live in + the worker; this side only exposes the provider identifier to the + registry. + +## Browser + +`@workglow/stable-diffusion-server/ai` resolves to a browser bundle that +uses the exact same source as the node bundle. Pure `fetch` works the +same in both. In a plain browser there is no broker to construct an +`IBackendsTransport` against, so practical use is `externalUrl` mode; +nothing in the code forbids passing a custom transport if one exists.