From a911c5467642edc885714f5225baea7a345a6317 Mon Sep 17 00:00:00 2001
From: Steven Roussey <sroussey@gmail.com>
Date: Fri, 22 May 2026 00:33:44 +0000
Subject: [PATCH 1/8] feat(ai): IBackendsTransport interface + provider package
 scaffolding

---
 bun.lock                                      |  54 +++++
 packages/ai/src/provider-utils.ts             |   1 +
 .../src/provider-utils/IBackendsTransport.ts  | 120 ++++++++++
 packages/test/package.json                    |   3 +
 .../LocalBackendsProviderContracts.test.ts    | 197 ++++++++++++++++
 .../test/ai/IBackendsTransport.types.test.ts  |  75 +++++++
 providers/llamacpp-server/package.json        |  69 ++++++
 providers/llamacpp-server/src/ai-runtime.ts   |   9 +
 providers/llamacpp-server/src/ai.ts           |   9 +
 .../src/ai/LlamaCppServerProvider.ts          | 210 ++++++++++++++++++
 .../src/ai/common/LlamaCppServer_Constants.ts |   7 +
 providers/llamacpp-server/src/ai/index.ts     |  11 +
 .../src/ai/registerLlamaCppServer.ts          |  27 +++
 providers/llamacpp-server/src/ai/runtime.ts   |   9 +
 providers/llamacpp-server/tsconfig.json       |  29 +++
 providers/mlx/package.json                    |  69 ++++++
 providers/mlx/src/ai-runtime.ts               |   9 +
 providers/mlx/src/ai.ts                       |   9 +
 providers/mlx/src/ai/MlxProvider.ts           |  76 +++++++
 providers/mlx/src/ai/common/Mlx_Constants.ts  |   9 +
 providers/mlx/src/ai/index.ts                 |  11 +
 providers/mlx/src/ai/registerMlx.ts           |  15 ++
 providers/mlx/src/ai/runtime.ts               |   9 +
 providers/mlx/tsconfig.json                   |  29 +++
 .../stable-diffusion-server/package.json      |  69 ++++++
 .../stable-diffusion-server/src/ai-runtime.ts |   9 +
 providers/stable-diffusion-server/src/ai.ts   |   9 +
 .../src/ai/StableDiffusionCppProvider.ts      | 156 +++++++++++++
 .../ai/common/StableDiffusionCpp_Constants.ts |   7 +
 .../stable-diffusion-server/src/ai/index.ts   |  11 +
 .../src/ai/registerStableDiffusionCpp.ts      |  28 +++
 .../stable-diffusion-server/src/ai/runtime.ts |   9 +
 .../stable-diffusion-server/tsconfig.json     |  29 +++
 33 files changed, 1393 insertions(+)
 create mode 100644 packages/ai/src/provider-utils/IBackendsTransport.ts
 create mode 100644 packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
 create mode 100644 packages/test/src/test/ai/IBackendsTransport.types.test.ts
 create mode 100644 providers/llamacpp-server/package.json
 create mode 100644 providers/llamacpp-server/src/ai-runtime.ts
 create mode 100644 providers/llamacpp-server/src/ai.ts
 create mode 100644 providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts
 create mode 100644 providers/llamacpp-server/src/ai/index.ts
 create mode 100644 providers/llamacpp-server/src/ai/registerLlamaCppServer.ts
 create mode 100644 providers/llamacpp-server/src/ai/runtime.ts
 create mode 100644 providers/llamacpp-server/tsconfig.json
 create mode 100644 providers/mlx/package.json
 create mode 100644 providers/mlx/src/ai-runtime.ts
 create mode 100644 providers/mlx/src/ai.ts
 create mode 100644 providers/mlx/src/ai/MlxProvider.ts
 create mode 100644 providers/mlx/src/ai/common/Mlx_Constants.ts
 create mode 100644 providers/mlx/src/ai/index.ts
 create mode 100644 providers/mlx/src/ai/registerMlx.ts
 create mode 100644 providers/mlx/src/ai/runtime.ts
 create mode 100644 providers/mlx/tsconfig.json
 create mode 100644 providers/stable-diffusion-server/package.json
 create mode 100644 providers/stable-diffusion-server/src/ai-runtime.ts
 create mode 100644 providers/stable-diffusion-server/src/ai.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/index.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/runtime.ts
 create mode 100644 providers/stable-diffusion-server/tsconfig.json

diff --git a/bun.lock b/bun.lock
index bbef45094..0177b1e86 100644
--- a/bun.lock
+++ b/bun.lock
@@ -301,13 +301,16 @@
         "@workglow/javascript": "workspace:*",
         "@workglow/job-queue": "workspace:*",
         "@workglow/knowledge-base": "workspace:*",
+        "@workglow/llamacpp-server": "workspace:*",
         "@workglow/mcp": "workspace:*",
+        "@workglow/mlx": "workspace:*",
         "@workglow/node-llama-cpp": "workspace:*",
         "@workglow/ollama": "workspace:*",
         "@workglow/openai": "workspace:*",
         "@workglow/playwright": "workspace:*",
         "@workglow/postgres": "workspace:*",
         "@workglow/sqlite": "workspace:*",
+        "@workglow/stable-diffusion-server": "workspace:*",
         "@workglow/storage": "workspace:*",
         "@workglow/supabase": "workspace:*",
         "@workglow/task-graph": "workspace:*",
@@ -541,6 +544,36 @@
         "@workglow/util": "workspace:*",
       },
     },
+    "providers/llamacpp-server": {
+      "name": "@workglow/llamacpp-server",
+      "version": "0.0.1",
+      "devDependencies": {
+        "@workglow/ai": "workspace:*",
+        "@workglow/util": "workspace:*",
+      },
+      "peerDependencies": {
+        "@workglow/ai": "workspace:*",
+        "@workglow/job-queue": "workspace:*",
+        "@workglow/storage": "workspace:*",
+        "@workglow/task-graph": "workspace:*",
+        "@workglow/util": "workspace:*",
+      },
+    },
+    "providers/mlx": {
+      "name": "@workglow/mlx",
+      "version": "0.0.1",
+      "devDependencies": {
+        "@workglow/ai": "workspace:*",
+        "@workglow/util": "workspace:*",
+      },
+      "peerDependencies": {
+        "@workglow/ai": "workspace:*",
+        "@workglow/job-queue": "workspace:*",
+        "@workglow/storage": "workspace:*",
+        "@workglow/task-graph": "workspace:*",
+        "@workglow/util": "workspace:*",
+      },
+    },
     "providers/node-llama-cpp": {
       "name": "@workglow/node-llama-cpp",
       "version": "0.3.5",
@@ -666,6 +699,21 @@
         "better-sqlite3",
       ],
     },
+    "providers/stable-diffusion-server": {
+      "name": "@workglow/stable-diffusion-server",
+      "version": "0.0.1",
+      "devDependencies": {
+        "@workglow/ai": "workspace:*",
+        "@workglow/util": "workspace:*",
+      },
+      "peerDependencies": {
+        "@workglow/ai": "workspace:*",
+        "@workglow/job-queue": "workspace:*",
+        "@workglow/storage": "workspace:*",
+        "@workglow/task-graph": "workspace:*",
+        "@workglow/util": "workspace:*",
+      },
+    },
     "providers/supabase": {
       "name": "@workglow/supabase",
       "version": "0.3.5",
@@ -1405,8 +1453,12 @@
 
     "@workglow/knowledge-base": ["@workglow/knowledge-base@workspace:packages/knowledge-base"],
 
+    "@workglow/llamacpp-server": ["@workglow/llamacpp-server@workspace:providers/llamacpp-server"],
+
     "@workglow/mcp": ["@workglow/mcp@workspace:packages/mcp"],
 
+    "@workglow/mlx": ["@workglow/mlx@workspace:providers/mlx"],
+
     "@workglow/node-llama-cpp": ["@workglow/node-llama-cpp@workspace:providers/node-llama-cpp"],
 
     "@workglow/ollama": ["@workglow/ollama@workspace:providers/ollama"],
@@ -1419,6 +1471,8 @@
 
     "@workglow/sqlite": ["@workglow/sqlite@workspace:providers/sqlite"],
 
+    "@workglow/stable-diffusion-server": ["@workglow/stable-diffusion-server@workspace:providers/stable-diffusion-server"],
+
     "@workglow/storage": ["@workglow/storage@workspace:packages/storage"],
 
     "@workglow/supabase": ["@workglow/supabase@workspace:providers/supabase"],
diff --git a/packages/ai/src/provider-utils.ts b/packages/ai/src/provider-utils.ts
index ba076f02c..832ec1970 100644
--- a/packages/ai/src/provider-utils.ts
+++ b/packages/ai/src/provider-utils.ts
@@ -23,3 +23,4 @@ export * from "./provider-utils/imageOutputHelpers";
 export * from "./provider-utils/BaseCloudProvider";
 export * from "./provider-utils/CloudProviderClient";
 export * from "./provider-utils/OpenAIShapedChat";
+export * from "./provider-utils/IBackendsTransport";
diff --git a/packages/ai/src/provider-utils/IBackendsTransport.ts b/packages/ai/src/provider-utils/IBackendsTransport.ts
new file mode 100644
index 000000000..f4833482a
--- /dev/null
+++ b/packages/ai/src/provider-utils/IBackendsTransport.ts
@@ -0,0 +1,120 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// ────────────────────────────────────────────────────────────────────────────
+// IBackendsTransport — renderer-side abstraction over backend transport
+//
+// Provider packages (libs) consume ONLY this interface. No platform-specific
+// imports are permitted here; concrete implementations live elsewhere.
+// ────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Request payload for `IBackendsTransport.ensureRunning`.
+ *
+ * `backend` is a plain string (not the BackendName union) so that provider
+ * packages do not need to know the closed set of backend identifiers. The
+ * transport implementation resolves it to the host's concrete backend identifier.
+ */
+export interface IEnsureRunningRequest {
+  /** Backend identifier, e.g. "llamacpp-server". */
+  readonly backend: string;
+  /** Absolute path to the model file. */
+  readonly modelPath: string;
+  /**
+   * Backend-specific runtime options forwarded to the broker as opaque JSON.
+   * llamacpp uses `{ ctx: number }`; sd-cpp passes an empty object `{}`;
+   * future backends define their own schema.
+   */
+  readonly opts: Readonly<Record<string, unknown>>;
+}
+
+/**
+ * Handle returned by a successful `ensureRunning` call.
+ *
+ * Callers MUST call `release()` when done to decrement the broker's refcount.
+ * After all handles for a backend are released, the broker may shut down the
+ * backend process after its idle timeout.
+ */
+export interface IRunningHandle {
+  /** Base URL of the running backend, e.g. "http://127.0.0.1:8765". */
+  readonly url: string;
+  /**
+   * Decrements the broker's refcount for this handle. The backend may shut
+   * down after the broker's idle timeout if refcount reaches zero.
+   *
+   * The returned promise resolves once the release message has been posted
+   * to the port; the broker does not acknowledge. If posting fails (e.g. the
+   * port is closed), the promise rejects.
+   */
+  readonly release: () => Promise<void>;
+}
+
+/**
+ * Status snapshot for a backend.
+ *
+ * Mirrors the host transport's backend status snapshot shape without coupling
+ * this shared interface to any package-private implementation path.
+ */
+export interface IBackendStatus {
+  readonly state: "not-installed" | "installed" | "running" | "error";
+  readonly message: string | undefined;
+  readonly pinnedVersion: string | undefined;
+}
+
+/**
+ * Renderer-side transport abstraction for the backends broker.
+ *
+ * Concrete implementations obtain a channel from their host environment and
+ * speak whatever request/response protocol that host defines.
+ *
+ * Provider packages import ONLY this interface — no platform-specific imports.
+ */
+export interface IBackendsTransport {
+  /**
+   * Acquire (or share) a running backend. Resolves once the backend is healthy.
+   *
+   * Multiple callers requesting the same `(backend, modelPath, opts)` triple
+   * will share one process via the broker's refcounting. `release()` on the
+   * returned handle decrements the refcount.
+   */
+  ensureRunning(req: IEnsureRunningRequest): Promise<IRunningHandle>;
+
+  /**
+   * Subscribe to status updates for a backend.
+   *
+   * The callback fires on every subsequent broker `status` event; callers
+   * wanting an initial snapshot must invoke `list()`. Subscriptions persist
+   * across port reconnects. Implementations MUST be idempotent: calling the
+   * returned unsubscribe twice is a no-op; subscribing the same callback
+   * twice is allowed and de-duplicated.
+   *
+   * @returns An unsubscribe function. Call it to stop receiving updates.
+   */
+  subscribeStatus(backend: string, callback: (status: IBackendStatus) => void): () => void;
+
+  /**
+   * Install a backend (download + verify + extract). Resolves when the backend
+   * reaches the "installed" state. Rejects on download / verification failure.
+   *
+   * Progress is reported via the optional callback as `(bytes, total)`: bytes
+   * received so far and the expected total, which may be 0 if the content-length is unknown.
+   */
+  install(backend: string, onProgress?: (bytes: number, total: number) => void): Promise<void>;
+
+  /**
+   * Fire-and-forget request for the broker to broadcast a `status`
+   * event for every backend in its registry. Resolves once the request
+   * has been posted (the broker does not send a discrete reply).
+   */
+  list(): Promise<void>;
+
+  /**
+   * Removes the backend's installed binary. In v1 the broker rejects
+   * this with an error; callers should handle the rejection. Future
+   * versions may implement teardown semantics.
+   */
+  uninstall(backend: string): Promise<void>;
+}
diff --git a/packages/test/package.json b/packages/test/package.json
index c52ace208..faf78286a 100644
--- a/packages/test/package.json
+++ b/packages/test/package.json
@@ -47,13 +47,16 @@
     "@workglow/javascript": "workspace:*",
     "@workglow/job-queue": "workspace:*",
     "@workglow/knowledge-base": "workspace:*",
+    "@workglow/llamacpp-server": "workspace:*",
     "@workglow/mcp": "workspace:*",
+    "@workglow/mlx": "workspace:*",
     "@workglow/node-llama-cpp": "workspace:*",
     "@workglow/ollama": "workspace:*",
     "@workglow/openai": "workspace:*",
     "@workglow/playwright": "workspace:*",
     "@workglow/postgres": "workspace:*",
     "@workglow/sqlite": "workspace:*",
+    "@workglow/stable-diffusion-server": "workspace:*",
     "@workglow/storage": "workspace:*",
     "@workglow/supabase": "workspace:*",
     "@workglow/task-graph": "workspace:*",
diff --git a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
new file mode 100644
index 000000000..7e0f04c5d
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
@@ -0,0 +1,197 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFn } from "@workglow/ai";
+import {
+  AiProviderRegistry,
+  createEmitQueue,
+  getAiProviderRegistry,
+  setAiProviderRegistry,
+} from "@workglow/ai";
+import type {
+  IBackendStatus,
+  IBackendsTransport,
+  IEnsureRunningRequest,
+  IRunningHandle,
+} from "@workglow/ai/provider-utils";
+import { pngBytesToImageValue } from "@workglow/ai/provider-utils";
+import { LlamaCppServerProvider } from "@workglow/llamacpp-server/ai";
+import { StableDiffusionCppProvider } from "@workglow/stable-diffusion-server/ai";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+vi.mock("@workglow/ai/provider-utils", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("@workglow/ai/provider-utils")>();
+  return {
+    ...actual,
+    pngBytesToImageValue: vi.fn(),
+  };
+});
+
+const originalFetch = globalThis.fetch;
+const MOCK_IMAGE = { kind: "mock-image" } as const;
+
+interface ITransportStub {
+  readonly ensureRunning: ReturnType<typeof vi.fn>;
+  readonly release: ReturnType<typeof vi.fn>;
+  readonly transport: IBackendsTransport;
+}
+
+function createTransportStub(url = "http://127.0.0.1:8765"): ITransportStub {
+  const release = vi.fn(async (): Promise<void> => undefined);
+  const ensureRunning = vi.fn(
+    async (_req: IEnsureRunningRequest): Promise<IRunningHandle> => ({
+      url,
+      release,
+    })
+  );
+  const transport: IBackendsTransport = {
+    ensureRunning,
+    subscribeStatus: (
+      _backend: string,
+      _callback: (status: IBackendStatus) => void
+    ): (() => void) => {
+      return (): void => undefined;
+    },
+    install: async (_backend: string): Promise<void> => undefined,
+    list: async (): Promise<void> => undefined,
+    uninstall: async (_backend: string): Promise<void> => undefined,
+  };
+
+  return { ensureRunning, release, transport };
+}
+
+async function runProviderStream(
+  runFn: AiProviderRunFn<any, any>,
+  input: Record<string, unknown>,
+  model: Record<string, unknown>,
+  timeoutMs = 100
+): Promise<unknown[]> {
+  const q = createEmitQueue<unknown>();
+  const events: unknown[] = [];
+  const controller = new AbortController();
+
+  const runPromise = runFn(input as any, model as any, controller.signal, (event: unknown) =>
+    q.push(event)
+  ).then(
+    () => q.close(),
+    (error: unknown) => q.fail(error)
+  );
+  const consumePromise = (async (): Promise<void> => {
+    for await (const event of q.iterable) {
+      events.push(event);
+    }
+  })();
+
+  await Promise.race([
+    Promise.all([runPromise, consumePromise]),
+    new Promise<never>((_, reject) => {
+      setTimeout(() => reject(new Error(`timed out after ${timeoutMs}ms`)), timeoutMs);
+    }),
+  ]);
+
+  return events;
+}
+
+describe("local backend provider stream contracts", () => {
+  beforeEach(() => {
+    setAiProviderRegistry(new AiProviderRegistry());
+    globalThis.fetch = originalFetch;
+    vi.clearAllMocks();
+    vi.mocked(pngBytesToImageValue).mockResolvedValue(MOCK_IMAGE as any);
+  });
+
+  afterEach(() => {
+    globalThis.fetch = originalFetch;
+  });
+
+  it("llama.cpp stops after [DONE] even if the server keeps the stream open", async () => {
+    const { release, transport } = createTransportStub();
+    const provider = new LlamaCppServerProvider({ transport });
+    await provider.register();
+
+    const runFn = getAiProviderRegistry().getRunFnFor(provider.name, ["text.generation"]);
+    expect(runFn).toBeDefined();
+
+    const payload = new TextEncoder().encode(
+      'data: {"choices":[{"delta":{"content":"hello"}}]}\n\ndata: [DONE]\n\n'
+    );
+
+    let resolvePendingRead:
+      | ((value: { readonly done: boolean; readonly value?: Uint8Array }) => void)
+      | undefined;
+    let readCount = 0;
+    const reader = {
+      read: vi.fn(() => {
+        readCount += 1;
+        if (readCount === 1) {
+          return Promise.resolve({ done: false, value: payload });
+        }
+        return new Promise<{ readonly done: boolean; readonly value?: Uint8Array }>((resolve) => {
+          resolvePendingRead = resolve;
+        });
+      }),
+      cancel: vi.fn(async (): Promise<void> => {
+        resolvePendingRead?.({ done: true });
+      }),
+      releaseLock: vi.fn((): void => undefined),
+    };
+
+    globalThis.fetch = vi.fn(
+      async () =>
+        ({
+          ok: true,
+          body: { getReader: () => reader },
+        }) as unknown as Response
+    ) as unknown as typeof fetch;
+
+    const events = await runProviderStream(
+      runFn!,
+      { prompt: "hello" },
+      { model_id: "/models/llama.gguf" }
+    );
+
+    expect(events).toEqual([
+      { type: "text-delta", port: "text", textDelta: "hello" },
+      { type: "finish", data: {} },
+    ]);
+    expect(reader.releaseLock).toHaveBeenCalledTimes(1);
+    expect(release).toHaveBeenCalledTimes(1);
+  });
+
+  it("stable-diffusion emits the generated image as a snapshot before finish", async () => {
+    const { release, transport } = createTransportStub();
+    const provider = new StableDiffusionCppProvider({ transport });
+    await provider.register();
+
+    const runFn = getAiProviderRegistry().getRunFnFor(provider.name, ["image.generation"]);
+    expect(runFn).toBeDefined();
+
+    globalThis.fetch = vi.fn(
+      async () =>
+        ({
+          ok: true,
+          json: async (): Promise<{ images: string[] }> => ({
+            images: [
+              "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/aFEAAAAASUVORK5CYII=",
+            ],
+          }),
+        }) as unknown as Response
+    ) as unknown as typeof fetch;
+
+    const events = await runProviderStream(
+      runFn!,
+      { prompt: "draw a cat" },
+      { model_id: "/models/stable-diffusion.gguf" }
+    );
+
+    expect(events).toEqual([
+      { type: "snapshot", data: { image: MOCK_IMAGE } },
+      { type: "finish", data: {} },
+    ]);
+    expect(pngBytesToImageValue).toHaveBeenCalledTimes(1);
+    expect(release).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/packages/test/src/test/ai/IBackendsTransport.types.test.ts b/packages/test/src/test/ai/IBackendsTransport.types.test.ts
new file mode 100644
index 000000000..d002ae162
--- /dev/null
+++ b/packages/test/src/test/ai/IBackendsTransport.types.test.ts
@@ -0,0 +1,75 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// ────────────────────────────────────────────────────────────────────────────
+// Compile-time conformance tests for IBackendsTransport.
+//
+// These tests run via the test runner but their value is in `tsc` accepting
+// (or rejecting) the declarations below. No runtime assertions of substance:
+// if the file compiles, the contract holds.
+//
+// Lives under packages/test/src/test/ai/ so that scripts/test.ts picks it up
+// (the runner only scans packages/test/src/test).
+// ────────────────────────────────────────────────────────────────────────────
+
+import type {
+  IBackendsTransport,
+  IBackendStatus,
+  IEnsureRunningRequest,
+  IRunningHandle,
+} from "@workglow/ai/provider-utils";
+import { expect, test } from "vitest";
+
+// `opts` is open — accepts the historic llamacpp shape …
+const _checkOptsWithCtx: IEnsureRunningRequest["opts"] = { ctx: 4096 };
+// … the empty shape that sd-cpp uses today (no per-run options) …
+const _checkOptsEmpty: IEnsureRunningRequest["opts"] = {};
+// … and arbitrary shapes future backends may define.
+const _checkOptsArbitrary: IEnsureRunningRequest["opts"] = { foo: "bar", n: 42 };
+
+// Interface exposes `list` and `uninstall` alongside the existing methods.
+type _Methods = keyof IBackendsTransport;
+const _hasList: _Methods = "list";
+const _hasUninstall: _Methods = "uninstall";
+
+// Structural conformance using explicit parameter signatures: TypeScript
+// allows assigning `() => X` to `(arg: T) => X`, so a zero-arg dummy would
+// silently accept a parameter-list change. Spelling each signature out
+// forces a typecheck failure on any rename, type change, or return-type
+// change to a method of `IBackendsTransport`.
+const _conforms: IBackendsTransport = {
+  ensureRunning: (_req: IEnsureRunningRequest): Promise<IRunningHandle> => {
+    return Promise.resolve({
+      url: "http://127.0.0.1:0",
+      release: (): Promise<void> => Promise.resolve(),
+    });
+  },
+  subscribeStatus: (
+    _backend: string,
+    _callback: (status: IBackendStatus) => void
+  ): (() => void) => {
+    return (): void => undefined;
+  },
+  install: (
+    _backend: string,
+    _onProgress?: (bytes: number, total: number) => void
+  ): Promise<void> => Promise.resolve(),
+  list: (): Promise<void> => Promise.resolve(),
+  uninstall: (_backend: string): Promise<void> => Promise.resolve(),
+};
+
+// Silence `no-unused-vars` / `noUnusedLocals` on the type-only assertions.
+void _checkOptsWithCtx;
+void _checkOptsEmpty;
+void _checkOptsArbitrary;
+void _hasList;
+void _hasUninstall;
+void _conforms;
+
+// Vitest requires at least one runtime test in the file.
+test("IBackendsTransport conformance compiles", () => {
+  expect(true).toBe(true);
+});
diff --git a/providers/llamacpp-server/package.json b/providers/llamacpp-server/package.json
new file mode 100644
index 000000000..21f794b3a
--- /dev/null
+++ b/providers/llamacpp-server/package.json
@@ -0,0 +1,69 @@
+{
+  "name": "@workglow/llamacpp-server",
+  "type": "module",
+  "sideEffects": false,
+  "version": "0.0.1",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/workglow-dev/libs.git",
+    "directory": "providers/llamacpp-server"
+  },
+  "description": "OpenAI-compatible HTTP client for an external or embedded llama-server",
+  "scripts": {
+    "watch": "concurrently -c 'auto' 'bun:watch-*'",
+    "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "watch-types": "tsc --watch --preserveWatchOutput",
+    "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'",
+    "build-js": "bun run build-code",
+    "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo",
+    "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "build-types": "rm -f tsconfig.tsbuildinfo && tsgo",
+    "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0"
+  },
+  "exports": {
+    "./ai": {
+      "types": "./dist/ai.d.ts",
+      "import": "./dist/ai.js"
+    },
+    "./ai-runtime": {
+      "types": "./dist/ai-runtime.d.ts",
+      "import": "./dist/ai-runtime.js"
+    }
+  },
+  "dependencies": {},
+  "peerDependencies": {
+    "@workglow/ai": "workspace:*",
+    "@workglow/job-queue": "workspace:*",
+    "@workglow/storage": "workspace:*",
+    "@workglow/task-graph": "workspace:*",
+    "@workglow/util": "workspace:*"
+  },
+  "peerDependenciesMeta": {
+    "@workglow/ai": {
+      "optional": false
+    },
+    "@workglow/job-queue": {
+      "optional": false
+    },
+    "@workglow/storage": {
+      "optional": false
+    },
+    "@workglow/task-graph": {
+      "optional": false
+    },
+    "@workglow/util": {
+      "optional": false
+    }
+  },
+  "devDependencies": {
+    "@workglow/ai": "workspace:*",
+    "@workglow/util": "workspace:*"
+  },
+  "files": [
+    "dist",
+    "src/**/*.md"
+  ],
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/providers/llamacpp-server/src/ai-runtime.ts b/providers/llamacpp-server/src/ai-runtime.ts
new file mode 100644
index 000000000..a1fd9b608
--- /dev/null
+++ b/providers/llamacpp-server/src/ai-runtime.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/runtime";
diff --git a/providers/llamacpp-server/src/ai.ts b/providers/llamacpp-server/src/ai.ts
new file mode 100644
index 000000000..2210c547d
--- /dev/null
+++ b/providers/llamacpp-server/src/ai.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/index";
diff --git a/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts b/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts
new file mode 100644
index 000000000..88c0eb394
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts
@@ -0,0 +1,210 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  AiProviderPreviewRunFn,
+  AiProviderRunFn,
+  AiProviderRunFnRegistration,
+  Capability,
+  ModelConfig,
+  ModelRecord,
+  TextGenerationTaskInput,
+  TextGenerationTaskOutput,
+} from "@workglow/ai";
+import { AiProvider } from "@workglow/ai";
+import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import { LOCAL_LLAMACPP_SERVER } from "./common/LlamaCppServer_Constants";
+
+export interface ILlamaCppServerProviderOptions {
+  readonly transport: IBackendsTransport;
+  readonly externalUrl?: string;
+  /**
+   * Default context length passed to the broker when launching a backend.
+   * Picked per request; larger values trade RAM for prompt+output budget.
+   * Defaults to 4096 if unset.
+   */
+  readonly defaultCtx?: number;
+}
+
+/**
+ * OpenAI-compatible HTTP chat-completion provider that forwards requests to a
+ * running llama-server instance. If `externalUrl` is provided the server is
+ * assumed to already be running; otherwise the provider acquires a handle via
+ * `transport.ensureRunning` before each request and releases it afterwards.
+ *
+ * v1 scope: chat completion only. No other capabilities are registered, and
+ * `inferCapabilities` falls back to `["text.generation"]` when a model's are unset.
+ */
+export class LlamaCppServerProvider extends AiProvider {
+  readonly name = LOCAL_LLAMACPP_SERVER;
+  readonly displayName = "Local llama-server (HTTP)";
+  readonly isLocal = true;
+  readonly supportsBrowser = false;
+
+  constructor(options: ILlamaCppServerProviderOptions) {
+    const runFns: readonly AiProviderRunFnRegistration<
+      TextGenerationTaskInput,
+      TextGenerationTaskOutput,
+      ModelConfig
+    >[] = [
+      {
+        serves: ["text.generation"] as readonly Capability[],
+        runFn: createLlamaCppServerTextGenerationStream(options) as AiProviderRunFn<
+          TextGenerationTaskInput,
+          TextGenerationTaskOutput,
+          ModelConfig
+        >,
+      },
+    ];
+
+    const previewTasks: Record<
+      string,
+      AiProviderPreviewRunFn<TextGenerationTaskInput, TextGenerationTaskOutput, ModelConfig>
+    > = {};
+
+    super(runFns, previewTasks);
+  }
+
+  override inferCapabilities(model: ModelRecord): readonly Capability[] {
+    return (model.capabilities as readonly Capability[] | undefined) ?? ["text.generation"];
+  }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Chat-completion run-fn
+// ─────────────────────────────────────────────────────────────────────────────
+
+interface UnifiedTextGenerationInput extends TextGenerationTaskInput {
+  readonly messages?: readonly { readonly role: string; readonly content: string }[];
+  readonly systemPrompt?: string;
+}
+
+/**
+ * Build and stream a chat-completion request against a llama-server
+ * `/v1/chat/completions` endpoint, emitting `text-delta` events then `finish`.
+ *
+ * Discriminates on `Array.isArray(input.messages) && input.messages.length > 0`
+ * so {@link AiChatTask} (chat path) and {@link TextGenerationTask}
+ * (prompt-only path) share the same registered run-fn. A final SSE line that
+ * lacks a trailing newline is still parsed before the stream is treated as done.
+ */
+function createLlamaCppServerTextGenerationStream(
+  options: ILlamaCppServerProviderOptions
+): AiProviderRunFn<TextGenerationTaskInput, TextGenerationTaskOutput, ModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+
+    const unified = input as UnifiedTextGenerationInput;
+    const hasMessages = Array.isArray(unified.messages) && unified.messages.length > 0;
+
+    const messages = hasMessages
+      ? [
+          ...(unified.systemPrompt ? [{ role: "system", content: unified.systemPrompt }] : []),
+          ...unified.messages!.map((m) => ({ role: m.role, content: m.content })),
+        ]
+      : [{ role: "user", content: input.prompt }];
+
+    const body = JSON.stringify({
+      model: model?.model_id ?? "",
+      messages,
+      stream: true,
+      ...(input.maxTokens !== undefined ? { max_tokens: input.maxTokens } : {}),
+      ...(input.temperature !== undefined ? { temperature: input.temperature } : {}),
+      ...(input.topP !== undefined ? { top_p: input.topP } : {}),
+      ...(input.frequencyPenalty !== undefined
+        ? { frequency_penalty: input.frequencyPenalty }
+        : {}),
+      ...(input.presencePenalty !== undefined ? { presence_penalty: input.presencePenalty } : {}),
+    });
+
+    // Acquire base URL — either from external override or via transport.
+    let baseUrl: string;
+    let handle: IRunningHandle | undefined;
+
+    if (options.externalUrl) {
+      baseUrl = options.externalUrl.replace(/\/$/, "");
+    } else {
+      if (!model?.model_id) {
+        throw new Error("LlamaCppServerProvider: model.model_id is required to acquire a backend");
+      }
+      handle = await options.transport.ensureRunning({
+        backend: "llamacpp-server",
+        modelPath: model.model_id,
+        opts: { ctx: options.defaultCtx ?? 4096 },
+      });
+      baseUrl = handle.url.replace(/\/$/, "");
+    }
+
+    try {
+      signal?.throwIfAborted?.();
+
+      const response = await fetch(`${baseUrl}/v1/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body,
+        signal,
+      });
+
+      if (!response.ok) {
+        const text = await response.text().catch(() => "(no body)");
+        throw new Error(
+          `LlamaCppServerProvider: HTTP ${response.status} from /v1/chat/completions — ${text}`
+        );
+      }
+
+      const reader = response.body?.getReader();
+      if (!reader) {
+        throw new Error("LlamaCppServerProvider: response body is null");
+      }
+
+      const decoder = new TextDecoder();
+      let buffer = "";
+
+      try {
+        let sawDone = false;
+        while (!sawDone) {
+          signal?.throwIfAborted?.();
+          const { done, value } = await reader.read();
+          // At EOF, flush the decoder and newline-terminate the tail so a final line is parsed.
+          buffer += done ? `${decoder.decode()}\n` : decoder.decode(value, { stream: true });
+          const lines = buffer.split("\n");
+          buffer = lines.pop() ?? "";
+
+          for (const line of lines) {
+            const trimmed = line.trim();
+            if (!trimmed.startsWith("data:")) continue;
+            const data = trimmed.slice(5).trim();
+            if (data === "[DONE]") {
+              sawDone = true;
+              await reader.cancel().catch(() => undefined);
+              break;
+            }
+            if (!data) continue;
+
+            let chunk: { choices?: { delta?: { content?: string } }[] };
+            try {
+              chunk = JSON.parse(data) as typeof chunk;
+            } catch {
+              continue;
+            }
+
+            const delta = chunk.choices?.[0]?.delta?.content;
+            if (delta) {
+              emit({ type: "text-delta", port: "text", textDelta: delta });
+            }
+          }
+          if (done) break;
+        }
+      } finally {
+        reader.releaseLock();
+      }
+
+      emit({ type: "finish", data: {} as TextGenerationTaskOutput });
+    } finally {
+      await handle?.release();
+    }
+  };
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts
new file mode 100644
index 000000000..53cc7a8ee
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts
@@ -0,0 +1,7 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export const LOCAL_LLAMACPP_SERVER = "LOCAL_LLAMACPP_SERVER"; // imported by LlamaCppServerProvider.ts
diff --git a/providers/llamacpp-server/src/ai/index.ts b/providers/llamacpp-server/src/ai/index.ts
new file mode 100644
index 000000000..4f3d7f42a
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/index.ts
@@ -0,0 +1,11 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./common/LlamaCppServer_Constants";
+export * from "./LlamaCppServerProvider";
+export * from "./registerLlamaCppServer";
diff --git a/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts b/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts
new file mode 100644
index 000000000..b0f4406aa
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts
@@ -0,0 +1,27 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRegisterOptions } from "@workglow/ai";
+import type { IBackendsTransport } from "@workglow/ai/provider-utils";
+import { registerProviderInline } from "@workglow/ai/provider-utils";
+import { LlamaCppServerProvider } from "./LlamaCppServerProvider";
+
+/** Options for registerLlamaCppServer: provider settings plus the registrar's own options. */
+export interface IRegisterLlamaCppServerOptions extends AiProviderRegisterOptions {
+  readonly transport: IBackendsTransport;
+  readonly externalUrl?: string;
+  readonly defaultCtx?: number;
+}
+
+/** Builds a LlamaCppServerProvider from `options` and registers it inline. */
+export async function registerLlamaCppServer(
+  options: IRegisterLlamaCppServerOptions
+): Promise<void> {
+  // Provider settings are peeled off; whatever remains goes to the registrar untouched.
+  const { transport, externalUrl, defaultCtx, ...registrarOptions } = options;
+  const provider = new LlamaCppServerProvider({ transport, externalUrl, defaultCtx });
+  await registerProviderInline(provider, "LlamaCppServer", registrarOptions);
+}
diff --git a/providers/llamacpp-server/src/ai/runtime.ts b/providers/llamacpp-server/src/ai/runtime.ts
new file mode 100644
index 000000000..5a1f42e73
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/runtime.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./registerLlamaCppServer";
diff --git a/providers/llamacpp-server/tsconfig.json b/providers/llamacpp-server/tsconfig.json
new file mode 100644
index 000000000..4d9631e9b
--- /dev/null
+++ b/providers/llamacpp-server/tsconfig.json
@@ -0,0 +1,29 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "composite": true,
+    "rootDir": "src",
+    "outDir": "dist",
+    "tsBuildInfoFile": "tsconfig.tsbuildinfo"
+  },
+  "include": [
+    "src/**/*"
+  ],
+  "references": [
+    {
+      "path": "../../packages/util"
+    },
+    {
+      "path": "../../packages/task-graph"
+    },
+    {
+      "path": "../../packages/storage"
+    },
+    {
+      "path": "../../packages/job-queue"
+    },
+    {
+      "path": "../../packages/ai"
+    }
+  ]
+}
diff --git a/providers/mlx/package.json b/providers/mlx/package.json
new file mode 100644
index 000000000..0c490acdd
--- /dev/null
+++ b/providers/mlx/package.json
@@ -0,0 +1,69 @@
+{
+  "name": "@workglow/mlx",
+  "type": "module",
+  "sideEffects": false,
+  "version": "0.0.1",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/workglow-dev/libs.git",
+    "directory": "providers/mlx"
+  },
+  "description": "MLX provider stub — Python runtime not bundled in v1; see roadmap",
+  "scripts": {
+    "watch": "concurrently -c 'auto' 'bun:watch-*'",
+    "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "watch-types": "tsc --watch --preserveWatchOutput",
+    "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'",
+    "build-js": "bun run build-code",
+    "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo",
+    "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "build-types": "rm -f tsconfig.tsbuildinfo && tsgo",
+    "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0"
+  },
+  "exports": {
+    "./ai": {
+      "types": "./dist/ai.d.ts",
+      "import": "./dist/ai.js"
+    },
+    "./ai-runtime": {
+      "types": "./dist/ai-runtime.d.ts",
+      "import": "./dist/ai-runtime.js"
+    }
+  },
+  "dependencies": {},
+  "peerDependencies": {
+    "@workglow/ai": "workspace:*",
+    "@workglow/job-queue": "workspace:*",
+    "@workglow/storage": "workspace:*",
+    "@workglow/task-graph": "workspace:*",
+    "@workglow/util": "workspace:*"
+  },
+  "peerDependenciesMeta": {
+    "@workglow/ai": {
+      "optional": false
+    },
+    "@workglow/job-queue": {
+      "optional": false
+    },
+    "@workglow/storage": {
+      "optional": false
+    },
+    "@workglow/task-graph": {
+      "optional": false
+    },
+    "@workglow/util": {
+      "optional": false
+    }
+  },
+  "devDependencies": {
+    "@workglow/ai": "workspace:*",
+    "@workglow/util": "workspace:*"
+  },
+  "files": [
+    "dist",
+    "src/**/*.md"
+  ],
+  "publishConfig": {
+    "access": "public"
+  }
+}
diff --git a/providers/mlx/src/ai-runtime.ts b/providers/mlx/src/ai-runtime.ts
new file mode 100644
index 000000000..a1fd9b608
--- /dev/null
+++ b/providers/mlx/src/ai-runtime.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/runtime";
diff --git a/providers/mlx/src/ai.ts b/providers/mlx/src/ai.ts
new file mode 100644
index 000000000..2210c547d
--- /dev/null
+++ b/providers/mlx/src/ai.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/index";
diff --git a/providers/mlx/src/ai/MlxProvider.ts b/providers/mlx/src/ai/MlxProvider.ts
new file mode 100644
index 000000000..a73a87aac
--- /dev/null
+++ b/providers/mlx/src/ai/MlxProvider.ts
@@ -0,0 +1,76 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// TODO(roadmap): when Python runtime bundling lands, replace this stub with a real MLX provider.
+
+import type {
+  AiProviderPreviewRunFn,
+  AiProviderRunFn,
+  AiProviderRunFnRegistration,
+  Capability,
+  ModelConfig,
+  ModelRecord,
+  TextGenerationTaskInput,
+  TextGenerationTaskOutput,
+} from "@workglow/ai";
+import { AiProvider } from "@workglow/ai";
+import { LOCAL_MLX } from "./common/Mlx_Constants";
+
+/**
+ * MLX provider stub.
+ *
+ * The MLX runtime requires a Python environment which is not bundled in v1.
+ * The provider registers cleanly so the UI can list it, but all inference
+ * calls throw immediately.  See roadmap for Python runtime bundling plans.
+ */
+export class MlxProvider extends AiProvider {
+  readonly name = LOCAL_MLX;
+  readonly displayName = "Local MLX (Apple Silicon)";
+  readonly isLocal = true;
+  readonly supportsBrowser = false;
+
+  constructor() {
+    // Single registration: text generation is routed to the always-throwing stub.
+    const stubRegistrations: readonly AiProviderRunFnRegistration<
+      TextGenerationTaskInput,
+      TextGenerationTaskOutput,
+      ModelConfig
+    >[] = [
+      {
+        serves: ["text.generation"] as readonly Capability[],
+        runFn: mlxNotAvailableRunFn as AiProviderRunFn<
+          TextGenerationTaskInput,
+          TextGenerationTaskOutput,
+          ModelConfig
+        >,
+      },
+    ];
+    const noPreviews: Record<
+      string,
+      AiProviderPreviewRunFn<TextGenerationTaskInput, TextGenerationTaskOutput, ModelConfig>
+    > = {};
+    super(stubRegistrations, noPreviews);
+  }
+
+  override inferCapabilities(model: ModelRecord): readonly Capability[] {
+    const declared = model.capabilities as readonly Capability[] | undefined;
+    return declared ?? ["text.generation"]; // fallback when the record declares none
+  }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Stub run-fn — always throws
+// ─────────────────────────────────────────────────────────────────────────────
+
+const mlxNotAvailableRunFn: AiProviderRunFn<
+  TextGenerationTaskInput,
+  TextGenerationTaskOutput,
+  ModelConfig
+> = async (_input, _model, _signal, _emit) => {
+  // Nothing is inspected or emitted: every invocation rejects with the same error.
+  const reason = "MLX provider not available: Python runtime not bundled in v1; see roadmap.";
+  throw new Error(reason);
+};
diff --git a/providers/mlx/src/ai/common/Mlx_Constants.ts b/providers/mlx/src/ai/common/Mlx_Constants.ts
new file mode 100644
index 000000000..0c8268ac3
--- /dev/null
+++ b/providers/mlx/src/ai/common/Mlx_Constants.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// TODO(roadmap): when Python runtime bundling lands, replace this stub with a real MLX provider.
+
+export const LOCAL_MLX = "LOCAL_MLX"; // value of MlxProvider.name
diff --git a/providers/mlx/src/ai/index.ts b/providers/mlx/src/ai/index.ts
new file mode 100644
index 000000000..c5e3ca5f3
--- /dev/null
+++ b/providers/mlx/src/ai/index.ts
@@ -0,0 +1,11 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./common/Mlx_Constants";
+export * from "./MlxProvider";
+export * from "./registerMlx";
diff --git a/providers/mlx/src/ai/registerMlx.ts b/providers/mlx/src/ai/registerMlx.ts
new file mode 100644
index 000000000..ce0cbd3bc
--- /dev/null
+++ b/providers/mlx/src/ai/registerMlx.ts
@@ -0,0 +1,15 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRegisterOptions } from "@workglow/ai";
+import { registerProviderInline } from "@workglow/ai/provider-utils";
+import { MlxProvider } from "./MlxProvider";
+
+export interface IRegisterMlxOptions extends AiProviderRegisterOptions {}
+/** Registers a fresh MlxProvider via registerProviderInline, passing `options` through unchanged. */
+export async function registerMlx(options: IRegisterMlxOptions): Promise<void> {
+  await registerProviderInline(new MlxProvider(), "Mlx", options);
+}
diff --git a/providers/mlx/src/ai/runtime.ts b/providers/mlx/src/ai/runtime.ts
new file mode 100644
index 000000000..78cd855e6
--- /dev/null
+++ b/providers/mlx/src/ai/runtime.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./registerMlx";
diff --git a/providers/mlx/tsconfig.json b/providers/mlx/tsconfig.json
new file mode 100644
index 000000000..4d9631e9b
--- /dev/null
+++ b/providers/mlx/tsconfig.json
@@ -0,0 +1,29 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "composite": true,
+    "rootDir": "src",
+    "outDir": "dist",
+    "tsBuildInfoFile": "tsconfig.tsbuildinfo"
+  },
+  "include": [
+    "src/**/*"
+  ],
+  "references": [
+    {
+      "path": "../../packages/util"
+    },
+    {
+      "path": "../../packages/task-graph"
+    },
+    {
+      "path": "../../packages/storage"
+    },
+    {
+      "path": "../../packages/job-queue"
+    },
+    {
+      "path": "../../packages/ai"
+    }
+  ]
+}
diff --git a/providers/stable-diffusion-server/package.json b/providers/stable-diffusion-server/package.json
new file mode 100644
index 000000000..b3718e830
--- /dev/null
+++ b/providers/stable-diffusion-server/package.json
@@ -0,0 +1,69 @@
+{
+  "name": "@workglow/stable-diffusion-server",
+  "type": "module",
+  "sideEffects": false,
+  "version": "0.0.1",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/workglow-dev/libs.git",
+    "directory": "providers/stable-diffusion-server"
+  },
+  "description": "Local sd.cpp HTTP client for an external or embedded stable-diffusion.cpp server",
+  "scripts": {
+    "watch": "concurrently -c 'auto' 'bun:watch-*'",
+    "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "watch-types": "tsc --watch --preserveWatchOutput",
+    "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'",
+    "build-js": "bun run build-code",
+    "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo",
+    "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "build-types": "rm -f tsconfig.tsbuildinfo && tsgo",
+    "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0"
+  },
+  "exports": {
+    "./ai": {
+      "types": "./dist/ai.d.ts",
+      "import": "./dist/ai.js"
+    },
+    "./ai-runtime": {
+      "types": "./dist/ai-runtime.d.ts",
+      "import": "./dist/ai-runtime.js"
+    }
+  },
+  "dependencies": {},
+  "peerDependencies": {
+    "@workglow/ai": "workspace:*",
+    "@workglow/job-queue": "workspace:*",
+    "@workglow/storage": "workspace:*",
+    "@workglow/task-graph": "workspace:*",
+    "@workglow/util": "workspace:*"
+  },
+  "peerDependenciesMeta": {
+    "@workglow/ai": {
+      "optional": false
+    },
+    "@workglow/job-queue": {
+      "optional": false
+    },
+    "@workglow/storage": {
+      "optional": false
+    },
+    "@workglow/task-graph": {
+      "optional": false
+    },
+    "@workglow/util": {
+      "optional": false
+    }
+  },
+  "devDependencies": {
+    "@workglow/ai": "workspace:*",
+    "@workglow/util": "workspace:*"
+  },
+  "files": [
+    "dist",
+    "src/**/*.md"
+  ],
+  "publishConfig": {
+    "access": "public"
+  }
+}
\ No newline at end of file
diff --git a/providers/stable-diffusion-server/src/ai-runtime.ts b/providers/stable-diffusion-server/src/ai-runtime.ts
new file mode 100644
index 000000000..a1fd9b608
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai-runtime.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/runtime";
diff --git a/providers/stable-diffusion-server/src/ai.ts b/providers/stable-diffusion-server/src/ai.ts
new file mode 100644
index 000000000..2210c547d
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/index";
diff --git a/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts b/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts
new file mode 100644
index 000000000..2d38c2e99
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts
@@ -0,0 +1,156 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  AiProviderPreviewRunFn,
+  AiProviderRunFn,
+  AiProviderRunFnRegistration,
+  Capability,
+  ImageGenerateTaskInput,
+  ImageGenerateTaskOutput,
+  ModelConfig,
+  ModelRecord,
+} from "@workglow/ai";
+import { AiProvider } from "@workglow/ai";
+import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import { pngBytesToImageValue } from "@workglow/ai/provider-utils";
+import { LOCAL_STABLE_DIFFUSION_CPP } from "./common/StableDiffusionCpp_Constants";
+
+/**
+ * Endpoint variants for stable-diffusion.cpp HTTP servers. Default `/txt2img`
+ * matches the conventional sd.cpp HTTP API; `/v1/images/generations` is used
+ * by OpenAI-compatible builds. Configurable so callers can switch without
+ * forking the provider while the Phase-8 integration spike is pending.
+ */
+export type StableDiffusionCppEndpoint = "/txt2img" | "/v1/images/generations";
+
+export interface IStableDiffusionCppProviderOptions {
+  readonly transport: IBackendsTransport; // used via ensureRunning when externalUrl is unset
+  readonly externalUrl?: string; // base URL of an already-running server; skips the transport
+  readonly endpoint?: StableDiffusionCppEndpoint; // request path; run-fn defaults to "/txt2img"
+}
+
+/**
+ * HTTP client for a local stable-diffusion.cpp server. If `externalUrl` is
+ * provided the server is assumed to already be running; otherwise the provider
+ * acquires a handle via `transport.ensureRunning` before each request and
+ * releases it afterwards.
+ *
+ * v1 scope: text-to-image only. No other capability is registered, and no
+ * preview run-fns are provided.
+ */
+export class StableDiffusionCppProvider extends AiProvider {
+  readonly name = LOCAL_STABLE_DIFFUSION_CPP;
+  readonly displayName = "Local stable-diffusion.cpp (HTTP)";
+  readonly isLocal = true;
+  readonly supportsBrowser = false;
+
+  constructor(options: IStableDiffusionCppProviderOptions) {
+    // Single registration: image generation served by the HTTP run-fn built from `options`.
+    const imageRegistrations: readonly AiProviderRunFnRegistration<
+      ImageGenerateTaskInput,
+      ImageGenerateTaskOutput,
+      ModelConfig
+    >[] = [
+      {
+        serves: ["image.generation"] as readonly Capability[],
+        runFn: createStableDiffusionCppImageGenerateRunFn(options) as AiProviderRunFn<
+          ImageGenerateTaskInput,
+          ImageGenerateTaskOutput,
+          ModelConfig
+        >,
+      },
+    ];
+    const noPreviews: Record<
+      string,
+      AiProviderPreviewRunFn<ImageGenerateTaskInput, ImageGenerateTaskOutput, ModelConfig>
+    > = {};
+    super(imageRegistrations, noPreviews);
+  }
+
+  override inferCapabilities(model: ModelRecord): readonly Capability[] {
+    const declared = model.capabilities as readonly Capability[] | undefined;
+    return declared ?? ["image.generation"]; // fallback when the record declares none
+  }
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// Image-generation run-fn
+// ─────────────────────────────────────────────────────────────────────────────
+
+/**
+ * One-shot run-fn for text-to-image generation via stable-diffusion.cpp HTTP server.
+ *
+ * Endpoint is selected via {@link IStableDiffusionCppProviderOptions.endpoint}
+ * (defaults to `/txt2img`). Request: `POST <endpoint>` with `{ "prompt": "..." }`.
+ * Response: `{ "images": ["<base64-png>", ...] }` or the OpenAI-style
+ * `{ "data": [{ "b64_json": "<base64-png>" }, ...] }` — the first image is used.
+ */
+function createStableDiffusionCppImageGenerateRunFn(
+  options: IStableDiffusionCppProviderOptions
+): AiProviderRunFn<ImageGenerateTaskInput, ImageGenerateTaskOutput, ModelConfig> {
+  const endpoint = options.endpoint ?? "/txt2img";
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    const body = JSON.stringify({ prompt: input.prompt });
+
+    // Acquire base URL — either from external override or via transport.
+    let baseUrl: string;
+    let handle: IRunningHandle | undefined; // stays undefined in externalUrl mode
+    if (options.externalUrl) {
+      baseUrl = options.externalUrl.replace(/\/$/, "");
+    } else {
+      if (!model?.model_id) {
+        throw new Error(
+          "StableDiffusionCppProvider: model.model_id is required to acquire a backend"
+        );
+      }
+      handle = await options.transport.ensureRunning({
+        backend: "stable-diffusion-server",
+        modelPath: model.model_id,
+        opts: {},
+      });
+      baseUrl = handle.url.replace(/\/$/, "");
+    }
+
+    try {
+      signal?.throwIfAborted?.(); // the abort may have fired while the backend started
+      const response = await fetch(`${baseUrl}${endpoint}`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body,
+        signal,
+      });
+      if (!response.ok) {
+        const text = await response.text().catch(() => "(no body)");
+        throw new Error(
+          `StableDiffusionCppProvider: HTTP ${response.status} from ${endpoint} — ${text}`
+        );
+      }
+
+      // Accept both the `/txt2img` envelope and the OpenAI-compatible one.
+      const json = (await response.json()) as {
+        images?: string[];
+        data?: { b64_json?: string }[];
+      };
+      const base64 = json.images?.[0] ?? json.data?.[0]?.b64_json;
+      if (!base64) {
+        throw new Error("StableDiffusionCppProvider: response contained no images");
+      }
+
+      // Decode base64 platform-neutrally (no Node-only `Buffer`) and wrap in an ImageValue.
+      const binary = atob(base64);
+      const bytes = new Uint8Array(binary.length);
+      for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
+      const image = await pngBytesToImageValue(bytes, "png");
+
+      emit({ type: "snapshot", data: { image } });
+      emit({ type: "finish", data: {} as ImageGenerateTaskOutput });
+    } finally {
+      await handle?.release();
+    }
+  };
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts
new file mode 100644
index 000000000..56c976ccd
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts
@@ -0,0 +1,7 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export const LOCAL_STABLE_DIFFUSION_CPP = "LOCAL_STABLE_DIFFUSION_CPP"; // value of StableDiffusionCppProvider.name
diff --git a/providers/stable-diffusion-server/src/ai/index.ts b/providers/stable-diffusion-server/src/ai/index.ts
new file mode 100644
index 000000000..5df0a99bc
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/index.ts
@@ -0,0 +1,11 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./common/StableDiffusionCpp_Constants";
+export * from "./StableDiffusionCppProvider";
+export * from "./registerStableDiffusionCpp";
diff --git a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts
new file mode 100644
index 000000000..4b5db64dc
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts
@@ -0,0 +1,28 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRegisterOptions } from "@workglow/ai";
+import type { IBackendsTransport } from "@workglow/ai/provider-utils";
+import { registerProviderInline } from "@workglow/ai/provider-utils";
+import type { StableDiffusionCppEndpoint } from "./StableDiffusionCppProvider";
+import { StableDiffusionCppProvider } from "./StableDiffusionCppProvider";
+
+/** Options for registerStableDiffusionCpp: provider settings plus the registrar's own options. */
+export interface IRegisterStableDiffusionCppOptions extends AiProviderRegisterOptions {
+  readonly transport: IBackendsTransport;
+  readonly externalUrl?: string;
+  readonly endpoint?: StableDiffusionCppEndpoint;
+}
+
+/** Builds a StableDiffusionCppProvider from `options` and registers it inline. */
+export async function registerStableDiffusionCpp(
+  options: IRegisterStableDiffusionCppOptions
+): Promise<void> {
+  // Provider settings are peeled off; whatever remains goes to the registrar untouched.
+  const { transport, externalUrl, endpoint, ...registrarOptions } = options;
+  const provider = new StableDiffusionCppProvider({ transport, externalUrl, endpoint });
+  await registerProviderInline(provider, "StableDiffusionCpp", registrarOptions);
+}
diff --git a/providers/stable-diffusion-server/src/ai/runtime.ts b/providers/stable-diffusion-server/src/ai/runtime.ts
new file mode 100644
index 000000000..094645c24
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/runtime.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./registerStableDiffusionCpp";
diff --git a/providers/stable-diffusion-server/tsconfig.json b/providers/stable-diffusion-server/tsconfig.json
new file mode 100644
index 000000000..4d9631e9b
--- /dev/null
+++ b/providers/stable-diffusion-server/tsconfig.json
@@ -0,0 +1,29 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "composite": true,
+    "rootDir": "src",
+    "outDir": "dist",
+    "tsBuildInfoFile": "tsconfig.tsbuildinfo"
+  },
+  "include": [
+    "src/**/*"
+  ],
+  "references": [
+    {
+      "path": "../../packages/util"
+    },
+    {
+      "path": "../../packages/task-graph"
+    },
+    {
+      "path": "../../packages/storage"
+    },
+    {
+      "path": "../../packages/job-queue"
+    },
+    {
+      "path": "../../packages/ai"
+    }
+  ]
+}

From 962dfc98f2cfb31475e409cec3c0d883d857a4fa Mon Sep 17 00:00:00 2001
From: Steven Roussey <sroussey@gmail.com>
Date: Fri, 22 May 2026 22:55:32 +0000
Subject: [PATCH 2/8] ci: remove dependabot

---
 .github/dependabot.yml | 14 --------------
 1 file changed, 14 deletions(-)
 delete mode 100644 .github/dependabot.yml

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
deleted file mode 100644
index e4a93835d..000000000
--- a/.github/dependabot.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-# To get started with Dependabot version updates, you'll need to specify which
-# package ecosystems to update and where the package manifests are located.
-# Please see the documentation for all configuration options:
-# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
-
-version: 2
-updates:
-  - package-ecosystem: "bun"
-    directory: "/"
-    schedule:
-      interval: "weekly"
-    ignore:
-      - dependency-name: "@types/node"
-      - dependency-name: "@typescript/native-preview"

From c5d5254aefe165674a887c86379ef96ff5b20f12 Mon Sep 17 00:00:00 2001
From: Steven Roussey <sroussey@gmail.com>
Date: Sat, 23 May 2026 21:12:35 +0000
Subject: [PATCH 3/8] =?UTF-8?q?feat(llamacpp-server):=20@workglow/llamacpp?=
 =?UTF-8?q?-server=20provider=20=E2=80=94=20run-fns,=20registration,=20and?=
 =?UTF-8?q?=20barrels?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 providers/llamacpp-server/package.json        |  14 +-
 .../llamacpp-server/src/ai-runtime.browser.ts |   9 +
 providers/llamacpp-server/src/ai.browser.ts   |   9 +
 .../src/ai/LlamaCppServerProvider.ts          | 222 +++---------------
 .../src/ai/LlamaCppServerQueuedProvider.ts    |  34 +++
 .../ai/common/LlamaCppServer_Capabilities.ts  |  80 +++++++
 .../common/LlamaCppServer_CapabilitySets.ts   |  38 +++
 .../src/ai/common/LlamaCppServer_Client.ts    | 161 +++++++++++++
 .../src/ai/common/LlamaCppServer_Constants.ts |   3 +
 .../src/ai/common/LlamaCppServer_JobRunFns.ts |  57 +++++
 .../src/ai/common/LlamaCppServer_ModelInfo.ts |  93 ++++++++
 .../ai/common/LlamaCppServer_ModelSchema.ts   |  85 +++++++
 .../ai/common/LlamaCppServer_ModelSearch.ts   |  58 +++++
 .../src/ai/common/LlamaCppServer_ModelUtil.ts |  31 +++
 .../ai/common/LlamaCppServer_TextEmbedding.ts |  64 +++++
 .../common/LlamaCppServer_TextGeneration.ts   | 103 ++++++++
 .../ai/common/LlamaCppServer_TextRewriter.ts  |  57 +++++
 .../ai/common/LlamaCppServer_TextSummary.ts   |  57 +++++
 .../ai/common/LlamaCppServer_ToolCalling.ts   | 138 +++++++++++
 providers/llamacpp-server/src/ai/index.ts     |  19 +-
 .../src/ai/registerLlamaCppServer.ts          |  29 +--
 .../src/ai/registerLlamaCppServerInline.ts    |  28 +++
 .../src/ai/registerLlamaCppServerWorker.ts    |  29 +++
 providers/llamacpp-server/src/ai/runtime.ts   |  12 +-
 24 files changed, 1216 insertions(+), 214 deletions(-)
 create mode 100644 providers/llamacpp-server/src/ai-runtime.browser.ts
 create mode 100644 providers/llamacpp-server/src/ai.browser.ts
 create mode 100644 providers/llamacpp-server/src/ai/LlamaCppServerQueuedProvider.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_Capabilities.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_CapabilitySets.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_JobRunFns.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSchema.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelUtil.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts
 create mode 100644 providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts
 create mode 100644 providers/llamacpp-server/src/ai/registerLlamaCppServerInline.ts
 create mode 100644 providers/llamacpp-server/src/ai/registerLlamaCppServerWorker.ts

diff --git a/providers/llamacpp-server/package.json b/providers/llamacpp-server/package.json
index 21f794b3a..e90c5ca42 100644
--- a/providers/llamacpp-server/package.json
+++ b/providers/llamacpp-server/package.json
@@ -12,20 +12,30 @@
   "scripts": {
     "watch": "concurrently -c 'auto' 'bun:watch-*'",
     "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "watch-browser": "bun build --watch --no-clear-screen --target=browser --sourcemap=external --packages=external --outdir ./dist ./src/ai.browser.ts ./src/ai-runtime.browser.ts",
     "watch-types": "tsc --watch --preserveWatchOutput",
-    "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'",
-    "build-js": "bun run build-code",
+    "build-package": "concurrently -c 'auto' -n 'code,browser,types' 'bun run build-code' 'bun run build-browser' 'bun run build-types'",
+    "build-js": "concurrently -c 'auto' -n 'code,browser' 'bun run build-code' 'bun run build-browser'",
     "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo",
     "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "build-browser": "bun build --target=browser --sourcemap=external --packages=external --outdir ./dist ./src/ai.browser.ts ./src/ai-runtime.browser.ts",
     "build-types": "rm -f tsconfig.tsbuildinfo && tsgo",
     "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0"
   },
   "exports": {
     "./ai": {
+      "browser": {
+        "types": "./dist/ai.d.ts",
+        "import": "./dist/ai.browser.js"
+      },
       "types": "./dist/ai.d.ts",
       "import": "./dist/ai.js"
     },
     "./ai-runtime": {
+      "browser": {
+        "types": "./dist/ai-runtime.d.ts",
+        "import": "./dist/ai-runtime.browser.js"
+      },
       "types": "./dist/ai-runtime.d.ts",
       "import": "./dist/ai-runtime.js"
     }
diff --git a/providers/llamacpp-server/src/ai-runtime.browser.ts b/providers/llamacpp-server/src/ai-runtime.browser.ts
new file mode 100644
index 000000000..a1fd9b608
--- /dev/null
+++ b/providers/llamacpp-server/src/ai-runtime.browser.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/runtime";
diff --git a/providers/llamacpp-server/src/ai.browser.ts b/providers/llamacpp-server/src/ai.browser.ts
new file mode 100644
index 000000000..2210c547d
--- /dev/null
+++ b/providers/llamacpp-server/src/ai.browser.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/index";
diff --git a/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts b/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts
index 88c0eb394..8bc41a5e7 100644
--- a/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts
+++ b/providers/llamacpp-server/src/ai/LlamaCppServerProvider.ts
@@ -4,207 +4,41 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type {
-  AiProviderPreviewRunFn,
-  AiProviderRunFn,
-  AiProviderRunFnRegistration,
-  Capability,
-  ModelConfig,
-  ModelRecord,
-  TextGenerationTaskInput,
-  TextGenerationTaskOutput,
-} from "@workglow/ai";
-import { AiProvider } from "@workglow/ai";
-import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import { createCloudProviderClass } from "@workglow/ai/provider-utils";
+import type { Capability, ModelRecord } from "@workglow/ai/worker";
+import { AiProvider } from "@workglow/ai/worker";
+import {
+  inferLlamaCppServerCapabilities,
+  llamaCppServerWorkerRunFnSpecs,
+} from "./common/LlamaCppServer_Capabilities";
 import { LOCAL_LLAMACPP_SERVER } from "./common/LlamaCppServer_Constants";
-
-export interface ILlamaCppServerProviderOptions {
-  readonly transport: IBackendsTransport;
-  readonly externalUrl?: string;
-  /**
-   * Default context length passed to the broker when launching a backend.
-   * Picked per request; larger values trade RAM for prompt+output budget.
-   * Defaults to 4096 if unset.
-   */
-  readonly defaultCtx?: number;
-}
+import type { LlamaCppServerModelConfig } from "./common/LlamaCppServer_ModelSchema";
 
 /**
- * OpenAI-compatible HTTP chat-completion provider that forwards requests to a
- * running llama-server instance.  If `externalUrl` is provided the server is
- * assumed to already be running; otherwise the provider acquires a handle via
- * `transport.ensureRunning` before each request and releases it afterwards.
+ * Worker-server registration shell for llamacpp-server. Imports `AiProvider`
+ * from `@workglow/ai/worker` so the worker module graph stays self-contained.
  *
- * v1 scope: chat completion only. Other capabilities are not registered; the
- * provider serves only chat completion in v1.
+ * Both transport and externalUrl modes are supported. The `IBackendsTransport`
+ * is constructed inside the worker runtime by the caller (e.g.,
+ * `MessagePortBackendsTransport` in the Builder's worker renderer) and held
+ * by closure inside the run-fns — no port transfer across the worker
+ * boundary. Worker registration is the primary production path; inline
+ * registration (`LlamaCppServerQueuedProvider`) is primarily a testing seam.
  */
-export class LlamaCppServerProvider extends AiProvider {
-  readonly name = LOCAL_LLAMACPP_SERVER;
-  readonly displayName = "Local llama-server (HTTP)";
-  readonly isLocal = true;
-  readonly supportsBrowser = false;
-
-  constructor(options: ILlamaCppServerProviderOptions) {
-    const runFns: readonly AiProviderRunFnRegistration<
-      TextGenerationTaskInput,
-      TextGenerationTaskOutput,
-      ModelConfig
-    >[] = [
-      {
-        serves: ["text.generation"] as readonly Capability[],
-        runFn: createLlamaCppServerTextGenerationStream(options) as AiProviderRunFn<
-          TextGenerationTaskInput,
-          TextGenerationTaskOutput,
-          ModelConfig
-        >,
-      },
-    ];
-
-    const previewTasks: Record<
-      string,
-      AiProviderPreviewRunFn<TextGenerationTaskInput, TextGenerationTaskOutput, ModelConfig>
-    > = {};
-
-    super(runFns, previewTasks);
+export class LlamaCppServerProvider extends createCloudProviderClass<LlamaCppServerModelConfig>(
+  AiProvider,
+  {
+    name: LOCAL_LLAMACPP_SERVER,
+    displayName: "Local llama-server (HTTP)",
+    isLocal: true,
+    supportsBrowser: true,
   }
-
+) {
   override inferCapabilities(model: ModelRecord): readonly Capability[] {
-    return (model.capabilities as readonly Capability[] | undefined) ?? ["text.generation"];
+    return inferLlamaCppServerCapabilities(model);
   }
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Chat-completion run-fn
-// ─────────────────────────────────────────────────────────────────────────────
-
-interface UnifiedTextGenerationInput extends TextGenerationTaskInput {
-  readonly messages?: readonly { readonly role: string; readonly content: string }[];
-  readonly systemPrompt?: string;
-}
-
-/**
- * Build and stream a chat-completion request against a llama-server
- * `/v1/chat/completions` endpoint.
- *
- * Discriminates on `Array.isArray(input.messages) && input.messages.length > 0`
- * so {@link AiChatTask} (chat path) and {@link TextGenerationTask}
- * (prompt-only path) share the same registered run-fn, consistent with
- * the pattern used across workglow providers.
- */
-function createLlamaCppServerTextGenerationStream(
-  options: ILlamaCppServerProviderOptions
-): AiProviderRunFn<TextGenerationTaskInput, TextGenerationTaskOutput, ModelConfig> {
-  return async (input, model, signal, emit) => {
-    signal?.throwIfAborted?.();
-
-    const unified = input as UnifiedTextGenerationInput;
-    const hasMessages = Array.isArray(unified.messages) && unified.messages.length > 0;
 
-    const messages = hasMessages
-      ? [
-          ...(unified.systemPrompt ? [{ role: "system", content: unified.systemPrompt }] : []),
-          ...unified.messages!.map((m) => ({ role: m.role, content: m.content })),
-        ]
-      : [{ role: "user", content: input.prompt }];
-
-    const body = JSON.stringify({
-      model: model?.model_id ?? "",
-      messages,
-      stream: true,
-      ...(input.maxTokens !== undefined ? { max_tokens: input.maxTokens } : {}),
-      ...(input.temperature !== undefined ? { temperature: input.temperature } : {}),
-      ...(input.topP !== undefined ? { top_p: input.topP } : {}),
-      ...(input.frequencyPenalty !== undefined
-        ? { frequency_penalty: input.frequencyPenalty }
-        : {}),
-      ...(input.presencePenalty !== undefined ? { presence_penalty: input.presencePenalty } : {}),
-    });
-
-    // Acquire base URL — either from external override or via transport.
-    let baseUrl: string;
-    let handle: IRunningHandle | undefined;
-
-    if (options.externalUrl) {
-      baseUrl = options.externalUrl.replace(/\/$/, "");
-    } else {
-      if (!model?.model_id) {
-        throw new Error("LlamaCppServerProvider: model.model_id is required to acquire a backend");
-      }
-      handle = await options.transport.ensureRunning({
-        backend: "llamacpp-server",
-        modelPath: model.model_id,
-        opts: { ctx: options.defaultCtx ?? 4096 },
-      });
-      baseUrl = handle.url.replace(/\/$/, "");
-    }
-
-    try {
-      signal?.throwIfAborted?.();
-
-      const response = await fetch(`${baseUrl}/v1/chat/completions`, {
-        method: "POST",
-        headers: { "Content-Type": "application/json" },
-        body,
-        signal,
-      });
-
-      if (!response.ok) {
-        const text = await response.text().catch(() => "(no body)");
-        throw new Error(
-          `LlamaCppServerProvider: HTTP ${response.status} from /v1/chat/completions — ${text}`
-        );
-      }
-
-      const reader = response.body?.getReader();
-      if (!reader) {
-        throw new Error("LlamaCppServerProvider: response body is null");
-      }
-
-      const decoder = new TextDecoder();
-      let buffer = "";
-
-      try {
-        let sawDone = false;
-        while (!sawDone) {
-          signal?.throwIfAborted?.();
-          const { done, value } = await reader.read();
-          if (done) break;
-
-          buffer += decoder.decode(value, { stream: true });
-          const lines = buffer.split("\n");
-          buffer = lines.pop() ?? "";
-
-          for (const line of lines) {
-            const trimmed = line.trim();
-            if (!trimmed.startsWith("data:")) continue;
-            const data = trimmed.slice(5).trim();
-            if (data === "[DONE]") {
-              sawDone = true;
-              await reader.cancel().catch(() => undefined);
-              break;
-            }
-            if (!data) continue;
-
-            let chunk: { choices?: { delta?: { content?: string } }[] };
-            try {
-              chunk = JSON.parse(data) as typeof chunk;
-            } catch {
-              continue;
-            }
-
-            const delta = chunk.choices?.[0]?.delta?.content;
-            if (delta) {
-              emit({ type: "text-delta", port: "text", textDelta: delta });
-            }
-          }
-        }
-      } finally {
-        reader.releaseLock();
-      }
-
-      emit({ type: "finish", data: {} as TextGenerationTaskOutput });
-    } finally {
-      await handle?.release();
-    }
-  };
+  protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] {
+    return llamaCppServerWorkerRunFnSpecs();
+  }
 }
diff --git a/providers/llamacpp-server/src/ai/LlamaCppServerQueuedProvider.ts b/providers/llamacpp-server/src/ai/LlamaCppServerQueuedProvider.ts
new file mode 100644
index 000000000..70521c6ab
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/LlamaCppServerQueuedProvider.ts
@@ -0,0 +1,34 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Capability, ModelRecord } from "@workglow/ai";
+import { AiProvider } from "@workglow/ai";
+import { createCloudProviderClass } from "@workglow/ai/provider-utils";
+import {
+  inferLlamaCppServerCapabilities,
+  llamaCppServerWorkerRunFnSpecs,
+} from "./common/LlamaCppServer_Capabilities";
+import { LOCAL_LLAMACPP_SERVER } from "./common/LlamaCppServer_Constants";
+import type { LlamaCppServerModelConfig } from "./common/LlamaCppServer_ModelSchema";
+
+/** Main-thread registration (inline or worker-backed). */
+export class LlamaCppServerQueuedProvider extends createCloudProviderClass<LlamaCppServerModelConfig>(
+  AiProvider,
+  {
+    name: LOCAL_LLAMACPP_SERVER,
+    displayName: "Local llama-server (HTTP)",
+    isLocal: true,
+    supportsBrowser: true,
+  }
+) {
+  override inferCapabilities(model: ModelRecord): readonly Capability[] {
+    return inferLlamaCppServerCapabilities(model);
+  }
+
+  protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] {
+    return llamaCppServerWorkerRunFnSpecs();
+  }
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Capabilities.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Capabilities.ts
new file mode 100644
index 000000000..14b103184
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Capabilities.ts
@@ -0,0 +1,80 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Capability, ModelRecord } from "@workglow/ai/worker";
+import { LLAMACPP_SERVER_CAPABILITY_SETS } from "./LlamaCppServer_CapabilitySets";
+
+export const LLAMACPP_SERVER_RUN_FN_SPECS = LLAMACPP_SERVER_CAPABILITY_SETS.map((serves) => ({
+  serves,
+}));
+
+export function llamaCppServerWorkerRunFnSpecs(): readonly {
+  readonly serves: readonly Capability[];
+}[] {
+  return LLAMACPP_SERVER_RUN_FN_SPECS;
+}
+
+const EMBEDDING_NAME_PATTERNS: readonly RegExp[] = [
+  /embed/i,
+  /^nomic-embed/i,
+  /^mxbai-embed/i,
+  /^all-minilm/i,
+  /^snowflake-arctic-embed/i,
+  /^bge-/i,
+  /^gte-/i,
+];
+
+type CapabilityHints = Pick<ModelRecord, "model_id" | "provider_config" | "capabilities">;
+
+/**
+ * Heuristic capability inference over the lowercased last path segment of `model_path`,
+ * `model_name` or `model_id` (`/` and `\` both separate segments). Like Ollama, default-permissive:
+ * a mis-routed model surfaces as a runtime HTTP error rather than a missed capability.
+ *
+ *   1. `provider_config.native_dimensions` set → embedding model
+ *   2. Filename matches an embedding pattern → embedding model
+ *   3. Filename matches llava / bakllava / -vision → vision-capable text-gen
+ *   4. Any other name → full text-gen + rewriter + summary + tool-use + meta
+ *   5. No id at all → declared caps OR baseline meta-ops
+ */
+export function inferLlamaCppServerCapabilities(model: CapabilityHints): readonly Capability[] {
+  const pc = model.provider_config as
+    | { model_path?: string; model_name?: string; native_dimensions?: number }
+    | undefined;
+  const id = String(pc?.model_path ?? pc?.model_name ?? model.model_id ?? "");
+  const base = (id.split(/[\\/]/).pop() ?? "").toLowerCase();
+
+  if (typeof pc?.native_dimensions === "number") {
+    return ["text.embedding", "model.info", "model.search"];
+  }
+  if (EMBEDDING_NAME_PATTERNS.some((rx) => rx.test(base))) {
+    return ["text.embedding", "model.info", "model.search"];
+  }
+  if (/llava|bakllava|-vision\b/.test(base)) {
+    return [
+      "text.generation",
+      "text.rewriter",
+      "text.summary",
+      "tool-use",
+      "vision-input",
+      "model.info",
+      "model.search",
+    ];
+  }
+  if (base.length > 0) {
+    return [
+      "text.generation",
+      "text.rewriter",
+      "text.summary",
+      "tool-use",
+      "model.info",
+      "model.search",
+    ];
+  }
+  const declared = (model.capabilities as readonly Capability[] | undefined) ?? [];
+  if (declared.length > 0) return declared;
+  return ["model.info", "model.search"];
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_CapabilitySets.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_CapabilitySets.ts
new file mode 100644
index 000000000..444b92946
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_CapabilitySets.ts
@@ -0,0 +1,38 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Capability } from "@workglow/ai/worker";
+
+/**
+ * Single source of truth for llamacpp-server's capability sets.
+ *
+ * Both the run-fn registrations returned by `buildLlamaCppServerRunFns` and
+ * `workerRunFnSpecs()` derive their `serves` arrays from these named exports.
+ * SDK-free so the main thread can import them without pulling in fetch code.
+ */
+export const LLAMACPP_SERVER_TEXT_GENERATION = ["text.generation"] as const satisfies Capability[];
+
+export const LLAMACPP_SERVER_TOOL_USE = [
+  "text.generation",
+  "tool-use",
+] as const satisfies Capability[];
+
+export const LLAMACPP_SERVER_TEXT_REWRITER = ["text.rewriter"] as const satisfies Capability[];
+export const LLAMACPP_SERVER_TEXT_SUMMARY = ["text.summary"] as const satisfies Capability[];
+export const LLAMACPP_SERVER_TEXT_EMBEDDING = ["text.embedding"] as const satisfies Capability[];
+export const LLAMACPP_SERVER_MODEL_SEARCH = ["model.search"] as const satisfies Capability[];
+export const LLAMACPP_SERVER_MODEL_INFO = ["model.info"] as const satisfies Capability[];
+
+/** Aggregated list — for `workerRunFnSpecs()` derivation. Order MUST match `buildLlamaCppServerRunFns`. */
+export const LLAMACPP_SERVER_CAPABILITY_SETS = [
+  LLAMACPP_SERVER_TEXT_GENERATION,
+  LLAMACPP_SERVER_TOOL_USE,
+  LLAMACPP_SERVER_TEXT_REWRITER,
+  LLAMACPP_SERVER_TEXT_SUMMARY,
+  LLAMACPP_SERVER_TEXT_EMBEDDING,
+  LLAMACPP_SERVER_MODEL_SEARCH,
+  LLAMACPP_SERVER_MODEL_INFO,
+] as const;
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts
new file mode 100644
index 000000000..232b4316c
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts
@@ -0,0 +1,161 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import { LLAMACPP_SERVER_DEFAULT_CTX } from "./LlamaCppServer_Constants";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+
+/**
+ * Provider-construction options shared across registrations.
+ *
+ * `transport` and `externalUrl` are both optional, but the resolver throws
+ * at acquisition time if no URL source resolves for a given request.
+ */
+export interface ILlamaCppServerProviderOptions {
+  readonly transport?: IBackendsTransport;
+  readonly externalUrl?: string;
+  /** Default context length forwarded to the broker. Falls back to {@link LLAMACPP_SERVER_DEFAULT_CTX}. */
+  readonly defaultCtx?: number;
+}
+
+/** Resolved base URL plus a release callback (no-op for externalUrl paths). */
+export interface IAcquiredBaseUrl {
+  readonly baseUrl: string;
+  readonly release: () => Promise<void>;
+}
+
+/**
+ * Pick the base URL for a single request, together with the hook that releases it
+ * (a no-op unless the URL came from the transport).
+ *
+ * Sources are tried in order; the first that applies wins:
+ *   1. `model.provider_config.base_url`
+ *   2. `opts.externalUrl`
+ *   3. `opts.transport.ensureRunning({ ... })` — requires `provider_config.model_path`
+ * Throws when the transport path has no model path, or when no source is configured.
+ */
+export async function acquireBaseUrl(
+  model: LlamaCppServerModelConfig | undefined,
+  opts: ILlamaCppServerProviderOptions
+): Promise<IAcquiredBaseUrl> {
+  const config = model?.provider_config;
+  // Statically configured URLs have no lifecycle, so they share the no-op release.
+  for (const staticUrl of [config?.base_url, opts.externalUrl]) {
+    if (typeof staticUrl === "string" && staticUrl.length > 0) {
+      return { baseUrl: stripTrailingSlash(staticUrl), release: noopRelease };
+    }
+  }
+  const { transport } = opts;
+  if (!transport) {
+    throw new Error(
+      "LlamaCppServer: no base URL source — set provider_config.base_url, opts.externalUrl, or opts.transport."
+    );
+  }
+  const modelPath = config?.model_path;
+  if (typeof modelPath !== "string" || modelPath.length === 0) {
+    throw new Error(
+      "LlamaCppServer: transport-mode acquisition requires provider_config.model_path."
+    );
+  }
+  const configuredCtx = config?.ctx;
+  const ctx =
+    typeof configuredCtx === "number"
+      ? configuredCtx
+      : (opts.defaultCtx ?? LLAMACPP_SERVER_DEFAULT_CTX);
+  // Transport-managed backend: the returned release forwards to the running handle.
+  const handle: IRunningHandle = await transport.ensureRunning({
+    backend: "llamacpp-server",
+    modelPath,
+    opts: { ctx },
+  });
+  return { baseUrl: stripTrailingSlash(handle.url), release: () => handle.release() };
+}
+
+function stripTrailingSlash(url: string): string {
+  return url.replace(/\/+$/, "");
+}
+
+const noopRelease = async (): Promise<void> => {};
+
+// ── SSE helper ─────────────────────────────────────────────────────────────
+
+/** One parsed delta from an OpenAI-compatible `/v1/chat/completions` stream. */
+export interface IChatCompletionDelta {
+  readonly contentDelta?: string;
+  readonly toolCallDeltas?: ReadonlyArray<{
+    readonly index?: number;
+    readonly id?: string;
+    readonly type?: string;
+    readonly function?: { readonly name?: string; readonly arguments?: string };
+  }>;
+  readonly done?: boolean;
+  readonly finishReason?: string;
+}
+
+/**
+ * Iterate over `data:` lines from an SSE response body, parsing each into
+ * an {@link IChatCompletionDelta}. Yields `{ done: true }` on `data: [DONE]`.
+ * Non-`data:` lines and payloads that fail `JSON.parse` are skipped; a missing body throws.
+ * `signal` is checked before each read. The body is cancelled and the reader lock
+ * released on every exit: `[DONE]`, end of stream, abort, error, or early consumer return.
+ */
+export async function* readChatCompletionDeltas(
+  response: Response,
+  signal: AbortSignal | undefined
+): AsyncGenerator<IChatCompletionDelta> {
+  const reader = response.body?.getReader();
+  if (!reader) {
+    throw new Error("LlamaCppServer: response body is null");
+  }
+  const decoder = new TextDecoder();
+  let buffer = "";
+  try {
+    let sawDone = false;
+    while (!sawDone) {
+      signal?.throwIfAborted?.();
+      const { done, value } = await reader.read();
+      if (done) break;
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split("\n");
+      buffer = lines.pop() ?? "";
+      for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed.startsWith("data:")) continue;
+        const data = trimmed.slice(5).trim();
+        if (data === "[DONE]") {
+          sawDone = true;
+          yield { done: true };
+          break;
+        }
+        if (!data) continue;
+        let chunk: {
+          choices?: Array<{
+            delta?: {
+              content?: string;
+              tool_calls?: IChatCompletionDelta["toolCallDeltas"];
+            };
+            finish_reason?: string;
+          }>;
+        };
+        try {
+          chunk = JSON.parse(data) as typeof chunk;
+        } catch {
+          continue;
+        }
+        const choice = chunk.choices?.[0];
+        const contentDelta = choice?.delta?.content;
+        const toolCallDeltas = choice?.delta?.tool_calls;
+        const finishReason = choice?.finish_reason;
+        if (contentDelta !== undefined || toolCallDeltas !== undefined || finishReason) {
+          yield { contentDelta, toolCallDeltas, finishReason };
+        }
+      }
+    }
+  } finally {
+    await reader.cancel().catch(() => undefined); // also covers abort and early consumer exit
+    reader.releaseLock();
+  }
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts
index 53cc7a8ee..2c8601d0a 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Constants.ts
@@ -5,3 +5,6 @@
  */
 
 export const LOCAL_LLAMACPP_SERVER = "LOCAL_LLAMACPP_SERVER";
+
+/** Default llama-server context length when no per-request or per-model override is set. */
+export const LLAMACPP_SERVER_DEFAULT_CTX = 4096;
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_JobRunFns.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_JobRunFns.ts
new file mode 100644
index 000000000..1ad09c4b3
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_JobRunFns.ts
@@ -0,0 +1,57 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFnRegistration } from "@workglow/ai";
+import {
+  LLAMACPP_SERVER_MODEL_INFO,
+  LLAMACPP_SERVER_MODEL_SEARCH,
+  LLAMACPP_SERVER_TEXT_EMBEDDING,
+  LLAMACPP_SERVER_TEXT_GENERATION,
+  LLAMACPP_SERVER_TEXT_REWRITER,
+  LLAMACPP_SERVER_TEXT_SUMMARY,
+  LLAMACPP_SERVER_TOOL_USE,
+} from "./LlamaCppServer_CapabilitySets";
+import { type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client";
+import { createLlamaCppServerModelInfoStream } from "./LlamaCppServer_ModelInfo";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+import { createLlamaCppServerModelSearchStream } from "./LlamaCppServer_ModelSearch";
+import { createLlamaCppServerTextEmbeddingStream } from "./LlamaCppServer_TextEmbedding";
+import { createLlamaCppServerTextGenerationStream } from "./LlamaCppServer_TextGeneration";
+import { createLlamaCppServerTextRewriterStream } from "./LlamaCppServer_TextRewriter";
+import { createLlamaCppServerTextSummaryStream } from "./LlamaCppServer_TextSummary";
+import { createLlamaCppServerToolCallingStream } from "./LlamaCppServer_ToolCalling";
+
+/**
+ * Build the full set of capability-set run-fn registrations bound to a
+ * single set of provider options. Order is significant only as a
+ * tiebreaker — the dispatcher prefers the smallest `serves` superset of
+ * the task's `requires`.
+ */
+export function buildLlamaCppServerRunFns(
+  opts: ILlamaCppServerProviderOptions
+): readonly AiProviderRunFnRegistration<
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  any,
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  any,
+  LlamaCppServerModelConfig
+>[] {
+  return [
+    {
+      serves: LLAMACPP_SERVER_TEXT_GENERATION,
+      runFn: createLlamaCppServerTextGenerationStream(opts),
+    },
+    { serves: LLAMACPP_SERVER_TOOL_USE, runFn: createLlamaCppServerToolCallingStream(opts) },
+    { serves: LLAMACPP_SERVER_TEXT_REWRITER, runFn: createLlamaCppServerTextRewriterStream(opts) },
+    { serves: LLAMACPP_SERVER_TEXT_SUMMARY, runFn: createLlamaCppServerTextSummaryStream(opts) },
+    {
+      serves: LLAMACPP_SERVER_TEXT_EMBEDDING,
+      runFn: createLlamaCppServerTextEmbeddingStream(opts),
+    },
+    { serves: LLAMACPP_SERVER_MODEL_SEARCH, runFn: createLlamaCppServerModelSearchStream(opts) },
+    { serves: LLAMACPP_SERVER_MODEL_INFO, runFn: createLlamaCppServerModelInfoStream(opts) },
+  ];
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts
new file mode 100644
index 000000000..f2801948b
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts
@@ -0,0 +1,93 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFn, ModelInfoTaskInput, ModelInfoTaskOutput } from "@workglow/ai";
+import { acquireBaseUrl, type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+export function createLlamaCppServerModelInfoStream(
+  opts: ILlamaCppServerProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<ModelInfoTaskInput, ModelInfoTaskOutput, LlamaCppServerModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    const pc = model?.provider_config;
+
+    if (input.detail === "dimensions") {
+      let native_dimensions =
+        typeof pc?.native_dimensions === "number" ? pc.native_dimensions : undefined;
+      if (native_dimensions === undefined) {
+        try {
+          const { baseUrl, release } = await acquire(model, opts);
+          try {
+            const res = await fetch(`${baseUrl}/props`, { signal });
+            if (res.ok) {
+              const props = (await res.json()) as {
+                default_generation_settings?: { n_embd?: number };
+              };
+              const n = props.default_generation_settings?.n_embd;
+              if (typeof n === "number") native_dimensions = n;
+            }
+          } finally {
+            await release();
+          }
+        } catch {
+          signal?.throwIfAborted?.(); // Aborts propagate; otherwise leave unset for the caller.
+        }
+      }
+      emit({
+        type: "finish",
+        data: {
+          model: input.model,
+          is_local: true,
+          is_remote: false,
+          supports_browser: true,
+          supports_node: true,
+          is_cached: false,
+          is_loaded: false,
+          file_sizes: null,
+          ...(native_dimensions !== undefined ? { native_dimensions } : {}),
+        } as ModelInfoTaskOutput,
+      });
+      return;
+    }
+
+    // General info — try /v1/models. is_loaded = the server reports this model name.
+    let is_loaded = false;
+    const expectedName = getLlamaCppServerModelName(model);
+    try {
+      const { baseUrl, release } = await acquire(model, opts);
+      try {
+        const res = await fetch(`${baseUrl}/v1/models`, { signal });
+        if (res.ok) {
+          const body = (await res.json()) as { data?: Array<{ id?: string }> };
+          is_loaded = !!body.data?.some((m) => m.id === expectedName);
+        }
+      } finally {
+        await release();
+      }
+    } catch {
+      signal?.throwIfAborted?.(); // Aborts propagate; an unreachable server leaves is_loaded false.
+    }
+
+    emit({
+      type: "finish",
+      data: {
+        model: input.model,
+        is_local: true,
+        is_remote: false,
+        supports_browser: true,
+        supports_node: true,
+        is_cached: false,
+        is_loaded,
+        file_sizes: null,
+      },
+    });
+  };
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSchema.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSchema.ts
new file mode 100644
index 000000000..91a1a3a4a
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSchema.ts
@@ -0,0 +1,85 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { ModelConfigSchema, ModelRecordSchema } from "@workglow/ai/worker";
+import type { DataPortSchemaObject, FromSchema } from "@workglow/util/worker";
+import { LOCAL_LLAMACPP_SERVER } from "./LlamaCppServer_Constants";
+
+/**
+ * Provider-config schema for `@workglow/llamacpp-server`.
+ *
+ * Required fields:
+ * - `provider` — discriminator
+ * - `provider_config.model_path` — absolute path passed to the broker; OR `base_url` if externalUrl-mode
+ *
+ * Either `model_path` (transport mode) OR `base_url` (externalUrl mode) must be set
+ * for a usable record. The provider resolver throws at runtime if neither resolves.
+ */
+export const LlamaCppServerModelSchema = {
+  type: "object",
+  properties: {
+    provider: {
+      const: LOCAL_LLAMACPP_SERVER,
+      description: "Discriminator: local llama-server HTTP provider.",
+    },
+    provider_config: {
+      type: "object",
+      description: "llama-server-specific configuration.",
+      properties: {
+        model_path: {
+          type: "string",
+          description:
+            "Absolute filesystem path to the .gguf model. Required for transport-mode acquisition.",
+        },
+        model_name: {
+          type: "string",
+          description:
+            "Optional logical model name sent as OpenAI `model` field. llama-server ignores it.",
+        },
+        base_url: {
+          type: "string",
+          description:
+            "Optional per-record base URL override. Takes precedence over provider-level externalUrl. Used for records discovered via externalUrl-mode model.search.",
+        },
+        native_dimensions: {
+          type: "number",
+          description: "Embedding dimensions for embedding models. Skips /props lookup.",
+        },
+        ctx: {
+          type: "number",
+          description: "Per-model llama-server context length override.",
+        },
+      },
+      additionalProperties: false,
+    },
+  },
+  required: ["provider", "provider_config"],
+  additionalProperties: true,
+} as const satisfies DataPortSchemaObject;
+
+export const LlamaCppServerModelRecordSchema = {
+  type: "object",
+  properties: {
+    ...ModelRecordSchema.properties,
+    ...LlamaCppServerModelSchema.properties,
+  },
+  required: [...ModelRecordSchema.required, ...LlamaCppServerModelSchema.required],
+  additionalProperties: false,
+} as const satisfies DataPortSchemaObject;
+
+export type LlamaCppServerModelRecord = FromSchema<typeof LlamaCppServerModelRecordSchema>;
+
+export const LlamaCppServerModelConfigSchema = {
+  type: "object",
+  properties: {
+    ...ModelConfigSchema.properties,
+    ...LlamaCppServerModelSchema.properties,
+  },
+  required: [...ModelConfigSchema.required, ...LlamaCppServerModelSchema.required],
+  additionalProperties: false,
+} as const satisfies DataPortSchemaObject;
+
+export type LlamaCppServerModelConfig = FromSchema<typeof LlamaCppServerModelConfigSchema>;
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
new file mode 100644
index 000000000..ffd3eca39
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
@@ -0,0 +1,58 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFn, ModelSearchTaskInput, ModelSearchTaskOutput } from "@workglow/ai";
+import { filterModelSearchResultsByQuery } from "@workglow/ai/provider-utils";
+import type { ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client";
+import { LOCAL_LLAMACPP_SERVER } from "./LlamaCppServer_Constants";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+
+/**
+ * Lists models from `{externalUrl}/v1/models` (provider-level `externalUrl`), filtered by
+ * `input.query`. No URL → `[]`: transport mode cannot search because `transport.ensureRunning`
+ * needs a model path. HTTP/network errors also give `[]`; an aborted `signal` is rethrown.
+ */
+export function createLlamaCppServerModelSearchStream(
+  opts: ILlamaCppServerProviderOptions
+): AiProviderRunFn<ModelSearchTaskInput, ModelSearchTaskOutput, LlamaCppServerModelConfig> {
+  return async (input, _model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    if (!opts.externalUrl) {
+      emit({ type: "finish", data: { results: [] } });
+      return;
+    }
+    const baseUrl = opts.externalUrl.replace(/\/+$/, ""); // drop trailing slashes before appending the path
+    try {
+      const res = await fetch(`${baseUrl}/v1/models`, { signal });
+      if (!res.ok) {
+        emit({ type: "finish", data: { results: [] } });
+        return;
+      }
+      const body = (await res.json()) as { data?: Array<{ id: string }> };
+      const results = (body.data ?? []).map((m) => ({
+        id: m.id,
+        label: m.id,
+        description: "llama-server loaded model",
+        record: {
+          model_id: m.id,
+          provider: LOCAL_LLAMACPP_SERVER,
+          title: m.id,
+          description: "",
+          capabilities: [],
+          provider_config: { model_name: m.id, base_url: baseUrl },
+          metadata: {},
+        },
+        raw: m,
+      }));
+      const filtered = filterModelSearchResultsByQuery(results, input.query);
+      emit({ type: "finish", data: { results: filtered } });
+    } catch (err) {
+      // Cancellation must propagate; only genuine lookup failures degrade to "no results".
+      if (signal?.aborted) throw err;
+      emit({ type: "finish", data: { results: [] } });
+    }
+  };
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelUtil.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelUtil.ts
new file mode 100644
index 000000000..4678be11c
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelUtil.ts
@@ -0,0 +1,31 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+
+/**
+ * Returns the logical model name to send in the OpenAI `model` field:
+ * `provider_config.model_name`, else `model_path`, else `model_id`, else "".
+ * llama-server ignores this value (it serves one model per process).
+ */
+export function getLlamaCppServerModelName(model: LlamaCppServerModelConfig | undefined): string {
+  const pc = model?.provider_config;
+  return String(pc?.model_name ?? pc?.model_path ?? model?.model_id ?? "");
+}
+
+/**
+ * Resolves `provider_config.model_path`, the path handed to `transport.ensureRunning`.
+ * Throws when it is absent, not a string, or empty — transport mode cannot start without it.
+ */
+export function getLlamaCppServerModelPath(model: LlamaCppServerModelConfig | undefined): string {
+  const modelPath = model?.provider_config?.model_path;
+  if (typeof modelPath === "string" && modelPath.length > 0) {
+    return modelPath;
+  }
+  throw new Error(
+    "LlamaCppServer: provider_config.model_path is required for transport-mode acquisition."
+  );
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts
new file mode 100644
index 000000000..7f242c632
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts
@@ -0,0 +1,64 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  AiProviderRunFn,
+  TextEmbeddingTaskInput,
+  TextEmbeddingTaskOutput,
+} from "@workglow/ai";
+import { acquireBaseUrl, type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+/**
+ * Builds the embedding run-fn: POSTs the input text(s) to `/v1/embeddings` and emits exactly
+ * one `finish` event carrying the complete `TextEmbeddingTaskOutput` (no incremental deltas).
+ */
+export function createLlamaCppServerTextEmbeddingStream(
+  opts: ILlamaCppServerProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<TextEmbeddingTaskInput, TextEmbeddingTaskOutput, LlamaCppServerModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    const inputs = Array.isArray(input.text) ? input.text : [input.text];
+    const payload = JSON.stringify({ model: getLlamaCppServerModelName(model), input: inputs });
+    const { baseUrl: serverUrl, release: releaseServer } = await acquire(model, opts);
+    try {
+      const httpResponse = await fetch(`${serverUrl}/v1/embeddings`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: payload,
+        signal,
+      });
+      if (!httpResponse.ok) {
+        const errorBody = await httpResponse.text().catch(() => "(no body)");
+        throw new Error(
+          `LlamaCppServer: HTTP ${httpResponse.status} from /v1/embeddings (embeddings) — ${errorBody}`
+        );
+      }
+      const parsed = (await httpResponse.json()) as { data?: Array<{ embedding: number[] }> };
+      const rows = parsed.data ?? [];
+      const embeddings = rows.map((row) => new Float32Array(row.embedding));
+      if (embeddings.length !== inputs.length) {
+        throw new Error(
+          `LlamaCppServer: /v1/embeddings returned ${embeddings.length} embeddings for ${inputs.length} input(s)`
+        );
+      }
+      // Mirror the input shape: array in → array of vectors out; single text in → one vector.
+      let data: TextEmbeddingTaskOutput;
+      if (Array.isArray(input.text)) {
+        data = { vector: embeddings };
+      } else {
+        data = { vector: embeddings[0] };
+      }
+      emit({ type: "finish", data });
+    } finally {
+      await releaseServer();
+    }
+  };
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts
new file mode 100644
index 000000000..29c674674
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts
@@ -0,0 +1,103 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  AiProviderRunFn,
+  TextGenerationTaskInput,
+  TextGenerationTaskOutput,
+} from "@workglow/ai";
+import {
+  acquireBaseUrl,
+  readChatCompletionDeltas,
+  type ILlamaCppServerProviderOptions,
+} from "./LlamaCppServer_Client";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+interface UnifiedTextGenerationInput extends TextGenerationTaskInput { // task input widened with optional chat fields
+  readonly messages?: ReadonlyArray<{
+    readonly role: string; // forwarded verbatim as the chat message `role`
+    readonly content:
+      | string // plain-text message
+      | ReadonlyArray<
+          | { readonly type: "text"; readonly text: string }
+          | { readonly type: "image_url"; readonly image_url: { readonly url: string } }
+        >; // multi-part content made of text and/or image parts
+  }>;
+  readonly systemPrompt?: string; // sent as a leading `system` message, only when `messages` is non-empty
+}
+
+/**
+ * Streaming run-fn factory for `["text.generation"]` (and, when the model has
+ * `vision-input`, image-bearing chat content too).
+ *
+ * Discriminates on `Array.isArray(input.messages) && input.messages.length > 0`
+ * so {@link AiChatTask} and {@link TextGenerationTask} share the same
+ * registered run-fn, consistent with the project convention.
+ *
+ * Vision-input is folded into this run-fn rather than living separately:
+ * llava-family chat is still a `/v1/chat/completions` call — only the
+ * `content` shape changes. The provider's `inferCapabilities` decides
+ * whether `vision-input` is declared.
+ */
+export function createLlamaCppServerTextGenerationStream(
+  opts: ILlamaCppServerProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<TextGenerationTaskInput, TextGenerationTaskOutput, LlamaCppServerModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    const unified = input as UnifiedTextGenerationInput;
+
+    // A non-empty chat history (optionally led by the system prompt) wins over the bare prompt.
+    const chat: Array<{ role: string; content: unknown }> = [];
+    if (Array.isArray(unified.messages) && unified.messages.length > 0) {
+      if (unified.systemPrompt) chat.push({ role: "system", content: unified.systemPrompt });
+      for (const m of unified.messages) chat.push({ role: m.role, content: m.content });
+    } else {
+      chat.push({ role: "user", content: input.prompt });
+    }
+
+    // Sampling options are attached only when set; insertion order fixes the serialized key order.
+    const request: Record<string, unknown> = {
+      model: getLlamaCppServerModelName(model),
+      messages: chat,
+      stream: true,
+    };
+    if (input.maxTokens !== undefined) request.max_tokens = input.maxTokens;
+    if (input.temperature !== undefined) request.temperature = input.temperature;
+    if (input.topP !== undefined) request.top_p = input.topP;
+    if (input.frequencyPenalty !== undefined) request.frequency_penalty = input.frequencyPenalty;
+    if (input.presencePenalty !== undefined) request.presence_penalty = input.presencePenalty;
+    const payload = JSON.stringify(request);
+
+    const { baseUrl: serverUrl, release: releaseServer } = await acquire(model, opts);
+    try {
+      signal?.throwIfAborted?.();
+      const httpResponse = await fetch(`${serverUrl}/v1/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: payload,
+        signal,
+      });
+      if (!httpResponse.ok) {
+        const errorBody = await httpResponse.text().catch(() => "(no body)");
+        throw new Error(
+          `LlamaCppServer: HTTP ${httpResponse.status} from /v1/chat/completions (text-generation) — ${errorBody}`
+        );
+      }
+      for await (const chunk of readChatCompletionDeltas(httpResponse, signal)) {
+        if (chunk.done) break;
+        if (!chunk.contentDelta) continue;
+        emit({ type: "text-delta", port: "text", textDelta: chunk.contentDelta });
+      }
+      emit({ type: "finish", data: {} as TextGenerationTaskOutput });
+    } finally {
+      await releaseServer();
+    }
+  };
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts
new file mode 100644
index 000000000..c0266ad82
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts
@@ -0,0 +1,57 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFn, TextRewriterTaskInput, TextRewriterTaskOutput } from "@workglow/ai";
+import {
+  acquireBaseUrl,
+  readChatCompletionDeltas,
+  type ILlamaCppServerProviderOptions,
+} from "./LlamaCppServer_Client";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+/** Streams a rewrite of `input.text`, using `input.prompt` as the system instruction. */
+export function createLlamaCppServerTextRewriterStream(
+  opts: ILlamaCppServerProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<TextRewriterTaskInput, TextRewriterTaskOutput, LlamaCppServerModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    const systemMessage = { role: "system", content: input.prompt };
+    const userMessage = { role: "user", content: input.text };
+    const payload = JSON.stringify({
+      model: getLlamaCppServerModelName(model),
+      messages: [systemMessage, userMessage],
+      stream: true,
+    });
+    const { baseUrl: serverUrl, release: releaseServer } = await acquire(model, opts);
+    try {
+      const httpResponse = await fetch(`${serverUrl}/v1/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: payload,
+        signal,
+      });
+      if (!httpResponse.ok) {
+        const errorBody = await httpResponse.text().catch(() => "(no body)");
+        throw new Error(
+          `LlamaCppServer: HTTP ${httpResponse.status} from /v1/chat/completions (rewriter) — ${errorBody}`
+        );
+      }
+      for await (const chunk of readChatCompletionDeltas(httpResponse, signal)) {
+        if (chunk.done) break;
+        if (!chunk.contentDelta) continue;
+        emit({ type: "text-delta", port: "text", textDelta: chunk.contentDelta });
+      }
+      // The rewritten text has already gone out as deltas, so `finish` carries an empty payload.
+      emit({ type: "finish", data: {} as TextRewriterTaskOutput });
+    } finally {
+      await releaseServer();
+    }
+  };
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts
new file mode 100644
index 000000000..14c914cd5
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts
@@ -0,0 +1,57 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFn, TextSummaryTaskInput, TextSummaryTaskOutput } from "@workglow/ai";
+import {
+  acquireBaseUrl,
+  readChatCompletionDeltas,
+  type ILlamaCppServerProviderOptions,
+} from "./LlamaCppServer_Client";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+/** Streams a summary of `input.text`, driven by a fixed "summarize concisely" system message. */
+export function createLlamaCppServerTextSummaryStream(
+  opts: ILlamaCppServerProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<TextSummaryTaskInput, TextSummaryTaskOutput, LlamaCppServerModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    const systemMessage = { role: "system", content: "Summarize the following text concisely." };
+    const userMessage = { role: "user", content: input.text };
+    const payload = JSON.stringify({
+      model: getLlamaCppServerModelName(model),
+      messages: [systemMessage, userMessage],
+      stream: true,
+    });
+    const { baseUrl: serverUrl, release: releaseServer } = await acquire(model, opts);
+    try {
+      const httpResponse = await fetch(`${serverUrl}/v1/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: payload,
+        signal,
+      });
+      if (!httpResponse.ok) {
+        const errorBody = await httpResponse.text().catch(() => "(no body)");
+        throw new Error(
+          `LlamaCppServer: HTTP ${httpResponse.status} from /v1/chat/completions (summary) — ${errorBody}`
+        );
+      }
+      for await (const chunk of readChatCompletionDeltas(httpResponse, signal)) {
+        if (chunk.done) break;
+        if (!chunk.contentDelta) continue;
+        emit({ type: "text-delta", port: "text", textDelta: chunk.contentDelta });
+      }
+      // The summary text has already gone out as deltas, so `finish` carries an empty payload.
+      emit({ type: "finish", data: {} as TextSummaryTaskOutput });
+    } finally {
+      await releaseServer();
+    }
+  };
+}
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts
new file mode 100644
index 000000000..7f4575234
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts
@@ -0,0 +1,138 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  AiProviderRunFn,
+  ToolCallingTaskInput,
+  ToolCallingTaskOutput,
+  ToolCalls,
+  ToolDefinition,
+} from "@workglow/ai";
+import {
+  buildToolDescription,
+  filterValidToolCalls,
+  sanitizeToolArgs,
+  toTextFlatMessages,
+} from "@workglow/ai/worker";
+import { parsePartialJson } from "@workglow/util/worker";
+import {
+  acquireBaseUrl,
+  readChatCompletionDeltas,
+  type ILlamaCppServerProviderOptions,
+} from "./LlamaCppServer_Client";
+import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
+import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+/** Converts tool definitions into `{ type: "function", function: {...} }` request entries. */
+function mapTools(tools: readonly ToolDefinition[]) {
+  return tools.map((tool) => {
+    // The tool's input schema is forwarded as-is under "parameters".
+    const name = tool.name;
+    const description = buildToolDescription(tool);
+    const parameters = tool.inputSchema as unknown;
+    return { type: "function" as const, function: { name, description, parameters } };
+  });
+}
+
+export function createLlamaCppServerToolCallingStream(
+  opts: ILlamaCppServerProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<ToolCallingTaskInput, ToolCallingTaskOutput, LlamaCppServerModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    const chat = toTextFlatMessages(input);
+    const toolSpecs = input.toolChoice === "none" ? undefined : mapTools(input.tools);
+    const request: Record<string, unknown> = {
+      model: getLlamaCppServerModelName(model),
+      messages: chat,
+    };
+    if (toolSpecs) request.tools = toolSpecs;
+    request.stream = true;
+    if (input.temperature !== undefined) request.temperature = input.temperature;
+    if (input.maxTokens !== undefined) request.max_tokens = input.maxTokens;
+    const payload = JSON.stringify(request);
+    const { baseUrl: serverUrl, release: releaseServer } = await acquire(model, opts);
+    try {
+      const httpResponse = await fetch(`${serverUrl}/v1/chat/completions`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: payload,
+        signal,
+      });
+      if (!httpResponse.ok) {
+        const errorBody = await httpResponse.text().catch(() => "(no body)");
+        throw new Error(
+          `LlamaCppServer: HTTP ${httpResponse.status} from /v1/chat/completions (tool-use) — ${errorBody}`
+        );
+      }
+
+      // Streaming state: text so far, plus per-call argument fragments and id/name metadata.
+      let fullText = "";
+      const argsBySlot = new Map<number, string>();
+      const metaBySlot = new Map<number, { id?: string; name?: string }>();
+      let nextSyntheticIndex = 0;
+      let latestCalls: ToolCalls = [];
+
+      for await (const chunk of readChatCompletionDeltas(httpResponse, signal)) {
+        if (chunk.done) break;
+        if (chunk.contentDelta) {
+          fullText += chunk.contentDelta;
+          emit({ type: "text-delta", port: "text", textDelta: chunk.contentDelta });
+        }
+        const callDeltas = chunk.toolCallDeltas;
+        if (!callDeltas?.length) continue;
+        for (const part of callDeltas) {
+          // A fragment without a numeric index is given a fresh synthetic slot.
+          const slot = typeof part.index === "number" ? part.index : nextSyntheticIndex++;
+          const meta = metaBySlot.get(slot) ?? {};
+          if (part.id) meta.id = part.id;
+          if (part.function?.name) meta.name = part.function.name;
+          metaBySlot.set(slot, meta);
+          const argChunk = part.function?.arguments;
+          if (argChunk) argsBySlot.set(slot, (argsBySlot.get(slot) ?? "") + argChunk);
+        }
+        latestCalls = buildToolCalls(argsBySlot, metaBySlot);
+        emit({ type: "object-delta", port: "toolCalls", objectDelta: [...latestCalls] });
+      }
+      const validCalls = filterValidToolCalls(latestCalls, input.tools);
+      const output = { text: fullText, toolCalls: validCalls } as ToolCallingTaskOutput;
+      emit({ type: "finish", data: output });
+    } finally {
+      await releaseServer();
+    }
+  };
+}
+
+/** Rebuilds the tool-call list, in ascending index order, from accumulated args and metadata. */
+function buildToolCalls(
+  argsByIndex: Map<number, string>,
+  metaByIndex: Map<number, { id?: string; name?: string }>
+): ToolCalls {
+  const slots = new Set<number>([...argsByIndex.keys(), ...metaByIndex.keys()]);
+  const calls: ToolCalls = [];
+  for (const slot of Array.from(slots).sort((a, b) => a - b)) {
+    const meta = metaByIndex.get(slot);
+    if (!meta?.name) continue; // a call is only reported once its function name is known
+    const argsText = argsByIndex.get(slot) ?? "";
+    let args: Record<string, unknown> = {};
+    if (argsText.length > 0) {
+      try {
+        args = JSON.parse(argsText) as Record<string, unknown>;
+      } catch {
+        // Not valid JSON (e.g. arguments still streaming): fall back to the partial parser.
+        args = (parsePartialJson(argsText) as Record<string, unknown>) ?? {};
+      }
+    }
+    calls.push({
+      id: meta.id ?? `call_${slot}`,
+      name: meta.name,
+      input: sanitizeToolArgs(args) as Record<string, unknown>,
+    });
+  }
+  return calls;
+}
diff --git a/providers/llamacpp-server/src/ai/index.ts b/providers/llamacpp-server/src/ai/index.ts
index 4f3d7f42a..6612079ff 100644
--- a/providers/llamacpp-server/src/ai/index.ts
+++ b/providers/llamacpp-server/src/ai/index.ts
@@ -7,5 +7,22 @@
 // organize-imports-ignore
 
 export * from "./common/LlamaCppServer_Constants";
-export * from "./LlamaCppServerProvider";
+export * from "./common/LlamaCppServer_ModelSchema";
+export * from "./common/LlamaCppServer_Capabilities";
+export * from "./common/LlamaCppServer_CapabilitySets";
 export * from "./registerLlamaCppServer";
+export * from "./registerLlamaCppServerInline";
+export * from "./registerLlamaCppServerWorker";
+
+import { LLAMACPP_SERVER_RUN_FN_SPECS } from "./common/LlamaCppServer_Capabilities";
+import { buildLlamaCppServerRunFns } from "./common/LlamaCppServer_JobRunFns";
+import { LlamaCppServerQueuedProvider } from "./LlamaCppServerQueuedProvider";
+
+/**
+ * @internal Symbols exported only for use by `@workglow/test`. Not part of the stable public API.
+ */
+export const _testOnly = {
+  LlamaCppServerQueuedProvider,
+  LLAMACPP_SERVER_RUN_FN_SPECS,
+  buildLlamaCppServerRunFns,
+} as const;
diff --git a/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts b/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts
index b0f4406aa..b9b6b32bf 100644
--- a/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts
+++ b/providers/llamacpp-server/src/ai/registerLlamaCppServer.ts
@@ -5,23 +5,20 @@
  */
 
 import type { AiProviderRegisterOptions } from "@workglow/ai";
-import type { IBackendsTransport } from "@workglow/ai/provider-utils";
-import { registerProviderInline } from "@workglow/ai/provider-utils";
-import { LlamaCppServerProvider } from "./LlamaCppServerProvider";
-
-export interface IRegisterLlamaCppServerOptions extends AiProviderRegisterOptions {
-  readonly transport: IBackendsTransport;
-  readonly externalUrl?: string;
-  readonly defaultCtx?: number;
-}
+import { registerProviderWithWorker } from "@workglow/ai/provider-utils";
+import { LlamaCppServerQueuedProvider } from "./LlamaCppServerQueuedProvider";
 
+/**
+ * Main-thread worker-backed registration. The provider proxy lives on the
+ * main thread and forwards jobs to the worker, which holds the real run-fns.
+ *
+ * Use {@link registerLlamaCppServerInline} for transport mode (broker
+ * acquisition).
+ */
 export async function registerLlamaCppServer(
-  options: IRegisterLlamaCppServerOptions
+  options: AiProviderRegisterOptions & {
+    worker: Worker | (() => Worker);
+  }
 ): Promise<void> {
-  const { transport, externalUrl, defaultCtx, ...registerOptions } = options;
-  await registerProviderInline(
-    new LlamaCppServerProvider({ transport, externalUrl, defaultCtx }),
-    "LlamaCppServer",
-    registerOptions
-  );
+  await registerProviderWithWorker(new LlamaCppServerQueuedProvider(), "LlamaCppServer", options);
 }
diff --git a/providers/llamacpp-server/src/ai/registerLlamaCppServerInline.ts b/providers/llamacpp-server/src/ai/registerLlamaCppServerInline.ts
new file mode 100644
index 000000000..ec85b055e
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/registerLlamaCppServerInline.ts
@@ -0,0 +1,28 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRegisterOptions } from "@workglow/ai";
+import { registerProviderInline } from "@workglow/ai/provider-utils";
+import { type ILlamaCppServerProviderOptions } from "./common/LlamaCppServer_Client";
+import { buildLlamaCppServerRunFns } from "./common/LlamaCppServer_JobRunFns";
+import { LlamaCppServerQueuedProvider } from "./LlamaCppServerQueuedProvider";
+
+export interface IRegisterLlamaCppServerInlineOptions
+  extends AiProviderRegisterOptions, ILlamaCppServerProviderOptions {}
+
+/**
+ * Main-thread inline registration (supports transport mode): builds the run-fns from the
+ * provider options and registers a queued provider under the name "LlamaCppServer".
+ */
+export async function registerLlamaCppServerInline(
+  options: IRegisterLlamaCppServerInlineOptions = {}
+): Promise<void> {
+  // Peel the provider-specific fields off; whatever remains is the generic register options.
+  const { transport, externalUrl, defaultCtx, ...registerOptions } = options;
+  const runFns = buildLlamaCppServerRunFns({ transport, externalUrl, defaultCtx });
+  const provider = new LlamaCppServerQueuedProvider(runFns);
+  await registerProviderInline(provider, "LlamaCppServer", registerOptions);
+}
diff --git a/providers/llamacpp-server/src/ai/registerLlamaCppServerWorker.ts b/providers/llamacpp-server/src/ai/registerLlamaCppServerWorker.ts
new file mode 100644
index 000000000..d25f64bd6
--- /dev/null
+++ b/providers/llamacpp-server/src/ai/registerLlamaCppServerWorker.ts
@@ -0,0 +1,29 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { registerProviderWorker } from "@workglow/ai/provider-utils";
+import type { ILlamaCppServerProviderOptions } from "./common/LlamaCppServer_Client";
+import { buildLlamaCppServerRunFns } from "./common/LlamaCppServer_JobRunFns";
+import { LlamaCppServerProvider } from "./LlamaCppServerProvider";
+
+/**
+ * Worker-server-side registration. Supports both transport and externalUrl
+ * modes — the transport object is constructed inside this worker runtime
+ * by the caller and held by closure in the run-fns. No port transfer.
+ *
+ * This is the primary registration path in production. Callers in the
+ * Builder construct `MessagePortBackendsTransport` locally in the worker
+ * renderer and pass it straight here.
+ */
+export async function registerLlamaCppServerWorker(
+  options: ILlamaCppServerProviderOptions = {}
+): Promise<void> {
+  await registerProviderWorker((workerServer) => {
+    // Run-fns and provider are created inside the callback, when it receives the worker server.
+    const provider = new LlamaCppServerProvider(buildLlamaCppServerRunFns(options));
+    return provider.registerOnWorkerServer(workerServer);
+  }, "LlamaCppServer");
+}
diff --git a/providers/llamacpp-server/src/ai/runtime.ts b/providers/llamacpp-server/src/ai/runtime.ts
index 5a1f42e73..5ba10d546 100644
--- a/providers/llamacpp-server/src/ai/runtime.ts
+++ b/providers/llamacpp-server/src/ai/runtime.ts
@@ -6,4 +6,14 @@
 
 // organize-imports-ignore
 
-export * from "./registerLlamaCppServer";
+export * from "./common/LlamaCppServer_Client";
+export * from "./common/LlamaCppServer_TextGeneration";
+export * from "./common/LlamaCppServer_TextRewriter";
+export * from "./common/LlamaCppServer_TextSummary";
+export * from "./common/LlamaCppServer_TextEmbedding";
+export * from "./common/LlamaCppServer_ToolCalling";
+export * from "./common/LlamaCppServer_ModelInfo";
+export * from "./common/LlamaCppServer_ModelSearch";
+export * from "./common/LlamaCppServer_JobRunFns";
+export * from "./registerLlamaCppServerInline";
+export * from "./registerLlamaCppServerWorker";

From 0f0e4566fa708ca8b4579d094bdb4c2dceacc148 Mon Sep 17 00:00:00 2001
From: Steven Roussey <sroussey@gmail.com>
Date: Sat, 23 May 2026 21:48:44 +0000
Subject: [PATCH 4/8] test(llamacpp-server): full unit and integration test
 suite

---
 .../LlamaCppServerProvider.test.ts            | 173 ++++++++++++++++++
 .../LlamaCppServer_Client.test.ts             |  96 ++++++++++
 ...LlamaCppServer_Generic.integration.test.ts |  71 +++++++
 .../LlamaCppServer_ModelInfo.test.ts          |  78 ++++++++
 .../LlamaCppServer_ModelSearch.test.ts        |  60 ++++++
 .../LlamaCppServer_TextEmbedding.test.ts      |  74 ++++++++
 ...lamaCppServer_TextGenerationStream.test.ts |  94 ++++++++++
 .../LlamaCppServer_ToolCalling.test.ts        |  81 ++++++++
 .../LocalBackendsProviderContracts.test.ts    |  14 +-
 .../ai/common/LlamaCppServer_ModelSearch.ts   |   2 +-
 10 files changed, 737 insertions(+), 6 deletions(-)
 create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_Generic.integration.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts

diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts
new file mode 100644
index 000000000..9150c1585
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts
@@ -0,0 +1,173 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { ModelRecord } from "@workglow/ai";
+import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import { _testOnly } from "@workglow/llamacpp-server/ai";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+const { LlamaCppServerQueuedProvider, LLAMACPP_SERVER_RUN_FN_SPECS, buildLlamaCppServerRunFns } =
+  _testOnly;
+
+/**
+ * Builds a minimal ModelRecord fixture for the LOCAL_LLAMACPP_SERVER provider.
+ * `provider_config` defaults to `{ model_path: "/models/<model_id>" }`.
+ * `capabilities` is copied, so the caller's array is never aliased.
+ */
+function model(
+  model_id: string,
+  provider_config: Record<string, unknown> = { model_path: `/models/${model_id}` },
+  capabilities: readonly string[] = []
+): ModelRecord {
+  const identity = { model_id, title: model_id, description: "" };
+  const routing = { provider: "LOCAL_LLAMACPP_SERVER", provider_config };
+  const declared = { capabilities: Array.from(capabilities), metadata: {} };
+  return { ...identity, ...routing, ...declared } as ModelRecord;
+}
+
+describe("LlamaCppServerQueuedProvider.inferCapabilities", () => {
+  const provider = new LlamaCppServerQueuedProvider(buildLlamaCppServerRunFns({}));
+
+  it("infers full text-gen set for a generic .gguf", () => {
+    const caps = provider.inferCapabilities(model("llama-3-8b-q4_k_m.gguf"));
+    expect(caps).toContain("text.generation");
+    expect(caps).toContain("tool-use");
+    expect(caps).toContain("text.rewriter");
+    expect(caps).toContain("text.summary");
+    expect(caps).toContain("model.info");
+    expect(caps).toContain("model.search");
+    expect(caps).not.toContain("vision-input");
+  });
+
+  it("infers vision-input for llava-family", () => {
+    const caps = provider.inferCapabilities(
+      model("llava-7b-v1.6-q4_k_m.gguf", { model_path: "/models/llava-7b-v1.6-q4_k_m.gguf" })
+    );
+    expect(caps).toContain("vision-input");
+    expect(caps).toContain("text.generation");
+  });
+
+  it("infers vision-input for bakllava", () => {
+    const caps = provider.inferCapabilities(
+      model("bakllava-q5.gguf", { model_path: "/models/bakllava-q5.gguf" })
+    );
+    expect(caps).toContain("vision-input");
+  });
+
+  it("infers text.embedding for nomic-embed gguf", () => {
+    const caps = provider.inferCapabilities(
+      model("nomic-embed-text.gguf", { model_path: "/models/nomic-embed-text.gguf" })
+    );
+    expect(caps).toContain("text.embedding");
+    expect(caps).not.toContain("text.generation");
+  });
+
+  it("infers text.embedding when native_dimensions is set explicitly", () => {
+    const caps = provider.inferCapabilities(
+      model("custom.gguf", { model_path: "/models/custom.gguf", native_dimensions: 768 })
+    );
+    expect(caps).toEqual(["text.embedding", "model.info", "model.search"]);
+  });
+
+  it("falls back to declared caps when id is empty", () => {
+    const caps = provider.inferCapabilities(model("", {}, ["text.classification"]));
+    expect(caps).toEqual(["text.classification"]);
+  });
+
+  it("falls back to baseline meta-ops when nothing matches and nothing is declared", () => {
+    const caps = provider.inferCapabilities(model("", {}));
+    expect(caps).toEqual(["model.info", "model.search"]);
+  });
+});
+
+describe("LlamaCppServer capability-set parity", () => {
+  it("LLAMACPP_SERVER_RUN_FN_SPECS matches buildLlamaCppServerRunFns({}) serves shapes", () => {
+    const fns = buildLlamaCppServerRunFns({});
+    const fnsServes = fns.map((r) => [...r.serves].sort().join(","));
+    const specsServes = LLAMACPP_SERVER_RUN_FN_SPECS.map((s) => [...s.serves].sort().join(","));
+    expect(specsServes).toEqual(fnsServes);
+  });
+});
+
+describe("LlamaCppServer run-fn shape", () => {
+  it("registers a runFn for every canonical capability set", () => {
+    const sets = buildLlamaCppServerRunFns({}).map((r) => [...r.serves].sort().join(","));
+    expect(sets).toContain("text.generation");
+    expect(sets).toContain("text.generation,tool-use");
+    expect(sets).toContain("text.rewriter");
+    expect(sets).toContain("text.summary");
+    expect(sets).toContain("text.embedding");
+    expect(sets).toContain("model.search");
+    expect(sets).toContain("model.info");
+  });
+
+  it("tiebreaks text.generation to the smallest serves entry", () => {
+    const candidates = buildLlamaCppServerRunFns({}).filter((r) =>
+      r.serves.includes("text.generation")
+    );
+    expect(candidates.some((r) => r.serves.length === 1)).toBe(true);
+  });
+});
+
+/** Test double cast to IBackendsTransport; every method listed here is a vitest mock. */
+function fakeTransport(): IBackendsTransport & { ensureRunning: ReturnType<typeof vi.fn> } {
+  const stub = {
+    ensureRunning: vi.fn(),
+    subscribeStatus: vi.fn(() => () => undefined),
+    install: vi.fn(),
+    list: vi.fn(),
+    uninstall: vi.fn(),
+  };
+  return stub as unknown as IBackendsTransport & { ensureRunning: ReturnType<typeof vi.fn> };
+}
+
+describe("LlamaCppServer transport-mode run-fn (parity across inline + worker)", () => {
+  // Restore the global fetch spy after every test, even when an assertion throws first.
+  afterEach(() => vi.restoreAllMocks());
+
+  it("acquires URL via transport and releases the handle (text.generation)", async () => {
+    const release = vi.fn().mockResolvedValue(undefined);
+    const transport = fakeTransport();
+    transport.ensureRunning.mockResolvedValue({
+      url: "http://broker:9999",
+      release,
+    } as IRunningHandle);
+
+    const enc = new TextEncoder();
+    const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(
+        new ReadableStream({
+          start(controller) {
+            controller.enqueue(
+              enc.encode(`data: ${JSON.stringify({ choices: [{ delta: { content: "ok" } }] })}\n`)
+            );
+            controller.enqueue(enc.encode("data: [DONE]\n"));
+            controller.close();
+          },
+        }),
+        { status: 200 }
+      )
+    );
+
+    const fns = buildLlamaCppServerRunFns({ transport });
+    const textGen = fns.find((r) => r.serves.join(",") === "text.generation")!;
+    const emit = (_e: any) => undefined;
+    await textGen.runFn(
+      { prompt: "hi" } as any,
+      { provider_config: { model_path: "/abs/m.gguf", ctx: 4096 } } as any,
+      undefined as any,
+      emit
+    );
+
+    expect(transport.ensureRunning).toHaveBeenCalledWith({
+      backend: "llamacpp-server",
+      modelPath: "/abs/m.gguf",
+      opts: { ctx: 4096 },
+    });
+    expect(String(fetchSpy.mock.calls[0]![0])).toBe("http://broker:9999/v1/chat/completions");
+    expect(release).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts
new file mode 100644
index 000000000..fecfa6516
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts
@@ -0,0 +1,96 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import { acquireBaseUrl } from "@workglow/llamacpp-server/ai-runtime";
+import { describe, expect, it, vi } from "vitest";
+
+type TransportStub = IBackendsTransport & { ensureRunning: ReturnType<typeof vi.fn> };
+/** Creates an object of vitest mocks and casts it to IBackendsTransport for these tests. */
+function fakeTransport(): TransportStub {
+  return {
+    ensureRunning: vi.fn(),
+    subscribeStatus: vi.fn(() => () => undefined),
+    install: vi.fn(),
+    list: vi.fn(),
+    uninstall: vi.fn(),
+  } as unknown as TransportStub;
+}
+
+describe("acquireBaseUrl precedence", () => {
+  it("prefers model.provider_config.base_url over everything", async () => {
+    const transport = fakeTransport();
+    const result = await acquireBaseUrl(
+      { provider_config: { base_url: "http://from-model:8080/" } } as any,
+      { externalUrl: "http://from-opts:8080", transport }
+    );
+    expect(result.baseUrl).toBe("http://from-model:8080");
+    expect(transport.ensureRunning).not.toHaveBeenCalled();
+    await result.release(); // no-op
+  });
+
+  it("prefers opts.externalUrl over transport when no model.base_url", async () => {
+    const transport = fakeTransport();
+    const result = await acquireBaseUrl({ provider_config: { model_path: "/x.gguf" } } as any, {
+      externalUrl: "http://from-opts:8080",
+      transport,
+    });
+    expect(result.baseUrl).toBe("http://from-opts:8080");
+    expect(transport.ensureRunning).not.toHaveBeenCalled();
+    await result.release(); // no-op
+  });
+
+  it("falls back to transport.ensureRunning when neither URL is set", async () => {
+    const release = vi.fn().mockResolvedValue(undefined);
+    const transport = fakeTransport();
+    transport.ensureRunning.mockResolvedValue({
+      url: "http://broker:9999/",
+      release,
+    } as IRunningHandle);
+    const result = await acquireBaseUrl(
+      { provider_config: { model_path: "/abs/m.gguf", ctx: 8192 } } as any,
+      { transport, defaultCtx: 4096 }
+    );
+    expect(transport.ensureRunning).toHaveBeenCalledWith({
+      backend: "llamacpp-server",
+      modelPath: "/abs/m.gguf",
+      opts: { ctx: 8192 },
+    });
+    expect(result.baseUrl).toBe("http://broker:9999");
+    await result.release();
+    expect(release).toHaveBeenCalledTimes(1);
+  });
+
+  it("uses defaultCtx when model has no ctx override", async () => {
+    const transport = fakeTransport();
+    transport.ensureRunning.mockResolvedValue({
+      url: "http://broker:9999",
+      release: vi.fn(),
+    } as IRunningHandle);
+    await acquireBaseUrl({ provider_config: { model_path: "/abs/m.gguf" } } as any, {
+      transport,
+      defaultCtx: 12345,
+    });
+    expect(transport.ensureRunning).toHaveBeenCalledWith({
+      backend: "llamacpp-server",
+      modelPath: "/abs/m.gguf",
+      opts: { ctx: 12345 },
+    });
+  });
+
+  it("throws when transport mode is selected but model_path is missing", async () => {
+    const transport = fakeTransport();
+    await expect(acquireBaseUrl({ provider_config: {} } as any, { transport })).rejects.toThrow(
+      /model_path/
+    );
+  });
+
+  it("throws when no source resolves", async () => {
+    await expect(acquireBaseUrl({ provider_config: {} } as any, {})).rejects.toThrow(
+      /no base URL source/
+    );
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_Generic.integration.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_Generic.integration.test.ts
new file mode 100644
index 000000000..2eeeba10d
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_Generic.integration.test.ts
@@ -0,0 +1,71 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import {
+  createLlamaCppServerModelInfoStream,
+  createLlamaCppServerModelSearchStream,
+  createLlamaCppServerTextEmbeddingStream,
+  createLlamaCppServerTextGenerationStream,
+} from "@workglow/llamacpp-server/ai-runtime";
+import { describe, expect, it } from "vitest";
+
+const RUN = process.env.RUN_LLAMACPP_SERVER_TESTS === "1";
+const BASE_URL = process.env.LLAMACPP_SERVER_URL ?? "http://localhost:8080";
+
+describe.skipIf(!RUN)("LlamaCppServer integration (real server)", () => {
+  const model = {
+    provider_config: { base_url: BASE_URL, model_name: "model" },
+  } as any;
+
+  it("text.generation streams non-empty content", async () => {
+    const fn = createLlamaCppServerTextGenerationStream({ externalUrl: BASE_URL });
+    let text = "";
+    const emit = (e: any) => {
+      if (e.type === "text-delta") text += e.textDelta;
+    };
+    await fn({ prompt: "Say hi.", maxTokens: 16 } as any, model, undefined as any, emit);
+    expect(text.length).toBeGreaterThan(0);
+  });
+
+  it("model.search returns at least one entry via /v1/models", async () => {
+    const fn = createLlamaCppServerModelSearchStream({ externalUrl: BASE_URL });
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ query: "" } as any, undefined as any, undefined as any, emit);
+    expect(events.at(-1)!.data.results.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it("model.info reports is_loaded=true for the running model", async () => {
+    const search = createLlamaCppServerModelSearchStream({ externalUrl: BASE_URL });
+    const searchEvents: any[] = [];
+    const searchEmit = (e: any) => searchEvents.push(e);
+    await search({ query: "" } as any, undefined as any, undefined as any, searchEmit);
+    const loaded = searchEvents.at(-1)!.data.results[0]!;
+    const fn = createLlamaCppServerModelInfoStream({ externalUrl: BASE_URL });
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn(
+      { model: loaded.id } as any,
+      { provider_config: { base_url: BASE_URL, model_name: loaded.id } } as any,
+      undefined as any,
+      emit
+    );
+    expect(events.at(-1)!.data.is_loaded).toBe(true);
+  });
+
+  it("text.embedding returns a Float32Array (skipped if /v1/embeddings 404s)", async () => {
+    const fn = createLlamaCppServerTextEmbeddingStream({ externalUrl: BASE_URL });
+    try {
+      const events: any[] = [];
+      const emit = (e: any) => events.push(e);
+      await fn({ text: "hello" } as any, model, undefined as any, emit);
+      expect(events.at(-1)!.data.vector).toBeInstanceOf(Float32Array);
+    } catch (err) {
+      if (/HTTP 404/.test(String(err))) return; // server not started with --embedding
+      throw err;
+    }
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts
new file mode 100644
index 000000000..654f75f8e
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts
@@ -0,0 +1,78 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createLlamaCppServerModelInfoStream } from "@workglow/llamacpp-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+afterEach(() => vi.restoreAllMocks());
+
+describe("createLlamaCppServerModelInfoStream", () => {
+  it("trusts native_dimensions when set", async () => {
+    const fetchSpy = vi.spyOn(globalThis, "fetch");
+    const fn = createLlamaCppServerModelInfoStream({});
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn(
+      { detail: "dimensions", model: "m" } as any,
+      { provider_config: { base_url: "http://x:8080", native_dimensions: 768 } } as any,
+      undefined as any,
+      emit
+    );
+    expect(fetchSpy).not.toHaveBeenCalled();
+    expect(events.at(-1)!.data.native_dimensions).toBe(768);
+  });
+
+  it("falls back to /props for embedding dimensions", async () => {
+    vi.spyOn(globalThis, "fetch").mockImplementation(async (url) => {
+      if (String(url).endsWith("/props")) {
+        return new Response(JSON.stringify({ default_generation_settings: { n_embd: 1024 } }), {
+          status: 200,
+        });
+      }
+      return new Response("", { status: 404 });
+    });
+    const fn = createLlamaCppServerModelInfoStream({});
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn(
+      { detail: "dimensions", model: "m" } as any,
+      { provider_config: { base_url: "http://x:8080" } } as any,
+      undefined as any,
+      emit
+    );
+    expect(events.at(-1)!.data.native_dimensions).toBe(1024);
+  });
+
+  it("reports is_loaded=true when /v1/models includes the model name", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({ data: [{ id: "m" }, { id: "other" }] }), { status: 200 })
+    );
+    const fn = createLlamaCppServerModelInfoStream({});
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn(
+      { model: "m" } as any,
+      { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any,
+      undefined as any,
+      emit
+    );
+    expect(events.at(-1)!.data.is_loaded).toBe(true);
+  });
+
+  it("reports is_loaded=false when server unreachable", async () => {
+    vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED"));
+    const fn = createLlamaCppServerModelInfoStream({});
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn(
+      { model: "m" } as any,
+      { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any,
+      undefined as any,
+      emit
+    );
+    expect(events.at(-1)!.data.is_loaded).toBe(false);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts
new file mode 100644
index 000000000..4c934d408
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts
@@ -0,0 +1,60 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createLlamaCppServerModelSearchStream } from "@workglow/llamacpp-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+afterEach(() => vi.restoreAllMocks());
+
+describe("createLlamaCppServerModelSearchStream", () => {
+  it("returns [] when no externalUrl set", async () => {
+    const fetchSpy = vi.spyOn(globalThis, "fetch");
+    const fn = createLlamaCppServerModelSearchStream({});
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ query: "" } as any, undefined as any, undefined as any, emit);
+    expect(fetchSpy).not.toHaveBeenCalled();
+    expect(events.at(-1)!.data.results).toEqual([]);
+  });
+
+  it("returns mapped results from /v1/models when externalUrl set", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({ data: [{ id: "loaded-model" }] }), { status: 200 })
+    );
+    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" });
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ query: "" } as any, undefined as any, undefined as any, emit);
+    const results = events.at(-1)!.data.results;
+    expect(results).toHaveLength(1);
+    expect(results[0].id).toBe("loaded-model");
+    expect(results[0].record.provider).toBe("LOCAL_LLAMACPP_SERVER");
+    expect(results[0].record.provider_config.base_url).toBe("http://x:8080");
+  });
+
+  it("returns [] when fetch fails", async () => {
+    vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED"));
+    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" });
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ query: "" } as any, undefined as any, undefined as any, emit);
+    expect(events.at(-1)!.data.results).toEqual([]);
+  });
+
+  it("filters by query case-insensitively", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({ data: [{ id: "Llama-3" }, { id: "Mistral" }] }), {
+        status: 200,
+      })
+    );
+    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" });
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ query: "llama" } as any, undefined as any, undefined as any, emit);
+    const results = events.at(-1)!.data.results;
+    expect(results.map((r: any) => r.id)).toEqual(["Llama-3"]);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts
new file mode 100644
index 000000000..3f991b362
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts
@@ -0,0 +1,74 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createLlamaCppServerTextEmbeddingStream } from "@workglow/llamacpp-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+afterEach(() => vi.restoreAllMocks());
+
+const model = { provider_config: { base_url: "http://x:8080", model_name: "emb" } } as any;
+
+describe("createLlamaCppServerTextEmbeddingStream", () => {
+  it("returns a single Float32Array for string input", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({ data: [{ embedding: [0.1, 0.2, 0.3] }] }), {
+        status: 200,
+        headers: { "Content-Type": "application/json" },
+      })
+    );
+    const fn = createLlamaCppServerTextEmbeddingStream({});
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ text: "hello" } as any, model, undefined as any, emit);
+    const finish = events.find((e) => e.type === "finish")!;
+    expect(finish.data.vector).toBeInstanceOf(Float32Array);
+    const arr = Array.from(finish.data.vector as Float32Array);
+    expect(arr).toHaveLength(3);
+    expect(arr[0]).toBeCloseTo(0.1, 5);
+    expect(arr[1]).toBeCloseTo(0.2, 5);
+    expect(arr[2]).toBeCloseTo(0.3, 5);
+  });
+
+  it("returns an array of Float32Arrays for string[] input", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({ data: [{ embedding: [1, 2] }, { embedding: [3, 4] }] }), {
+        status: 200,
+        headers: { "Content-Type": "application/json" },
+      })
+    );
+    const fn = createLlamaCppServerTextEmbeddingStream({});
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ text: ["a", "b"] } as any, model, undefined as any, emit);
+    const finish = events.find((e) => e.type === "finish")!;
+    expect(Array.isArray(finish.data.vector)).toBe(true);
+    expect(finish.data.vector).toHaveLength(2);
+    expect(finish.data.vector[0]).toBeInstanceOf(Float32Array);
+  });
+
+  it("throws on HTTP error", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("oops", { status: 500 }));
+    const fn = createLlamaCppServerTextEmbeddingStream({});
+    const emit = (_e: any) => undefined;
+    await expect(fn({ text: "x" } as any, model, undefined as any, emit)).rejects.toThrow(
+      /embeddings/
+    );
+  });
+
+  it("throws when /v1/embeddings returns fewer embeddings than inputs", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({ data: [] }), {
+        status: 200,
+        headers: { "Content-Type": "application/json" },
+      })
+    );
+    const fn = createLlamaCppServerTextEmbeddingStream({});
+    const emit = (_e: any) => undefined;
+    await expect(fn({ text: "x" } as any, model, undefined as any, emit)).rejects.toThrow(
+      /returned 0 embeddings for 1 input/
+    );
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts
new file mode 100644
index 000000000..5efa1ced7
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts
@@ -0,0 +1,94 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createLlamaCppServerTextGenerationStream } from "@workglow/llamacpp-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+/** Wraps `chunks` in a 200 text/event-stream Response; each string is enqueued as one chunk. */
+function sseResponse(chunks: string[]): Response {
+  const encoder = new TextEncoder();
+  const init = { status: 200, headers: { "Content-Type": "text/event-stream" } };
+  const start = (controller: ReadableStreamDefaultController<Uint8Array>): void => {
+    chunks.forEach((chunk) => controller.enqueue(encoder.encode(chunk)));
+    controller.close();
+  };
+  return new Response(new ReadableStream({ start }), init);
+}
+
+/** Formats one chat-completion content delta as a newline-terminated SSE `data:` line. */
+const dataLine = (delta: string): string =>
+  "data: " + JSON.stringify({ choices: [{ delta: { content: delta } }] }) + "\n";
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+describe("createLlamaCppServerTextGenerationStream", () => {
+  const model = { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any;
+
+  it("yields text-delta events for each delta line and a final finish", async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, "fetch")
+      .mockResolvedValue(sseResponse([dataLine("Hel"), dataLine("lo"), "data: [DONE]\n"]));
+    const fn = createLlamaCppServerTextGenerationStream({});
+
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ prompt: "hi" } as any, model, undefined as any, emit);
+
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+    const [url] = fetchSpy.mock.calls[0]!;
+    expect(String(url)).toBe("http://x:8080/v1/chat/completions");
+    expect(events.filter((e) => e.type === "text-delta").map((e) => e.textDelta)).toEqual([
+      "Hel",
+      "lo",
+    ]);
+    expect(events[events.length - 1].type).toBe("finish");
+  });
+
+  it("uses chat messages when input.messages is non-empty", async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, "fetch")
+      .mockResolvedValue(sseResponse([dataLine("ok"), "data: [DONE]\n"]));
+    const fn = createLlamaCppServerTextGenerationStream({});
+    const emit = (_e: any) => undefined;
+    await fn(
+      {
+        prompt: "ignored",
+        messages: [{ role: "user", content: "hi" }],
+        systemPrompt: "be helpful",
+      } as any,
+      model,
+      undefined as any,
+      emit
+    );
+    const body = JSON.parse(String((fetchSpy.mock.calls[0]![1] as RequestInit).body));
+    expect(body.messages).toEqual([
+      { role: "system", content: "be helpful" },
+      { role: "user", content: "hi" },
+    ]);
+  });
+
+  it("throws on non-2xx with informative message", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("nope", { status: 500 }));
+    const fn = createLlamaCppServerTextGenerationStream({});
+    const emit = (_e: any) => undefined;
+    await expect(fn({ prompt: "x" } as any, model, undefined as any, emit)).rejects.toThrow(
+      /HTTP 500/
+    );
+  });
+
+  it("aborts pending fetch when signal aborts before request", async () => {
+    const controller = new AbortController();
+    controller.abort();
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      sseResponse([dataLine("ok"), "data: [DONE]\n"])
+    );
+    const fn = createLlamaCppServerTextGenerationStream({});
+    const emit = (_e: any) => undefined;
+    await expect(fn({ prompt: "x" } as any, model, controller.signal, emit)).rejects.toThrow();
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts
new file mode 100644
index 000000000..7030fe8a5
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts
@@ -0,0 +1,81 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createLlamaCppServerToolCallingStream } from "@workglow/llamacpp-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+/** Streams each object as an SSE `data:` line, then `data: [DONE]`, inside a 200 Response. */
+function sseChunks(chunks: object[]): Response {
+  const encoder = new TextEncoder();
+  const frames = [...chunks.map((chunk) => `data: ${JSON.stringify(chunk)}\n`), "data: [DONE]\n"];
+  const start = (controller: ReadableStreamDefaultController<Uint8Array>): void => {
+    frames.forEach((frame) => controller.enqueue(encoder.encode(frame)));
+    controller.close();
+  };
+  const body = new ReadableStream({ start });
+  return new Response(body, { status: 200 });
+}
+
+afterEach(() => vi.restoreAllMocks());
+
+const model = { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any;
+const TOOLS = [
+  {
+    name: "add",
+    description: "add",
+    inputSchema: { type: "object", properties: { a: { type: "number" }, b: { type: "number" } } },
+  },
+];
+
+describe("createLlamaCppServerToolCallingStream", () => {
+  it("accumulates partial-JSON args across deltas and emits final tool calls", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      sseChunks([
+        {
+          choices: [
+            {
+              delta: {
+                tool_calls: [
+                  { index: 0, id: "c0", function: { name: "add", arguments: '{"a":1' } },
+                ],
+              },
+            },
+          ],
+        },
+        {
+          choices: [{ delta: { tool_calls: [{ index: 0, function: { arguments: ',"b":2}' } }] } }],
+        },
+      ])
+    );
+    const fn = createLlamaCppServerToolCallingStream({});
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn(
+      { prompt: "p", tools: TOOLS, toolChoice: "auto" } as any,
+      model,
+      undefined as any,
+      emit
+    );
+    const finish = events.find((e) => e.type === "finish")!;
+    expect(finish.data.toolCalls).toEqual([{ id: "c0", name: "add", input: { a: 1, b: 2 } }]);
+  });
+
+  it("omits tools[] when toolChoice='none'", async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, "fetch")
+      .mockResolvedValue(sseChunks([{ choices: [{ delta: { content: "hi" } }] }]));
+    const fn = createLlamaCppServerToolCallingStream({});
+    const emit = (_e: any) => undefined;
+    await fn(
+      { prompt: "p", tools: TOOLS, toolChoice: "none" } as any,
+      model,
+      undefined as any,
+      emit
+    );
+    const body = JSON.parse(String((fetchSpy.mock.calls[0]![1] as RequestInit).body));
+    expect(body.tools).toBeUndefined();
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
index 7e0f04c5d..d7d3db18c 100644
--- a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
+++ b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
@@ -18,7 +18,7 @@ import type {
   IRunningHandle,
 } from "@workglow/ai/provider-utils";
 import { pngBytesToImageValue } from "@workglow/ai/provider-utils";
-import { LlamaCppServerProvider } from "@workglow/llamacpp-server/ai";
+import { LOCAL_LLAMACPP_SERVER, registerLlamaCppServerInline } from "@workglow/llamacpp-server/ai";
 import { StableDiffusionCppProvider } from "@workglow/stable-diffusion-server/ai";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 
@@ -109,10 +109,9 @@ describe("local backend provider stream contracts", () => {
 
   it("llama.cpp stops after [DONE] even if the server keeps the stream open", async () => {
     const { release, transport } = createTransportStub();
-    const provider = new LlamaCppServerProvider({ transport });
-    await provider.register();
+    await registerLlamaCppServerInline({ transport });
 
-    const runFn = getAiProviderRegistry().getRunFnFor(provider.name, ["text.generation"]);
+    const runFn = getAiProviderRegistry().getRunFnFor(LOCAL_LLAMACPP_SERVER, ["text.generation"]);
     expect(runFn).toBeDefined();
 
     const payload = new TextEncoder().encode(
@@ -144,13 +143,18 @@ describe("local backend provider stream contracts", () => {
         ({
           ok: true,
           body: { getReader: () => reader },
+          text: async (): Promise<string> => "",
         }) as unknown as Response
     ) as unknown as typeof fetch;
 
     const events = await runProviderStream(
       runFn!,
       { prompt: "hello" },
-      { model_id: "/models/llama.gguf" }
+      {
+        model_id: "llama-test",
+        provider: LOCAL_LLAMACPP_SERVER,
+        provider_config: { model_path: "/models/llama.gguf" },
+      }
     );
 
     expect(events).toEqual([
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
index ffd3eca39..5e97e4acb 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
@@ -35,7 +35,7 @@ export function createLlamaCppServerModelSearchStream(
       const results = (body.data ?? []).map((m) => ({
         id: m.id,
         label: m.id,
-        description: "llama-server loaded model",
+        description: m.id,
         record: {
           model_id: m.id,
           provider: LOCAL_LLAMACPP_SERVER,

From 1d69434a60126c79bbe5b2b58b88973422edcb43 Mon Sep 17 00:00:00 2001
From: Steven Roussey <sroussey@gmail.com>
Date: Sat, 23 May 2026 22:04:55 +0000
Subject: [PATCH 5/8] =?UTF-8?q?docs(llamacpp-server):=20README=20=E2=80=94?=
 =?UTF-8?q?=20install,=20quickstart,=20capability=20table,=20browser/worke?=
 =?UTF-8?q?r=20constraints?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 providers/llamacpp-server/README.md | 121 ++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 providers/llamacpp-server/README.md

diff --git a/providers/llamacpp-server/README.md b/providers/llamacpp-server/README.md
new file mode 100644
index 000000000..622bd1b5e
--- /dev/null
+++ b/providers/llamacpp-server/README.md
@@ -0,0 +1,121 @@
+# `@workglow/llamacpp-server`
+
+OpenAI-compatible HTTP client for an upstream
+[`llama-server`](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)
+instance.
+
+This package **does not bundle llama.cpp**. It speaks to a running
+`llama-server` process — either one you start yourself (`externalUrl` mode)
+or one acquired through an `IBackendsTransport` (`transport` mode, used by
+the Workglow Builder's broker).
+
+## Install
+
+```bash
+bun add @workglow/llamacpp-server
+```
+
+You also need `@workglow/ai`, `@workglow/task-graph`, `@workglow/storage`,
+`@workglow/job-queue`, and `@workglow/util` (peer dependencies).
+
+## Quickstart — `externalUrl` mode
+
+Start `llama-server` yourself, then point the provider at it:
+
+```bash
+llama-server -m ./models/llama-3-8b-q4_k_m.gguf --port 8080 --embedding
+```
+
+```ts
+import { registerLlamaCppServerInline } from "@workglow/llamacpp-server/ai-runtime";
+
+await registerLlamaCppServerInline({
+  externalUrl: "http://localhost:8080",
+});
+```
+
+The provider is now visible to the registry as `LOCAL_LLAMACPP_SERVER`.
+
+## Quickstart — `transport` mode (Electron + broker)
+
+```ts
+import { registerLlamaCppServerInline } from "@workglow/llamacpp-server/ai-runtime";
+
+await registerLlamaCppServerInline({
+  transport: backendsTransport, // your IBackendsTransport implementation
+  defaultCtx: 4096,
+});
+```
+
+In transport mode each model record must include
+`provider_config.model_path` — the absolute path to the `.gguf` file. The
+broker spawns one `llama-server` per `(modelPath, ctx)` pair, shared by
+refcount.
+
+## Model record shape
+
+```ts
+{
+  model_id: "llama-3-8b",
+  provider: "LOCAL_LLAMACPP_SERVER",
+  provider_config: {
+    model_path: "/abs/path/to/llama-3-8b.gguf", // required for transport mode
+    model_name: "llama-3-8b",                    // optional; sent as OpenAI `model` field
+    base_url: "http://localhost:8080",           // optional per-record override
+    native_dimensions: 768,                       // optional embedding-dim override
+    ctx: 8192,                                    // optional ctx override
+  },
+  capabilities: [],
+  metadata: {},
+}
+```
+
+The provider's `inferCapabilities` heuristic populates the capability set
+at runtime based on the file name (llava → vision, `*embed*` → embedding,
+otherwise full text-gen + tool-use).
+
+## Supported capabilities
+
+| Capability | Endpoint | Notes |
+|---|---|---|
+| `text.generation` | `POST /v1/chat/completions` | Chat + prompt unified |
+| `text.generation` + `tool-use` | `POST /v1/chat/completions` with `tools[]` | OpenAI tool calls |
+| `text.rewriter` | `POST /v1/chat/completions` | System=prompt, user=text |
+| `text.summary` | `POST /v1/chat/completions` | Fixed summary instruction |
+| `text.embedding` | `POST /v1/embeddings` | Requires `--embedding` flag |
+| `vision-input` | `POST /v1/chat/completions` with `image_url` parts | llava-family models |
+| `model.info` | `GET /v1/models` + `GET /props` | Embedding dims via `n_embd` |
+| `model.search` | `GET /v1/models` | externalUrl mode only — see below |
+
+### Why `model.search` returns `[]` in transport mode
+
+`transport.ensureRunning` requires a `modelPath`, which is what
+`model.search` is meant to help the user pick. The broker's catalog of
+installed models is the Builder UI's concern, not the provider's. In
+`externalUrl` mode `GET /v1/models` works and returns the one model the
+server has loaded.
+
+## Registration shapes
+
+Three registration entry points; the first two share the same options:
+
+- **`registerLlamaCppServerInline({ transport?, externalUrl?, defaultCtx? })`**
+  — main-thread inline. Primarily used in tests and any single-thread
+  embedding scenario.
+- **`registerLlamaCppServerWorker({ transport?, externalUrl?, defaultCtx? })`**
+  — called inside a worker runtime. This is the primary production path.
+  The worker constructs its own `IBackendsTransport` (e.g.,
+  `MessagePortBackendsTransport`) and passes it here directly — no port
+  transfer happens.
+- **`registerLlamaCppServer({ worker })`** — main-thread proxy that
+  forwards jobs to a worker. The actual run-fns and transport live in
+  the worker; this side only exposes the provider identifier to the
+  registry.
+
+## Browser
+
+`@workglow/llamacpp-server/ai` resolves to a browser bundle that uses the
+exact same source as the node bundle. Pure `fetch` works the same in
+both. In a plain browser there is no broker to construct an
+`IBackendsTransport` against, so practical use is `externalUrl` mode;
+nothing in the code forbids passing a custom transport if one exists.

From 5e5c8d9ca39f542d74cd0d0e4cc39a7391b1f789 Mon Sep 17 00:00:00 2001
From: Steven Roussey <sroussey@gmail.com>
Date: Sat, 23 May 2026 22:41:00 +0000
Subject: [PATCH 6/8] =?UTF-8?q?feat(stable-diffusion-server):=20@workglow/?=
 =?UTF-8?q?stable-diffusion-server=20provider=20=E2=80=94=20run-fns,=20reg?=
 =?UTF-8?q?istration,=20and=20barrels?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../stable-diffusion-server/package.json      |  16 +-
 .../src/ai-runtime.browser.ts                 |   9 +
 .../stable-diffusion-server/src/ai.browser.ts |   9 +
 .../src/ai/StableDiffusionCppProvider.ts      | 166 +++---------------
 .../ai/StableDiffusionCppQueuedProvider.ts    |  34 ++++
 .../common/StableDiffusionCpp_Capabilities.ts |  37 ++++
 .../StableDiffusionCpp_CapabilitySets.ts      |  22 +++
 .../ai/common/StableDiffusionCpp_Client.ts    | 113 ++++++++++++
 .../ai/common/StableDiffusionCpp_Constants.ts |   3 +
 .../ai/common/StableDiffusionCpp_ImageEdit.ts |  75 ++++++++
 .../StableDiffusionCpp_ImageGenerate.ts       |  77 ++++++++
 .../ai/common/StableDiffusionCpp_JobRunFns.ts |  48 +++++
 .../ai/common/StableDiffusionCpp_ModelInfo.ts |  55 ++++++
 .../common/StableDiffusionCpp_ModelSchema.ts  |  82 +++++++++
 .../common/StableDiffusionCpp_ModelSearch.ts  |  53 ++++++
 .../ai/common/StableDiffusionCpp_ModelUtil.ts |  35 ++++
 .../stable-diffusion-server/src/ai/index.ts   |  17 +-
 .../src/ai/registerStableDiffusionCpp.ts      |  30 ++--
 .../ai/registerStableDiffusionCppInline.ts    |  28 +++
 .../ai/registerStableDiffusionCppWorker.ts    |  27 +++
 .../stable-diffusion-server/src/ai/runtime.ts |   9 +-
 21 files changed, 785 insertions(+), 160 deletions(-)
 create mode 100644 providers/stable-diffusion-server/src/ai-runtime.browser.ts
 create mode 100644 providers/stable-diffusion-server/src/ai.browser.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/StableDiffusionCppQueuedProvider.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Capabilities.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_CapabilitySets.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_JobRunFns.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSchema.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelUtil.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/registerStableDiffusionCppInline.ts
 create mode 100644 providers/stable-diffusion-server/src/ai/registerStableDiffusionCppWorker.ts

diff --git a/providers/stable-diffusion-server/package.json b/providers/stable-diffusion-server/package.json
index b3718e830..02845c13a 100644
--- a/providers/stable-diffusion-server/package.json
+++ b/providers/stable-diffusion-server/package.json
@@ -12,20 +12,30 @@
   "scripts": {
     "watch": "concurrently -c 'auto' 'bun:watch-*'",
     "watch-code": "bun build --watch --no-clear-screen --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "watch-browser": "bun build --watch --no-clear-screen --target=browser --sourcemap=external --packages=external --outdir ./dist ./src/ai.browser.ts ./src/ai-runtime.browser.ts",
     "watch-types": "tsc --watch --preserveWatchOutput",
-    "build-package": "concurrently -c 'auto' -n 'code,types' 'bun run build-code' 'bun run build-types'",
-    "build-js": "bun run build-code",
+    "build-package": "concurrently -c 'auto' -n 'code,browser,types' 'bun run build-code' 'bun run build-browser' 'bun run build-types'",
+    "build-js": "concurrently -c 'auto' -n 'code,browser' 'bun run build-code' 'bun run build-browser'",
     "build-clean": "rm -fr dist/* tsconfig.tsbuildinfo",
     "build-code": "bun build --sourcemap=external --packages=external --root ./src --outdir ./dist ./src/ai.ts ./src/ai-runtime.ts",
+    "build-browser": "bun build --target=browser --sourcemap=external --packages=external --outdir ./dist ./src/ai.browser.ts ./src/ai-runtime.browser.ts",
     "build-types": "rm -f tsconfig.tsbuildinfo && tsgo",
     "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0"
   },
   "exports": {
     "./ai": {
+      "browser": {
+        "types": "./dist/ai.d.ts",
+        "import": "./dist/ai.browser.js"
+      },
       "types": "./dist/ai.d.ts",
       "import": "./dist/ai.js"
     },
     "./ai-runtime": {
+      "browser": {
+        "types": "./dist/ai-runtime.d.ts",
+        "import": "./dist/ai-runtime.browser.js"
+      },
       "types": "./dist/ai-runtime.d.ts",
       "import": "./dist/ai-runtime.js"
     }
@@ -66,4 +76,4 @@
   "publishConfig": {
     "access": "public"
   }
-}
\ No newline at end of file
+}
diff --git a/providers/stable-diffusion-server/src/ai-runtime.browser.ts b/providers/stable-diffusion-server/src/ai-runtime.browser.ts
new file mode 100644
index 000000000..a1fd9b608
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai-runtime.browser.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/runtime";
diff --git a/providers/stable-diffusion-server/src/ai.browser.ts b/providers/stable-diffusion-server/src/ai.browser.ts
new file mode 100644
index 000000000..2210c547d
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai.browser.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// organize-imports-ignore
+
+export * from "./ai/index";
diff --git a/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts b/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts
index 2d38c2e99..eb94b10d8 100644
--- a/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts
+++ b/providers/stable-diffusion-server/src/ai/StableDiffusionCppProvider.ts
@@ -4,153 +4,39 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type {
-  AiProviderPreviewRunFn,
-  AiProviderRunFn,
-  AiProviderRunFnRegistration,
-  Capability,
-  ImageGenerateTaskInput,
-  ImageGenerateTaskOutput,
-  ModelConfig,
-  ModelRecord,
-} from "@workglow/ai";
-import { AiProvider } from "@workglow/ai";
-import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
-import { pngBytesToImageValue } from "@workglow/ai/provider-utils";
+import { createCloudProviderClass } from "@workglow/ai/provider-utils";
+import type { Capability, ModelRecord } from "@workglow/ai/worker";
+import { AiProvider } from "@workglow/ai/worker";
+import {
+  inferStableDiffusionCppCapabilities,
+  stableDiffusionCppWorkerRunFnSpecs,
+} from "./common/StableDiffusionCpp_Capabilities";
 import { LOCAL_STABLE_DIFFUSION_CPP } from "./common/StableDiffusionCpp_Constants";
+import type { StableDiffusionCppModelConfig } from "./common/StableDiffusionCpp_ModelSchema";
 
 /**
- * Endpoint variants for stable-diffusion.cpp HTTP servers. Default `/txt2img`
- * matches the conventional sd.cpp HTTP API; `/v1/images/generations` is used
- * by OpenAI-compatible builds. Configurable so callers can switch without
- * forking the provider while the Phase-8 integration spike is pending.
- */
-export type StableDiffusionCppEndpoint = "/txt2img" | "/v1/images/generations";
-
-export interface IStableDiffusionCppProviderOptions {
-  readonly transport: IBackendsTransport;
-  readonly externalUrl?: string;
-  readonly endpoint?: StableDiffusionCppEndpoint;
-}
-
-/**
- * HTTP client for a local stable-diffusion.cpp server. If `externalUrl` is
- * provided the server is assumed to already be running; otherwise the provider
- * acquires a handle via `transport.ensureRunning` before each request and
- * releases it afterwards.
+ * Worker-server registration shell for stable-diffusion.cpp.
  *
- * v1 scope: text-to-image only. Other capabilities are not registered; the
- * provider serves only image generation in v1.
+ * Both transport and externalUrl modes are supported. The `IBackendsTransport`
+ * is constructed inside the worker runtime by the caller and held by closure
+ * inside the run-fns — no port transfer across the worker boundary.
+ * Worker registration is the primary production path; inline registration
+ * (`StableDiffusionCppQueuedProvider`) is primarily a testing seam.
  */
-export class StableDiffusionCppProvider extends AiProvider {
-  readonly name = LOCAL_STABLE_DIFFUSION_CPP;
-  readonly displayName = "Local stable-diffusion.cpp (HTTP)";
-  readonly isLocal = true;
-  readonly supportsBrowser = false;
-
-  constructor(options: IStableDiffusionCppProviderOptions) {
-    const runFns: readonly AiProviderRunFnRegistration<
-      ImageGenerateTaskInput,
-      ImageGenerateTaskOutput,
-      ModelConfig
-    >[] = [
-      {
-        serves: ["image.generation"] as readonly Capability[],
-        runFn: createStableDiffusionCppImageGenerateRunFn(options) as AiProviderRunFn<
-          ImageGenerateTaskInput,
-          ImageGenerateTaskOutput,
-          ModelConfig
-        >,
-      },
-    ];
-
-    const previewTasks: Record<
-      string,
-      AiProviderPreviewRunFn<ImageGenerateTaskInput, ImageGenerateTaskOutput, ModelConfig>
-    > = {};
-
-    super(runFns, previewTasks);
+export class StableDiffusionCppProvider extends createCloudProviderClass<StableDiffusionCppModelConfig>(
+  AiProvider,
+  {
+    name: LOCAL_STABLE_DIFFUSION_CPP,
+    displayName: "Local stable-diffusion.cpp (HTTP)",
+    isLocal: true,
+    supportsBrowser: true,
   }
-
+) {
   override inferCapabilities(model: ModelRecord): readonly Capability[] {
-    return (model.capabilities as readonly Capability[] | undefined) ?? ["image.generation"];
+    return inferStableDiffusionCppCapabilities(model);
   }
-}
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Image-generation run-fn
-// ─────────────────────────────────────────────────────────────────────────────
-
-/**
- * One-shot run-fn for text-to-image generation via stable-diffusion.cpp HTTP server.
- *
- * Endpoint is selected via {@link IStableDiffusionCppProviderOptions.endpoint}
- * (defaults to `/txt2img`). Request: `POST <endpoint>` with `{ "prompt": "..." }`.
- * Response: `{ "images": ["<base64-png>", ...] }` — the first image is used.
- */
-function createStableDiffusionCppImageGenerateRunFn(
-  options: IStableDiffusionCppProviderOptions
-): AiProviderRunFn<ImageGenerateTaskInput, ImageGenerateTaskOutput, ModelConfig> {
-  const endpoint = options.endpoint ?? "/txt2img";
-  return async (input, model, signal, emit) => {
-    signal?.throwIfAborted?.();
-
-    const body = JSON.stringify({ prompt: input.prompt });
-
-    // Acquire base URL — either from external override or via transport.
-    let baseUrl: string;
-    let handle: IRunningHandle | undefined;
-
-    if (options.externalUrl) {
-      baseUrl = options.externalUrl.replace(/\/$/, "");
-    } else {
-      if (!model?.model_id) {
-        throw new Error(
-          "StableDiffusionCppProvider: model.model_id is required to acquire a backend"
-        );
-      }
-      handle = await options.transport.ensureRunning({
-        backend: "stable-diffusion-server",
-        modelPath: model.model_id,
-        opts: {},
-      });
-      baseUrl = handle.url.replace(/\/$/, "");
-    }
 
-    try {
-      signal?.throwIfAborted?.();
-
-      const response = await fetch(`${baseUrl}${endpoint}`, {
-        method: "POST",
-        headers: { "Content-Type": "application/json" },
-        body,
-        signal,
-      });
-
-      if (!response.ok) {
-        const text = await response.text().catch(() => "(no body)");
-        throw new Error(
-          `StableDiffusionCppProvider: HTTP ${response.status} from ${endpoint} — ${text}`
-        );
-      }
-
-      const json = (await response.json()) as { images?: string[] };
-      const base64 = json.images?.[0];
-      if (!base64) {
-        throw new Error("StableDiffusionCppProvider: response contained no images");
-      }
-
-      // Decode base64 PNG bytes platform-neutrally and wrap in an ImageValue.
-      // Avoids Node-only `Buffer.from(...)` so the provider stays runtime-agnostic.
-      const binary = atob(base64);
-      const bytes = new Uint8Array(binary.length);
-      for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
-      const image = await pngBytesToImageValue(bytes, "png");
-
-      emit({ type: "snapshot", data: { image } });
-      emit({ type: "finish", data: {} as ImageGenerateTaskOutput });
-    } finally {
-      await handle?.release();
-    }
-  };
+  protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] {
+    return stableDiffusionCppWorkerRunFnSpecs();
+  }
 }
diff --git a/providers/stable-diffusion-server/src/ai/StableDiffusionCppQueuedProvider.ts b/providers/stable-diffusion-server/src/ai/StableDiffusionCppQueuedProvider.ts
new file mode 100644
index 000000000..4574544e1
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/StableDiffusionCppQueuedProvider.ts
@@ -0,0 +1,34 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Capability, ModelRecord } from "@workglow/ai";
+import { AiProvider } from "@workglow/ai";
+import { createCloudProviderClass } from "@workglow/ai/provider-utils";
+import {
+  inferStableDiffusionCppCapabilities,
+  stableDiffusionCppWorkerRunFnSpecs,
+} from "./common/StableDiffusionCpp_Capabilities";
+import { LOCAL_STABLE_DIFFUSION_CPP } from "./common/StableDiffusionCpp_Constants";
+import type { StableDiffusionCppModelConfig } from "./common/StableDiffusionCpp_ModelSchema";
+
+/** Main-thread registration (inline or worker-backed). */
+export class StableDiffusionCppQueuedProvider extends createCloudProviderClass<StableDiffusionCppModelConfig>(
+  AiProvider,
+  {
+    name: LOCAL_STABLE_DIFFUSION_CPP,
+    displayName: "Local stable-diffusion.cpp (HTTP)",
+    isLocal: true,
+    supportsBrowser: true,
+  }
+) {
+  override inferCapabilities(model: ModelRecord): readonly Capability[] {
+    return inferStableDiffusionCppCapabilities(model);
+  }
+
+  protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] {
+    return stableDiffusionCppWorkerRunFnSpecs();
+  }
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Capabilities.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Capabilities.ts
new file mode 100644
index 000000000..2e551d1bf
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Capabilities.ts
@@ -0,0 +1,37 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Capability, ModelRecord } from "@workglow/ai/worker";
+import { STABLE_DIFFUSION_CPP_CAPABILITY_SETS } from "./StableDiffusionCpp_CapabilitySets";
+
+export const STABLE_DIFFUSION_CPP_RUN_FN_SPECS = STABLE_DIFFUSION_CPP_CAPABILITY_SETS.map(
+  (serves) => ({ serves })
+);
+
+export function stableDiffusionCppWorkerRunFnSpecs(): readonly {
+  readonly serves: readonly Capability[];
+}[] {
+  return STABLE_DIFFUSION_CPP_RUN_FN_SPECS;
+}
+
+type CapabilityHints = Pick<ModelRecord, "model_id" | "provider_config" | "capabilities">;
+
+/**
+ * sd-server hosts generative image models. A record whose identifier
+ * (provider_config.model_path, else model_name, else model_id) is non-empty
+ * gets the full set (image.generation + image.editing + meta-ops). Otherwise
+ * explicitly declared capabilities win, falling back to meta-ops only.
+ */
+export function inferStableDiffusionCppCapabilities(model: CapabilityHints): readonly Capability[] {
+  const pc = model.provider_config as { model_path?: string; model_name?: string } | undefined;
+  const id = String(pc?.model_path ?? pc?.model_name ?? model.model_id ?? "");
+  if (id.length > 0) {
+    return ["image.generation", "image.editing", "model.info", "model.search"];
+  }
+  const declared = (model.capabilities as readonly Capability[] | undefined) ?? [];
+  if (declared.length > 0) return declared;
+  return ["model.info", "model.search"];
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_CapabilitySets.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_CapabilitySets.ts
new file mode 100644
index 000000000..99a5a38a6
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_CapabilitySets.ts
@@ -0,0 +1,22 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { Capability } from "@workglow/ai/worker";
+
+export const STABLE_DIFFUSION_CPP_IMAGE_GENERATION = [
+  "image.generation",
+] as const satisfies Capability[];
+export const STABLE_DIFFUSION_CPP_IMAGE_EDITING = ["image.editing"] as const satisfies Capability[];
+export const STABLE_DIFFUSION_CPP_MODEL_INFO = ["model.info"] as const satisfies Capability[];
+export const STABLE_DIFFUSION_CPP_MODEL_SEARCH = ["model.search"] as const satisfies Capability[];
+
+/** Order MUST match STABLE_DIFFUSION_CPP_RUN_FNS in JobRunFns. */
+export const STABLE_DIFFUSION_CPP_CAPABILITY_SETS = [
+  STABLE_DIFFUSION_CPP_IMAGE_GENERATION,
+  STABLE_DIFFUSION_CPP_IMAGE_EDITING,
+  STABLE_DIFFUSION_CPP_MODEL_SEARCH,
+  STABLE_DIFFUSION_CPP_MODEL_INFO,
+] as const;
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts
new file mode 100644
index 000000000..205be3448
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts
@@ -0,0 +1,113 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
+
+/**
+ * Endpoint variants for stable-diffusion.cpp HTTP servers. Default `/txt2img`
+ * matches the conventional sd.cpp HTTP API; `/v1/images/generations` is used
+ * by OpenAI-compatible builds.
+ */
+export type StableDiffusionCppEndpoint = "/txt2img" | "/v1/images/generations";
+
+/**
+ * Provider-construction options shared across registrations.
+ *
+ * `transport` and `externalUrl` are both optional, but the resolver throws
+ * at acquisition time if no URL source resolves for a given request.
+ */
+export interface IStableDiffusionCppProviderOptions {
+  readonly transport?: IBackendsTransport;
+  readonly externalUrl?: string;
+  /** Fallback endpoint used when the model's provider_config.endpoint is not set. */
+  readonly endpoint?: StableDiffusionCppEndpoint;
+}
+
+/** Resolved base URL plus a release callback (no-op for externalUrl paths). */
+export interface IAcquiredBaseUrl {
+  readonly baseUrl: string;
+  readonly release: () => Promise<void>;
+}
+
+/**
+ * Resolve a base URL for one request.
+ *
+ * Precedence:
+ *   1. `model.provider_config.base_url`
+ *   2. `opts.externalUrl`
+ *   3. `opts.transport.ensureRunning({ ... })` — requires `provider_config.model_path`
+ *
+ * Throws with a clear message if none of the three resolves.
+ */
+export async function acquireBaseUrl(
+  model: StableDiffusionCppModelConfig | undefined,
+  opts: IStableDiffusionCppProviderOptions
+): Promise<IAcquiredBaseUrl> {
+  const modelBaseUrl = model?.provider_config?.base_url;
+  if (typeof modelBaseUrl === "string" && modelBaseUrl.length > 0) {
+    return { baseUrl: stripTrailingSlash(modelBaseUrl), release: noopRelease };
+  }
+  if (typeof opts.externalUrl === "string" && opts.externalUrl.length > 0) {
+    return { baseUrl: stripTrailingSlash(opts.externalUrl), release: noopRelease };
+  }
+  if (opts.transport) {
+    const modelPath = model?.provider_config?.model_path;
+    if (typeof modelPath !== "string" || modelPath.length === 0) {
+      throw new Error(
+        "StableDiffusionCpp: transport-mode acquisition requires provider_config.model_path."
+      );
+    }
+    const handle: IRunningHandle = await opts.transport.ensureRunning({
+      backend: "stable-diffusion-server",
+      modelPath,
+      opts: {},
+    });
+    return {
+      baseUrl: stripTrailingSlash(handle.url),
+      release: () => handle.release(),
+    };
+  }
+  throw new Error(
+    "StableDiffusionCpp: no base URL source — set provider_config.base_url, opts.externalUrl, or opts.transport."
+  );
+}
+
+function stripTrailingSlash(url: string): string {
+  return url.replace(/\/+$/, "");
+}
+
+const noopRelease = async (): Promise<void> => {};
+
+// ── Base64 PNG helpers ─────────────────────────────────────────────────────
+
+/**
+ * Decodes a base64-encoded PNG string to bytes platform-neutrally.
+ * Avoids Node-only `Buffer.from(...)` so the provider stays runtime-agnostic.
+ */
+export function decodeBase64Png(b64: string): Uint8Array {
+  const binary = atob(b64);
+  const bytes = new Uint8Array(binary.length);
+  for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
+  return bytes;
+}
+
+/**
+ * Encodes raw bytes to a base64 string platform-neutrally.
+ * Used for `image.editing` to send the source image as a base64 PNG.
+ */
+export function encodeBytesToBase64(bytes: Uint8Array): string {
+  let binary = "";
+  // Process in chunks to avoid blowing the call stack for large images.
+  const CHUNK = 0x8000;
+  for (let i = 0; i < bytes.length; i += CHUNK) {
+    binary += String.fromCharCode.apply(
+      null,
+      Array.from(bytes.subarray(i, Math.min(i + CHUNK, bytes.length)))
+    );
+  }
+  return btoa(binary);
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts
index 56c976ccd..97dc9c9e0 100644
--- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Constants.ts
@@ -5,3 +5,6 @@
  */
 
 export const LOCAL_STABLE_DIFFUSION_CPP = "LOCAL_STABLE_DIFFUSION_CPP";
+
+/** Default sd.cpp HTTP endpoint when neither the model nor the provider options set one. */
+export const STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT = "/txt2img" as const;
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts
new file mode 100644
index 000000000..cc4dbac24
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts
@@ -0,0 +1,75 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFn, ImageEditTaskInput, ImageEditTaskOutput } from "@workglow/ai";
+import { imageValueToPngBytes, pngBytesToImageValue } from "@workglow/ai/provider-utils";
+import {
+  acquireBaseUrl,
+  decodeBase64Png,
+  encodeBytesToBase64,
+  type IStableDiffusionCppProviderOptions,
+} from "./StableDiffusionCpp_Client";
+import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
+import { getStableDiffusionCppModelName } from "./StableDiffusionCpp_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+/**
+ * One-shot run-fn for image + prompt -> image (img2img) via stable-diffusion.cpp.
+ *
+ * Request: `POST /img2img` with `{ prompt, init_image: base64Png, model? }`.
+ * Response: `{ images: [base64Png, ...] }` — first image used.
+ * Emits `snapshot` then `finish`.
+ *
+ * Always uses `/img2img` — no OpenAI-compat alternative because
+ * `/v1/images/edits` is multipart and sd.cpp doesn't speak that shape.
+ */
+export function createStableDiffusionCppImageEditRunFn(
+  opts: IStableDiffusionCppProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<ImageEditTaskInput, ImageEditTaskOutput, StableDiffusionCppModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    const modelName = getStableDiffusionCppModelName(model);
+
+    const inputBytes = await imageValueToPngBytes(input.image);
+    const initImageB64 = encodeBytesToBase64(inputBytes);
+
+    const body = JSON.stringify({
+      prompt: input.prompt,
+      init_image: initImageB64,
+      ...(modelName ? { model: modelName } : {}),
+    });
+
+    const { baseUrl, release } = await acquire(model, opts);
+    try {
+      signal?.throwIfAborted?.();
+      const response = await fetch(`${baseUrl}/img2img`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body,
+        signal,
+      });
+      if (!response.ok) {
+        const text = await response.text().catch(() => "(no body)");
+        throw new Error(
+          `StableDiffusionCpp: HTTP ${response.status} from /img2img (image-editing) — ${text}`
+        );
+      }
+      const json = (await response.json()) as { images?: string[] };
+      const base64 = json.images?.[0];
+      if (!base64) {
+        throw new Error("StableDiffusionCpp: response contained no images");
+      }
+      const bytes = decodeBase64Png(base64);
+      const image = await pngBytesToImageValue(bytes, "png");
+      emit({ type: "snapshot", data: { image } });
+      emit({ type: "finish", data: {} as ImageEditTaskOutput });
+    } finally {
+      await release();
+    }
+  };
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts
new file mode 100644
index 000000000..fa2666069
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts
@@ -0,0 +1,77 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type {
+  AiProviderRunFn,
+  ImageGenerateTaskInput,
+  ImageGenerateTaskOutput,
+} from "@workglow/ai";
+import { pngBytesToImageValue } from "@workglow/ai/provider-utils";
+import {
+  acquireBaseUrl,
+  decodeBase64Png,
+  type IStableDiffusionCppProviderOptions,
+} from "./StableDiffusionCpp_Client";
+import { STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT } from "./StableDiffusionCpp_Constants";
+import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
+import { getStableDiffusionCppModelName } from "./StableDiffusionCpp_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+/**
+ * One-shot run-fn for text -> image via stable-diffusion.cpp HTTP server.
+ * Endpoint resolution: model.provider_config.endpoint > opts.endpoint >
+ * STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT (`/txt2img`).
+ *
+ * Request: `POST <endpoint>` with `{ prompt, model?, ...optional params }`.
+ * Response: `{ images: [base64Png, ...] }` — first image used.
+ * Emits a `snapshot` with the decoded image, then `finish`.
+ */
+export function createStableDiffusionCppImageGenerateRunFn(
+  opts: IStableDiffusionCppProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<ImageGenerateTaskInput, ImageGenerateTaskOutput, StableDiffusionCppModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+
+    const endpoint =
+      model?.provider_config?.endpoint ?? opts.endpoint ?? STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT;
+    const modelName = getStableDiffusionCppModelName(model);
+
+    const body = JSON.stringify({
+      prompt: input.prompt,
+      ...(modelName ? { model: modelName } : {}),
+    });
+
+    const { baseUrl, release } = await acquire(model, opts);
+    try {
+      signal?.throwIfAborted?.();
+      const response = await fetch(`${baseUrl}${endpoint}`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body,
+        signal,
+      });
+      if (!response.ok) {
+        const text = await response.text().catch(() => "(no body)");
+        throw new Error(
+          `StableDiffusionCpp: HTTP ${response.status} from ${endpoint} (image-generation) — ${text}`
+        );
+      }
+      const json = (await response.json()) as { images?: string[] };
+      const base64 = json.images?.[0];
+      if (!base64) {
+        throw new Error("StableDiffusionCpp: response contained no images");
+      }
+      const bytes = decodeBase64Png(base64);
+      const image = await pngBytesToImageValue(bytes, "png");
+      emit({ type: "snapshot", data: { image } });
+      emit({ type: "finish", data: {} as ImageGenerateTaskOutput });
+    } finally {
+      await release();
+    }
+  };
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_JobRunFns.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_JobRunFns.ts
new file mode 100644
index 000000000..9256058ca
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_JobRunFns.ts
@@ -0,0 +1,48 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFnRegistration } from "@workglow/ai";
+import {
+  STABLE_DIFFUSION_CPP_IMAGE_EDITING,
+  STABLE_DIFFUSION_CPP_IMAGE_GENERATION,
+  STABLE_DIFFUSION_CPP_MODEL_INFO,
+  STABLE_DIFFUSION_CPP_MODEL_SEARCH,
+} from "./StableDiffusionCpp_CapabilitySets";
+import { type IStableDiffusionCppProviderOptions } from "./StableDiffusionCpp_Client";
+import { createStableDiffusionCppImageEditRunFn } from "./StableDiffusionCpp_ImageEdit";
+import { createStableDiffusionCppImageGenerateRunFn } from "./StableDiffusionCpp_ImageGenerate";
+import { createStableDiffusionCppModelInfoRunFn } from "./StableDiffusionCpp_ModelInfo";
+import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
+import { createStableDiffusionCppModelSearchRunFn } from "./StableDiffusionCpp_ModelSearch";
+
+export function buildStableDiffusionCppRunFns(
+  opts: IStableDiffusionCppProviderOptions
+): readonly AiProviderRunFnRegistration<
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  any,
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  any,
+  StableDiffusionCppModelConfig
+>[] {
+  return [
+    {
+      serves: STABLE_DIFFUSION_CPP_IMAGE_GENERATION,
+      runFn: createStableDiffusionCppImageGenerateRunFn(opts),
+    },
+    {
+      serves: STABLE_DIFFUSION_CPP_IMAGE_EDITING,
+      runFn: createStableDiffusionCppImageEditRunFn(opts),
+    },
+    {
+      serves: STABLE_DIFFUSION_CPP_MODEL_SEARCH,
+      runFn: createStableDiffusionCppModelSearchRunFn(opts),
+    },
+    {
+      serves: STABLE_DIFFUSION_CPP_MODEL_INFO,
+      runFn: createStableDiffusionCppModelInfoRunFn(opts),
+    },
+  ];
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts
new file mode 100644
index 000000000..03726b6ad
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts
@@ -0,0 +1,55 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFn, ModelInfoTaskInput, ModelInfoTaskOutput } from "@workglow/ai";
+import {
+  acquireBaseUrl,
+  type IStableDiffusionCppProviderOptions,
+} from "./StableDiffusionCpp_Client";
+import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
+import { getStableDiffusionCppModelName } from "./StableDiffusionCpp_ModelUtil";
+
+type AcquireFn = typeof acquireBaseUrl;
+
+export function createStableDiffusionCppModelInfoRunFn(
+  opts: IStableDiffusionCppProviderOptions,
+  acquire: AcquireFn = acquireBaseUrl
+): AiProviderRunFn<ModelInfoTaskInput, ModelInfoTaskOutput, StableDiffusionCppModelConfig> {
+  return async (input, model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    let is_loaded = false;
+    const expectedName = getStableDiffusionCppModelName(model);
+
+    try {
+      const { baseUrl, release } = await acquire(model, opts);
+      try {
+        const res = await fetch(`${baseUrl}/v1/models`, { signal });
+        if (res.ok) {
+          const body = (await res.json()) as { data?: Array<{ id?: string }> };
+          is_loaded = !!body.data?.some((m) => m.id === expectedName);
+        }
+      } finally {
+        await release();
+      }
+    } catch {
+      // Server unreachable or /v1/models not implemented — leave is_loaded false.
+    }
+
+    emit({
+      type: "finish",
+      data: {
+        model: input.model,
+        is_local: true,
+        is_remote: false,
+        supports_browser: true,
+        supports_node: true,
+        is_cached: false,
+        is_loaded,
+        file_sizes: null,
+      },
+    });
+  };
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSchema.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSchema.ts
new file mode 100644
index 000000000..4090723c2
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSchema.ts
@@ -0,0 +1,82 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { ModelConfigSchema, ModelRecordSchema } from "@workglow/ai/worker";
+import type { DataPortSchemaObject, FromSchema } from "@workglow/util/worker";
+import { LOCAL_STABLE_DIFFUSION_CPP } from "./StableDiffusionCpp_Constants";
+
+/**
+ * Provider-config schema for `@workglow/stable-diffusion-server`.
+ *
+ * Required by the schema:
+ * - `provider` — discriminator
+ * - `provider_config` — object whose own fields are all optional at the schema level
+ *
+ * Either `model_path` (transport mode) OR `base_url` (externalUrl mode) must be set
+ * for a usable record. The provider resolver throws at runtime if neither resolves.
+ */
+export const StableDiffusionCppModelSchema = {
+  type: "object",
+  properties: {
+    provider: {
+      const: LOCAL_STABLE_DIFFUSION_CPP,
+      description: "Discriminator: local stable-diffusion.cpp HTTP provider.",
+    },
+    provider_config: {
+      type: "object",
+      description: "stable-diffusion.cpp-specific configuration.",
+      properties: {
+        model_path: {
+          type: "string",
+          description:
+            "Absolute filesystem path to the .gguf or .safetensors model. Required for transport-mode acquisition.",
+        },
+        model_name: {
+          type: "string",
+          description:
+            "Optional logical model name sent as OpenAI `model` field when using the `/v1/images/generations` endpoint.",
+        },
+        base_url: {
+          type: "string",
+          description:
+            "Optional per-record base URL override. Takes precedence over provider-level externalUrl. Used for records discovered via externalUrl-mode model.search.",
+        },
+        endpoint: {
+          type: "string",
+          description:
+            "Optional per-record endpoint override. Either `/txt2img` (default sd.cpp HTTP API) or `/v1/images/generations` (OpenAI-compatible builds). Overrides provider-level default.",
+        },
+      },
+      additionalProperties: false,
+    },
+  },
+  required: ["provider", "provider_config"],
+  additionalProperties: true,
+} as const satisfies DataPortSchemaObject;
+
+export const StableDiffusionCppModelRecordSchema = {
+  type: "object",
+  properties: {
+    ...ModelRecordSchema.properties,
+    ...StableDiffusionCppModelSchema.properties,
+  },
+  required: [...ModelRecordSchema.required, ...StableDiffusionCppModelSchema.required],
+  additionalProperties: false,
+} as const satisfies DataPortSchemaObject;
+
+export type StableDiffusionCppModelRecord = FromSchema<typeof StableDiffusionCppModelRecordSchema>;
+
+export const StableDiffusionCppModelConfigSchema = {
+  type: "object",
+  properties: {
+    ...ModelConfigSchema.properties,
+    ...StableDiffusionCppModelSchema.properties,
+  },
+  required: [...ModelConfigSchema.required, ...StableDiffusionCppModelSchema.required],
+  additionalProperties: false,
+} as const satisfies DataPortSchemaObject;
+
+export type StableDiffusionCppModelConfig = FromSchema<typeof StableDiffusionCppModelConfigSchema>;
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts
new file mode 100644
index 000000000..bba4b0e4f
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts
@@ -0,0 +1,53 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRunFn, ModelSearchTaskInput, ModelSearchTaskOutput } from "@workglow/ai";
+import { filterModelSearchResultsByQuery } from "@workglow/ai/provider-utils";
+import type { IStableDiffusionCppProviderOptions } from "./StableDiffusionCpp_Client";
+import { LOCAL_STABLE_DIFFUSION_CPP } from "./StableDiffusionCpp_Constants";
+import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
+
+export function createStableDiffusionCppModelSearchRunFn(
+  opts: IStableDiffusionCppProviderOptions
+): AiProviderRunFn<ModelSearchTaskInput, ModelSearchTaskOutput, StableDiffusionCppModelConfig> {
+  return async (input, _model, signal, emit) => {
+    signal?.throwIfAborted?.();
+    if (!opts.externalUrl) {
+      emit({ type: "finish", data: { results: [] } });
+      return;
+    }
+    const baseUrl = opts.externalUrl.replace(/\/+$/, "");
+    try {
+      const res = await fetch(`${baseUrl}/v1/models`, { signal });
+      if (!res.ok) {
+        emit({ type: "finish", data: { results: [] } });
+        return;
+      }
+      const body = (await res.json()) as { data?: Array<{ id: string }> };
+      const results = (body.data ?? []).map((m) => ({
+        id: m.id,
+        label: m.id,
+        description: m.id,
+        record: {
+          model_id: m.id,
+          provider: LOCAL_STABLE_DIFFUSION_CPP,
+          title: m.id,
+          description: "",
+          capabilities: [],
+          provider_config: { model_name: m.id, base_url: baseUrl },
+          metadata: {},
+        },
+        raw: m,
+      }));
+      emit({
+        type: "finish",
+        data: { results: filterModelSearchResultsByQuery(results, input.query) },
+      });
+    } catch {
+      emit({ type: "finish", data: { results: [] } });
+    }
+  };
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelUtil.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelUtil.ts
new file mode 100644
index 000000000..2d2190c65
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelUtil.ts
@@ -0,0 +1,35 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
+
+/**
+ * Returns the logical model name to send in the OpenAI `model` field for
+ * `/v1/images/generations`-shape requests; sd.cpp's native `/txt2img` ignores it.
+ * Resolution order: `model_name`, then `model_path`, then `model_id`, then "".
+ */
+export function getStableDiffusionCppModelName(
+  model: StableDiffusionCppModelConfig | undefined
+): string {
+  const pc = model?.provider_config;
+  return String(pc?.model_name ?? pc?.model_path ?? model?.model_id ?? "");
+}
+
+/**
+ * Returns the absolute filesystem path used by `transport.ensureRunning`.
+ * Required for transport-mode acquisition; throws if missing, empty, or not a string.
+ */
+export function getStableDiffusionCppModelPath(
+  model: StableDiffusionCppModelConfig | undefined
+): string {
+  const path = model?.provider_config?.model_path;
+  if (typeof path !== "string" || path.length === 0) {
+    throw new Error(
+      "StableDiffusionCpp: provider_config.model_path is required for transport-mode acquisition."
+    );
+  }
+  return path;
+}
diff --git a/providers/stable-diffusion-server/src/ai/index.ts b/providers/stable-diffusion-server/src/ai/index.ts
index 5df0a99bc..a511a3750 100644
--- a/providers/stable-diffusion-server/src/ai/index.ts
+++ b/providers/stable-diffusion-server/src/ai/index.ts
@@ -7,5 +7,20 @@
 // organize-imports-ignore
 
 export * from "./common/StableDiffusionCpp_Constants";
-export * from "./StableDiffusionCppProvider";
+export * from "./common/StableDiffusionCpp_ModelSchema";
+export * from "./common/StableDiffusionCpp_Capabilities";
+export * from "./common/StableDiffusionCpp_CapabilitySets";
 export * from "./registerStableDiffusionCpp";
+export * from "./registerStableDiffusionCppInline";
+export * from "./registerStableDiffusionCppWorker";
+
+import { STABLE_DIFFUSION_CPP_RUN_FN_SPECS } from "./common/StableDiffusionCpp_Capabilities";
+import { buildStableDiffusionCppRunFns } from "./common/StableDiffusionCpp_JobRunFns";
+import { StableDiffusionCppQueuedProvider } from "./StableDiffusionCppQueuedProvider";
+
+/** @internal */
+export const _testOnly = {
+  StableDiffusionCppQueuedProvider,
+  STABLE_DIFFUSION_CPP_RUN_FN_SPECS,
+  buildStableDiffusionCppRunFns,
+} as const;
diff --git a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts
index 4b5db64dc..7e8d12fd0 100644
--- a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts
+++ b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCpp.ts
@@ -5,24 +5,24 @@
  */
 
 import type { AiProviderRegisterOptions } from "@workglow/ai";
-import type { IBackendsTransport } from "@workglow/ai/provider-utils";
-import { registerProviderInline } from "@workglow/ai/provider-utils";
-import type { StableDiffusionCppEndpoint } from "./StableDiffusionCppProvider";
-import { StableDiffusionCppProvider } from "./StableDiffusionCppProvider";
-
-export interface IRegisterStableDiffusionCppOptions extends AiProviderRegisterOptions {
-  readonly transport: IBackendsTransport;
-  readonly externalUrl?: string;
-  readonly endpoint?: StableDiffusionCppEndpoint;
-}
+import { registerProviderWithWorker } from "@workglow/ai/provider-utils";
+import { StableDiffusionCppQueuedProvider } from "./StableDiffusionCppQueuedProvider";
 
+/**
+ * Main-thread worker-backed registration. The provider proxy lives on the
+ * main thread and forwards jobs to the worker, which holds the real run-fns.
+ *
+ * Use {@link registerStableDiffusionCppInline} for transport mode within a
+ * single thread.
+ */
 export async function registerStableDiffusionCpp(
-  options: IRegisterStableDiffusionCppOptions
+  options: AiProviderRegisterOptions & {
+    worker: Worker | (() => Worker);
+  }
 ): Promise<void> {
-  const { transport, externalUrl, endpoint, ...registerOptions } = options;
-  await registerProviderInline(
-    new StableDiffusionCppProvider({ transport, externalUrl, endpoint }),
+  await registerProviderWithWorker(
+    new StableDiffusionCppQueuedProvider(),
     "StableDiffusionCpp",
-    registerOptions
+    options
   );
 }
diff --git a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppInline.ts b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppInline.ts
new file mode 100644
index 000000000..c2b8271fa
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppInline.ts
@@ -0,0 +1,28 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { AiProviderRegisterOptions } from "@workglow/ai";
+import { registerProviderInline } from "@workglow/ai/provider-utils";
+import { type IStableDiffusionCppProviderOptions } from "./common/StableDiffusionCpp_Client";
+import { buildStableDiffusionCppRunFns } from "./common/StableDiffusionCpp_JobRunFns";
+import { StableDiffusionCppQueuedProvider } from "./StableDiffusionCppQueuedProvider";
+
+export interface IRegisterStableDiffusionCppInlineOptions
+  extends AiProviderRegisterOptions, IStableDiffusionCppProviderOptions {}
+
+/** Main-thread inline registration. Supports transport mode. */
+export async function registerStableDiffusionCppInline(
+  options: IRegisterStableDiffusionCppInlineOptions = {}
+): Promise<void> {
+  const { transport, externalUrl, endpoint, ...registerOptions } = options;
+  await registerProviderInline(
+    new StableDiffusionCppQueuedProvider(
+      buildStableDiffusionCppRunFns({ transport, externalUrl, endpoint })
+    ),
+    "StableDiffusionCpp",
+    registerOptions
+  );
+}
diff --git a/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppWorker.ts b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppWorker.ts
new file mode 100644
index 000000000..6c9dfa166
--- /dev/null
+++ b/providers/stable-diffusion-server/src/ai/registerStableDiffusionCppWorker.ts
@@ -0,0 +1,27 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { registerProviderWorker } from "@workglow/ai/provider-utils";
+import type { IStableDiffusionCppProviderOptions } from "./common/StableDiffusionCpp_Client";
+import { buildStableDiffusionCppRunFns } from "./common/StableDiffusionCpp_JobRunFns";
+import { StableDiffusionCppProvider } from "./StableDiffusionCppProvider";
+
+/**
+ * Worker-server-side registration. Supports both transport and externalUrl modes.
+ * Transport is constructed inside this worker runtime by the caller and held
+ * by closure in the run-fns. Primary production path.
+ */
+export async function registerStableDiffusionCppWorker(
+  options: IStableDiffusionCppProviderOptions = {}
+): Promise<void> {
+  await registerProviderWorker(
+    (ws) =>
+      new StableDiffusionCppProvider(buildStableDiffusionCppRunFns(options)).registerOnWorkerServer(
+        ws
+      ),
+    "StableDiffusionCpp"
+  );
+}
diff --git a/providers/stable-diffusion-server/src/ai/runtime.ts b/providers/stable-diffusion-server/src/ai/runtime.ts
index 094645c24..4fea72a1a 100644
--- a/providers/stable-diffusion-server/src/ai/runtime.ts
+++ b/providers/stable-diffusion-server/src/ai/runtime.ts
@@ -6,4 +6,11 @@
 
 // organize-imports-ignore
 
-export * from "./registerStableDiffusionCpp";
+export * from "./common/StableDiffusionCpp_Client";
+export * from "./common/StableDiffusionCpp_ImageGenerate";
+export * from "./common/StableDiffusionCpp_ImageEdit";
+export * from "./common/StableDiffusionCpp_ModelInfo";
+export * from "./common/StableDiffusionCpp_ModelSearch";
+export * from "./common/StableDiffusionCpp_JobRunFns";
+export * from "./registerStableDiffusionCppInline";
+export * from "./registerStableDiffusionCppWorker";

From 4638a83ab45290cdbde3d20a087720f5edd2f238 Mon Sep 17 00:00:00 2001
From: Steven Roussey <sroussey@gmail.com>
Date: Sat, 23 May 2026 22:58:13 +0000
Subject: [PATCH 7/8] test(stable-diffusion-server): full unit and integration
 test suite

---
 .../LlamaCppServerProvider.test.ts            |   4 +-
 .../LlamaCppServer_Client.test.ts             |  30 ++--
 .../LlamaCppServer_ModelInfo.test.ts          |   8 +-
 .../LlamaCppServer_ModelSearch.test.ts        |  18 ++-
 .../LlamaCppServer_TextEmbedding.test.ts      |   2 +-
 ...lamaCppServer_TextGenerationStream.test.ts |   4 +-
 .../LlamaCppServer_ToolCalling.test.ts        |   2 +-
 .../LocalBackendsProviderContracts.test.ts    |  18 ++-
 .../StableDiffusionCppProvider.test.ts        | 134 ++++++++++++++++++
 .../StableDiffusionCpp_Client.test.ts         | 111 +++++++++++++++
 ...leDiffusionCpp_Generic.integration.test.ts |  37 +++++
 .../StableDiffusionCpp_ImageEdit.test.ts      |  77 ++++++++++
 .../StableDiffusionCpp_ImageGenerate.test.ts  |  79 +++++++++++
 .../StableDiffusionCpp_ModelInfo.test.ts      |  56 ++++++++
 .../StableDiffusionCpp_ModelSearch.test.ts    |  72 ++++++++++
 .../src/ai/common/LlamaCppServer_Client.ts    |  96 ++++++++++++-
 .../src/ai/common/LlamaCppServer_ModelInfo.ts |  10 +-
 .../ai/common/LlamaCppServer_ModelSearch.ts   |  10 +-
 .../ai/common/LlamaCppServer_TextEmbedding.ts |   8 +-
 .../common/LlamaCppServer_TextGeneration.ts   |   3 +-
 .../ai/common/LlamaCppServer_TextRewriter.ts  |   3 +-
 .../ai/common/LlamaCppServer_TextSummary.ts   |   3 +-
 .../ai/common/LlamaCppServer_ToolCalling.ts   |   3 +-
 .../ai/common/StableDiffusionCpp_Client.ts    |  96 ++++++++++++-
 .../ai/common/StableDiffusionCpp_ImageEdit.ts |   3 +-
 .../StableDiffusionCpp_ImageGenerate.ts       |  15 +-
 .../ai/common/StableDiffusionCpp_ModelInfo.ts |   3 +-
 .../common/StableDiffusionCpp_ModelSearch.ts  |  10 +-
 28 files changed, 858 insertions(+), 57 deletions(-)
 create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCppProvider.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_Client.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_Generic.integration.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageEdit.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageGenerate.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelInfo.test.ts
 create mode 100644 packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelSearch.test.ts

diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts
index 9150c1585..b55f1ede1 100644
--- a/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServerProvider.test.ts
@@ -129,7 +129,7 @@ describe("LlamaCppServer transport-mode run-fn (parity across inline + worker)",
     const release = vi.fn().mockResolvedValue(undefined);
     const transport = fakeTransport();
     transport.ensureRunning.mockResolvedValue({
-      url: "http://broker:9999",
+      url: "http://127.0.0.1:9999",
       release,
     } as IRunningHandle);
 
@@ -166,7 +166,7 @@ describe("LlamaCppServer transport-mode run-fn (parity across inline + worker)",
       opts: { ctx: 4096 },
     });
     const fetchedUrl = String(fetchSpy.mock.calls[0]![0]);
-    expect(fetchedUrl).toBe("http://broker:9999/v1/chat/completions");
+    expect(fetchedUrl).toBe("http://127.0.0.1:9999/v1/chat/completions");
     expect(release).toHaveBeenCalledTimes(1);
     fetchSpy.mockRestore();
   });
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts
index fecfa6516..721819f62 100644
--- a/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_Client.test.ts
@@ -24,10 +24,10 @@ describe("acquireBaseUrl precedence", () => {
   it("prefers model.provider_config.base_url over everything", async () => {
     const transport = fakeTransport();
     const result = await acquireBaseUrl(
-      { provider_config: { base_url: "http://from-model:8080/" } } as any,
-      { externalUrl: "http://from-opts:8080", transport }
+      { provider_config: { base_url: "http://localhost:8080/" } } as any,
+      { externalUrl: "http://127.0.0.1:8081", transport }
     );
-    expect(result.baseUrl).toBe("http://from-model:8080");
+    expect(result.baseUrl).toBe("http://localhost:8080");
     expect(transport.ensureRunning).not.toHaveBeenCalled();
     await result.release(); // no-op
   });
@@ -35,10 +35,10 @@ describe("acquireBaseUrl precedence", () => {
   it("prefers opts.externalUrl over transport when no model.base_url", async () => {
     const transport = fakeTransport();
     const result = await acquireBaseUrl({ provider_config: { model_path: "/x.gguf" } } as any, {
-      externalUrl: "http://from-opts:8080",
+      externalUrl: "http://127.0.0.1:8081",
       transport,
     });
-    expect(result.baseUrl).toBe("http://from-opts:8080");
+    expect(result.baseUrl).toBe("http://127.0.0.1:8081");
     expect(transport.ensureRunning).not.toHaveBeenCalled();
     await result.release(); // no-op
   });
@@ -47,7 +47,7 @@ describe("acquireBaseUrl precedence", () => {
     const release = vi.fn().mockResolvedValue(undefined);
     const transport = fakeTransport();
     transport.ensureRunning.mockResolvedValue({
-      url: "http://broker:9999/",
+      url: "http://127.0.0.1:9999/",
       release,
     } as IRunningHandle);
     const result = await acquireBaseUrl(
@@ -59,7 +59,7 @@ describe("acquireBaseUrl precedence", () => {
       modelPath: "/abs/m.gguf",
       opts: { ctx: 8192 },
     });
-    expect(result.baseUrl).toBe("http://broker:9999");
+    expect(result.baseUrl).toBe("http://127.0.0.1:9999");
     await result.release();
     expect(release).toHaveBeenCalledTimes(1);
   });
@@ -67,7 +67,7 @@ describe("acquireBaseUrl precedence", () => {
   it("uses defaultCtx when model has no ctx override", async () => {
     const transport = fakeTransport();
     transport.ensureRunning.mockResolvedValue({
-      url: "http://broker:9999",
+      url: "http://127.0.0.1:9999",
       release: vi.fn(),
     } as IRunningHandle);
     await acquireBaseUrl({ provider_config: { model_path: "/abs/m.gguf" } } as any, {
@@ -88,6 +88,20 @@ describe("acquireBaseUrl precedence", () => {
     );
   });
 
+  it("rejects public model URLs before requests can use them", async () => {
+    await expect(
+      acquireBaseUrl({ provider_config: { base_url: "https://example.com:8080/" } } as any, {})
+    ).rejects.toThrow(/local HTTP/);
+  });
+
+  it("normalizes slash-heavy local URLs", async () => {
+    const result = await acquireBaseUrl(
+      { provider_config: { base_url: `http://127.0.0.1:8080${"/".repeat(1_000)}` } } as any,
+      {}
+    );
+    expect(result.baseUrl).toBe("http://127.0.0.1:8080");
+  });
+
   it("throws when no source resolves", async () => {
     await expect(acquireBaseUrl({ provider_config: {} } as any, {})).rejects.toThrow(
       /no base URL source/
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts
index 654f75f8e..2ba9e3aa2 100644
--- a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelInfo.test.ts
@@ -17,7 +17,7 @@ describe("createLlamaCppServerModelInfoStream", () => {
     const emit = (e: any) => events.push(e);
     await fn(
       { detail: "dimensions", model: "m" } as any,
-      { provider_config: { base_url: "http://x:8080", native_dimensions: 768 } } as any,
+      { provider_config: { base_url: "http://localhost:8080", native_dimensions: 768 } } as any,
       undefined as any,
       emit
     );
@@ -39,7 +39,7 @@ describe("createLlamaCppServerModelInfoStream", () => {
     const emit = (e: any) => events.push(e);
     await fn(
       { detail: "dimensions", model: "m" } as any,
-      { provider_config: { base_url: "http://x:8080" } } as any,
+      { provider_config: { base_url: "http://localhost:8080" } } as any,
       undefined as any,
       emit
     );
@@ -55,7 +55,7 @@ describe("createLlamaCppServerModelInfoStream", () => {
     const emit = (e: any) => events.push(e);
     await fn(
       { model: "m" } as any,
-      { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any,
+      { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any,
       undefined as any,
       emit
     );
@@ -69,7 +69,7 @@ describe("createLlamaCppServerModelInfoStream", () => {
     const emit = (e: any) => events.push(e);
     await fn(
       { model: "m" } as any,
-      { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any,
+      { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any,
       undefined as any,
       emit
     );
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts
index 4c934d408..41248708d 100644
--- a/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ModelSearch.test.ts
@@ -24,7 +24,7 @@ describe("createLlamaCppServerModelSearchStream", () => {
     vi.spyOn(globalThis, "fetch").mockResolvedValue(
       new Response(JSON.stringify({ data: [{ id: "loaded-model" }] }), { status: 200 })
     );
-    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" });
+    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://localhost:8080" });
     const events: any[] = [];
     const emit = (e: any) => events.push(e);
     await fn({ query: "" } as any, undefined as any, undefined as any, emit);
@@ -32,25 +32,35 @@ describe("createLlamaCppServerModelSearchStream", () => {
     expect(results).toHaveLength(1);
     expect(results[0].id).toBe("loaded-model");
     expect(results[0].record.provider).toBe("LOCAL_LLAMACPP_SERVER");
-    expect(results[0].record.provider_config.base_url).toBe("http://x:8080");
+    expect(results[0].record.provider_config.base_url).toBe("http://localhost:8080");
   });
 
   it("returns [] when fetch fails", async () => {
     vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED"));
-    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" });
+    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://localhost:8080" });
     const events: any[] = [];
     const emit = (e: any) => events.push(e);
     await fn({ query: "" } as any, undefined as any, undefined as any, emit);
     expect(events.at(-1)!.data.results).toEqual([]);
   });
 
+  it("does not fetch public externalUrl values", async () => {
+    const fetchSpy = vi.spyOn(globalThis, "fetch");
+    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "https://example.com:8080" });
+    const events: any[] = [];
+    const emit = (e: any) => events.push(e);
+    await fn({ query: "" } as any, undefined as any, undefined as any, emit);
+    expect(fetchSpy).not.toHaveBeenCalled(); // a public host must be refused before any request
+    expect(events.at(-1)!.data.results).toEqual([]); // and the run still ends with no results
+  });
+
   it("filters by query case-insensitively", async () => {
     vi.spyOn(globalThis, "fetch").mockResolvedValue(
       new Response(JSON.stringify({ data: [{ id: "Llama-3" }, { id: "Mistral" }] }), {
         status: 200,
       })
     );
-    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://x:8080" });
+    const fn = createLlamaCppServerModelSearchStream({ externalUrl: "http://localhost:8080" });
     const events: any[] = [];
     const emit = (e: any) => events.push(e);
     await fn({ query: "llama" } as any, undefined as any, undefined as any, emit);
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts
index 3f991b362..18fe9dba6 100644
--- a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextEmbedding.test.ts
@@ -9,7 +9,7 @@ import { afterEach, describe, expect, it, vi } from "vitest";
 
 afterEach(() => vi.restoreAllMocks());
 
-const model = { provider_config: { base_url: "http://x:8080", model_name: "emb" } } as any;
+const model = { provider_config: { base_url: "http://localhost:8080", model_name: "emb" } } as any;
 
 describe("createLlamaCppServerTextEmbeddingStream", () => {
   it("returns a single Float32Array for string input", async () => {
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts
index 5efa1ced7..49ad0974b 100644
--- a/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_TextGenerationStream.test.ts
@@ -27,7 +27,7 @@ afterEach(() => {
 });
 
 describe("createLlamaCppServerTextGenerationStream", () => {
-  const model = { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any;
+  const model = { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any;
 
   it("yields text-delta events for each delta line and a final finish", async () => {
     const fetchSpy = vi
@@ -41,7 +41,7 @@ describe("createLlamaCppServerTextGenerationStream", () => {
 
     expect(fetchSpy).toHaveBeenCalledTimes(1);
     const [url] = fetchSpy.mock.calls[0]!;
-    expect(String(url)).toBe("http://x:8080/v1/chat/completions");
+    expect(String(url)).toBe("http://localhost:8080/v1/chat/completions");
     expect(events.filter((e) => e.type === "text-delta").map((e) => e.textDelta)).toEqual([
       "Hel",
       "lo",
diff --git a/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts b/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts
index 7030fe8a5..9b1279cb2 100644
--- a/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts
+++ b/packages/test/src/test/ai-provider-api/LlamaCppServer_ToolCalling.test.ts
@@ -21,7 +21,7 @@ function sseChunks(chunks: object[]): Response {
 
 afterEach(() => vi.restoreAllMocks());
 
-const model = { provider_config: { base_url: "http://x:8080", model_name: "m" } } as any;
+const model = { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any;
 const TOOLS = [
   {
     name: "add",
diff --git a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
index d7d3db18c..cbe6cf032 100644
--- a/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
+++ b/packages/test/src/test/ai-provider-api/LocalBackendsProviderContracts.test.ts
@@ -19,7 +19,10 @@ import type {
 } from "@workglow/ai/provider-utils";
 import { pngBytesToImageValue } from "@workglow/ai/provider-utils";
 import { LOCAL_LLAMACPP_SERVER, registerLlamaCppServerInline } from "@workglow/llamacpp-server/ai";
-import { StableDiffusionCppProvider } from "@workglow/stable-diffusion-server/ai";
+import {
+  LOCAL_STABLE_DIFFUSION_CPP,
+  registerStableDiffusionCppInline,
+} from "@workglow/stable-diffusion-server/ai";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 
 vi.mock("@workglow/ai/provider-utils", async (importOriginal) => {
@@ -167,10 +170,11 @@ describe("local backend provider stream contracts", () => {
 
   it("stable-diffusion emits the generated image as a snapshot before finish", async () => {
     const { release, transport } = createTransportStub();
-    const provider = new StableDiffusionCppProvider({ transport });
-    await provider.register();
+    await registerStableDiffusionCppInline({ transport });
 
-    const runFn = getAiProviderRegistry().getRunFnFor(provider.name, ["image.generation"]);
+    const runFn = getAiProviderRegistry().getRunFnFor(LOCAL_STABLE_DIFFUSION_CPP, [
+      "image.generation",
+    ]);
     expect(runFn).toBeDefined();
 
     globalThis.fetch = vi.fn(
@@ -188,7 +192,11 @@ describe("local backend provider stream contracts", () => {
     const events = await runProviderStream(
       runFn!,
       { prompt: "draw a cat" },
-      { model_id: "/models/stable-diffusion.gguf" }
+      {
+        model_id: "sd-test",
+        provider: LOCAL_STABLE_DIFFUSION_CPP,
+        provider_config: { model_path: "/models/stable-diffusion.gguf" },
+      }
     );
 
     expect(events).toEqual([
diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCppProvider.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCppProvider.test.ts
new file mode 100644
index 000000000..2d227ce86
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/StableDiffusionCppProvider.test.ts
@@ -0,0 +1,134 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { ModelRecord } from "@workglow/ai";
+import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import { _testOnly } from "@workglow/stable-diffusion-server/ai";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+vi.mock("@workglow/ai/provider-utils", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("@workglow/ai/provider-utils")>();
+  return {
+    ...actual,
+    pngBytesToImageValue: vi.fn(async () => ({ kind: "mock-image" })),
+  };
+});
+
+const {
+  StableDiffusionCppQueuedProvider,
+  STABLE_DIFFUSION_CPP_RUN_FN_SPECS,
+  buildStableDiffusionCppRunFns,
+} = _testOnly;
+
+function model(
+  model_id: string,
+  provider_config: Record<string, unknown> = { model_path: `/models/${model_id}` },
+  capabilities: readonly string[] = []
+): ModelRecord {
+  return {
+    model_id,
+    title: model_id, // the id doubles as the title
+    description: "",
+    provider: "LOCAL_STABLE_DIFFUSION_CPP",
+    provider_config,
+    capabilities: capabilities.slice(), // fresh array; the readonly input is not aliased
+    metadata: {},
+  } as ModelRecord;
+}
+
+describe("StableDiffusionCppQueuedProvider.inferCapabilities", () => {
+  const provider = new StableDiffusionCppQueuedProvider(buildStableDiffusionCppRunFns({}));
+
+  it("infers full generative set for any non-empty model id", () => {
+    const inferred = Array.from(provider.inferCapabilities(model("sd-1.5.gguf"))).sort();
+    expect(inferred).toEqual([
+      "image.editing",
+      "image.generation",
+      "model.info",
+      "model.search",
+    ]);
+  });
+
+  it("falls back to declared caps when id is empty", () => {
+    const declared = ["image.generation"];
+    expect(provider.inferCapabilities(model("", {}, declared))).toEqual(["image.generation"]);
+  });
+
+  it("falls back to baseline meta-ops when nothing declared and nothing matches", () => {
+    const baseline = ["model.info", "model.search"];
+    expect(provider.inferCapabilities(model("", {}))).toEqual(baseline);
+  });
+});
+
+describe("StableDiffusionCpp capability-set parity", () => {
+  it("STABLE_DIFFUSION_CPP_RUN_FN_SPECS matches buildStableDiffusionCppRunFns({}) serves shapes", () => {
+    // Canonical key for one entry: its capability names sorted and comma-joined.
+    const servesKey = (entry: { serves: readonly string[] }) =>
+      Array.from(entry.serves).sort().join(",");
+    const fromRunFns = buildStableDiffusionCppRunFns({}).map(servesKey);
+    const fromSpecs = STABLE_DIFFUSION_CPP_RUN_FN_SPECS.map(servesKey);
+    expect(fromSpecs).toEqual(fromRunFns);
+  });
+});
+
+describe("StableDiffusionCpp run-fn shape", () => {
+  it("registers a runFn for every canonical capability set", () => {
+    // Key each run-fn by its sorted, comma-joined capability names.
+    const served = buildStableDiffusionCppRunFns({}).map((r) => [...r.serves].sort().join(","));
+    for (const capability of ["image.generation", "image.editing", "model.search", "model.info"]) {
+      expect(served).toContain(capability);
+    }
+  });
+});
+
+function fakeTransport(): IBackendsTransport & {
+  ensureRunning: ReturnType<typeof vi.fn>;
+} {
+  // Each member gets its own spy. Only subscribeStatus has an implementation: it returns a
+  // no-op function. The remaining members are bare vi.fn() stubs.
+  const ensureRunning = vi.fn();
+  const subscribeStatus = vi.fn(() => () => undefined);
+  const [install, list, uninstall] = [vi.fn(), vi.fn(), vi.fn()];
+  const stub = { ensureRunning, subscribeStatus, install, list, uninstall };
+  return stub as unknown as IBackendsTransport & { ensureRunning: ReturnType<typeof vi.fn> };
+}
+
+describe("StableDiffusionCpp transport-mode run-fn (parity across inline + worker)", () => {
+  afterEach(() => vi.restoreAllMocks());
+
+  it("acquires URL via transport and releases the handle (image.generation)", async () => {
+    // Arrange: the transport resolves to a handle whose release() is a spy.
+    const release = vi.fn().mockResolvedValue(undefined);
+    const handle = { url: "http://127.0.0.1:9999", release } as IRunningHandle;
+    const transport = fakeTransport();
+    transport.ensureRunning.mockResolvedValue(handle);
+    const okBody = JSON.stringify({ images: ["aGk="] });
+    const fetchSpy = vi
+      .spyOn(globalThis, "fetch")
+      .mockResolvedValue(new Response(okBody, { status: 200 }));
+
+    // Act: run the image.generation run-fn with a model that only carries a model_path.
+    const runFns = buildStableDiffusionCppRunFns({ transport });
+    const imageGen = runFns.find((entry) => entry.serves.join(",") === "image.generation")!;
+    const seen: any[] = [];
+    await imageGen.runFn(
+      { prompt: "hi" } as any,
+      { provider_config: { model_path: "/abs/m.gguf" } } as any,
+      undefined as any,
+      (e: any) => seen.push(e)
+    );
+
+    // Assert: the transport was asked for a server, its URL was fetched, and release ran once.
+    expect(transport.ensureRunning).toHaveBeenCalledWith({
+      backend: "stable-diffusion-server",
+      modelPath: "/abs/m.gguf",
+      opts: {},
+    });
+    const [firstCall] = fetchSpy.mock.calls;
+    expect(String(firstCall![0])).toBe("http://127.0.0.1:9999/txt2img");
+    expect(release).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Client.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Client.test.ts
new file mode 100644
index 000000000..68573fa1b
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Client.test.ts
@@ -0,0 +1,111 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import type { IBackendsTransport, IRunningHandle } from "@workglow/ai/provider-utils";
+import {
+  acquireBaseUrl,
+  decodeBase64Png,
+  encodeBytesToBase64,
+} from "@workglow/stable-diffusion-server/ai-runtime";
+import { describe, expect, it, vi } from "vitest";
+
+function fakeTransport(): IBackendsTransport & {
+  ensureRunning: ReturnType<typeof vi.fn>;
+} {
+  // Each member gets its own spy. Only subscribeStatus has an implementation: it returns a
+  // no-op function. The remaining members are bare vi.fn() stubs.
+  const ensureRunning = vi.fn();
+  const subscribeStatus = vi.fn(() => () => undefined);
+  const [install, list, uninstall] = [vi.fn(), vi.fn(), vi.fn()];
+  const stub = { ensureRunning, subscribeStatus, install, list, uninstall };
+  return stub as unknown as IBackendsTransport & { ensureRunning: ReturnType<typeof vi.fn> };
+}
+
+describe("acquireBaseUrl precedence", () => {
+  it("prefers model.provider_config.base_url over everything", async () => {
+    // All three sources are supplied; the model's own URL must win, minus its trailing slash.
+    const transport = fakeTransport();
+    const model = { provider_config: { base_url: "http://localhost:8080/" } } as any;
+    const opts = { externalUrl: "http://127.0.0.1:8081", transport };
+    const acquired = await acquireBaseUrl(model, opts);
+    expect(acquired.baseUrl).toBe("http://localhost:8080");
+    expect(transport.ensureRunning).not.toHaveBeenCalled();
+    await acquired.release(); // no-op
+  });
+
+  it("prefers opts.externalUrl over transport when no model.base_url", async () => {
+    // The model only names a file, so opts.externalUrl is the first source that applies.
+    const transport = fakeTransport();
+    const model = { provider_config: { model_path: "/x.gguf" } } as any;
+    const opts = { externalUrl: "http://127.0.0.1:8081", transport };
+    const acquired = await acquireBaseUrl(model, opts);
+    expect(acquired.baseUrl).toBe("http://127.0.0.1:8081");
+    expect(transport.ensureRunning).not.toHaveBeenCalled();
+    await acquired.release(); // no-op
+  });
+
+  it("falls back to transport.ensureRunning when neither URL is set", async () => {
+    // Arrange: the transport resolves to a handle with a trailing-slash URL and a spy release.
+    const release = vi.fn().mockResolvedValue(undefined);
+    const handle = { url: "http://127.0.0.1:9999/", release } as IRunningHandle;
+    const transport = fakeTransport();
+    transport.ensureRunning.mockResolvedValue(handle);
+    const model = { provider_config: { model_path: "/abs/m.gguf" } } as any;
+    const acquired = await acquireBaseUrl(model, { transport });
+
+    // Assert: the model path is forwarded, the slash is trimmed, and release reaches the handle.
+    expect(transport.ensureRunning).toHaveBeenCalledWith({
+      backend: "stable-diffusion-server",
+      modelPath: "/abs/m.gguf",
+      opts: {},
+    });
+    expect(acquired.baseUrl).toBe("http://127.0.0.1:9999");
+    await acquired.release();
+    expect(release).toHaveBeenCalledTimes(1);
+  });
+
+  it("throws when transport mode is selected but model_path is missing", async () => {
+    // A transport alone is not enough: the error must mention model_path.
+    const transport = fakeTransport();
+    const attempt = acquireBaseUrl({ provider_config: {} } as any, { transport });
+    await expect(attempt).rejects.toThrow(/model_path/);
+  });
+
+  it("rejects public model URLs before requests can use them", async () => {
+    const publicModel = { provider_config: { base_url: "https://example.com:8080/" } } as any;
+    const attempt = acquireBaseUrl(publicModel, {});
+    await expect(attempt).rejects.toThrow(/local HTTP/);
+  });
+
+  it("normalizes slash-heavy local URLs", async () => {
+    // One thousand trailing slashes must collapse to the bare origin.
+    const origin = "http://127.0.0.1:8080";
+    const model = { provider_config: { base_url: origin + "/".repeat(1_000) } } as any;
+    const acquired = await acquireBaseUrl(model, {});
+    expect(acquired.baseUrl).toBe(origin);
+  });
+
+  it("throws when no source resolves", async () => {
+    // No model URL, no externalUrl, no transport.
+    const attempt = acquireBaseUrl({ provider_config: {} } as any, {});
+    await expect(attempt).rejects.toThrow(/no base URL source/);
+  });
+});
+
+describe("decodeBase64Png / encodeBytesToBase64 roundtrip", () => {
+  it("decode then encode produces the same string for small payloads", () => {
+    // base64 -> bytes -> base64 must reproduce the starting text exactly.
+    const encoded = btoa("hello PNG bytes");
+    expect(encodeBytesToBase64(decodeBase64Png(encoded))).toBe(encoded);
+  });
+
+  it("handles binary bytes (high values)", () => {
+    // bytes -> base64 -> bytes, including values above 127 (128, 254, 255).
+    const input = [0, 1, 254, 255, 128, 64];
+    const roundTripped = decodeBase64Png(encodeBytesToBase64(new Uint8Array(input)));
+    expect(Array.from(roundTripped)).toEqual(input);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Generic.integration.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Generic.integration.test.ts
new file mode 100644
index 000000000..67ab62c6b
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_Generic.integration.test.ts
@@ -0,0 +1,37 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import {
+  createStableDiffusionCppImageGenerateRunFn,
+  createStableDiffusionCppModelSearchRunFn,
+} from "@workglow/stable-diffusion-server/ai-runtime";
+import { describe, expect, it } from "vitest";
+
+const RUN = process.env.RUN_SD_SERVER_TESTS === "1";
+const BASE_URL = process.env.SD_SERVER_URL ?? "http://localhost:7860";
+
+describe.skipIf(!RUN)("StableDiffusionCpp integration (real server)", () => {
+  // Points at the same URL the run-fns receive through externalUrl.
+  const model = { provider_config: { base_url: BASE_URL, model_name: "model" } } as any;
+
+  it("image.generation produces a snapshot with an image", async () => {
+    const generate = createStableDiffusionCppImageGenerateRunFn({ externalUrl: BASE_URL });
+    const seen: any[] = [];
+    const collect = (e: any) => seen.push(e);
+    await generate({ prompt: "a small red square" } as any, model, undefined as any, collect);
+    // A snapshot must appear somewhere, and the last event must be "finish".
+    expect(seen.some((e) => e.type === "snapshot")).toBe(true);
+    expect(seen.at(-1)!.type).toBe("finish");
+  });
+
+  it("model.search returns at least one entry via /v1/models", async () => {
+    const search = createStableDiffusionCppModelSearchRunFn({ externalUrl: BASE_URL });
+    const seen: any[] = [];
+    const collect = (e: any) => seen.push(e);
+    await search({ query: "" } as any, undefined as any, undefined as any, collect);
+    expect(seen.at(-1)!.data.results.length).toBeGreaterThanOrEqual(1);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageEdit.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageEdit.test.ts
new file mode 100644
index 000000000..c1ca66b59
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageEdit.test.ts
@@ -0,0 +1,77 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createStableDiffusionCppImageEditRunFn } from "@workglow/stable-diffusion-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+vi.mock("@workglow/ai/provider-utils", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("@workglow/ai/provider-utils")>();
+  return {
+    ...actual,
+    imageValueToPngBytes: vi.fn(async () => new Uint8Array([1, 2, 3, 4])),
+    pngBytesToImageValue: vi.fn(async () => ({ kind: "mock-image" })),
+  };
+});
+
+afterEach(() => vi.restoreAllMocks());
+
+const model = {
+  provider_config: { base_url: "http://localhost:8080", model_name: "sd1.5" },
+} as any;
+
+describe("createStableDiffusionCppImageEditRunFn", () => {
+  it("encodes input image as base64 PNG and POSTs to /img2img", async () => {
+    const okResponse = new Response(JSON.stringify({ images: ["aGVsbG8="] }), { status: 200 });
+    const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(okResponse);
+    const fn = createStableDiffusionCppImageEditRunFn({});
+    const seen: any[] = [];
+    await fn(
+      { prompt: "make it blue", image: { kind: "input-image" } } as any,
+      model,
+      undefined as any,
+      (e: any) => seen.push(e)
+    );
+    // Request side: the first fetch targets /img2img; its JSON body has prompt, init_image, model.
+    const [url, init] = fetchSpy.mock.calls[0]!;
+    expect(String(url)).toBe("http://localhost:8080/img2img");
+    const body = JSON.parse(String((init as RequestInit).body));
+    expect(body.prompt).toBe("make it blue");
+    expect(typeof body.init_image).toBe("string");
+    expect(body.init_image.length).toBeGreaterThan(0); // base64 of [1,2,3,4]
+    expect(body.model).toBe("sd1.5");
+    // Event side: a snapshot is emitted and the last event is "finish".
+    expect(seen.some((e) => e.type === "snapshot")).toBe(true);
+    expect(seen.at(-1)!.type).toBe("finish");
+  });
+
+  it("throws on non-2xx", async () => {
+    // Arrange: the server answers 400 with a plain-text body.
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("bad", { status: 400 }));
+    const fn = createStableDiffusionCppImageEditRunFn({});
+    const input = { prompt: "x", image: { kind: "input-image" } } as any;
+    const ignoreEvents = () => undefined;
+    // Act: start the run without awaiting it, so the rejection can be inspected.
+    const attempt = fn(input, model, undefined as any, ignoreEvents);
+
+    // Assert: the promise rejects and the message carries the HTTP status.
+    await expect(attempt).rejects.toThrow(/HTTP 400/);
+  });
+
+  it("throws when response contains no images", async () => {
+    // Arrange: a 200 reply whose JSON object has no "images" field at all.
+    const emptyReply = new Response(JSON.stringify({}), { status: 200 });
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(emptyReply);
+    const fn = createStableDiffusionCppImageEditRunFn({});
+    const input = { prompt: "x", image: { kind: "input-image" } } as any;
+    const ignoreEvents = () => undefined;
+
+    // Act: start the run without awaiting it, so the rejection can be inspected.
+    const attempt = fn(input, model, undefined as any, ignoreEvents);
+
+    // Assert: a 200 status alone is not enough; a reply without images is an error.
+    await expect(attempt).rejects.toThrow(/no images/);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageGenerate.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageGenerate.test.ts
new file mode 100644
index 000000000..bcc141e77
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ImageGenerate.test.ts
@@ -0,0 +1,79 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createStableDiffusionCppImageGenerateRunFn } from "@workglow/stable-diffusion-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+vi.mock("@workglow/ai/provider-utils", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("@workglow/ai/provider-utils")>();
+  return {
+    ...actual,
+    pngBytesToImageValue: vi.fn(async () => ({ kind: "mock-image" })),
+  };
+});
+
+afterEach(() => vi.restoreAllMocks());
+
+const model = {
+  provider_config: { base_url: "http://localhost:8080", model_name: "sd1.5" },
+} as any;
+
+describe("createStableDiffusionCppImageGenerateRunFn", () => {
+  it("POSTs to /txt2img by default and emits snapshot + finish", async () => {
+    // Arrange: a JSON 200 reply carrying one base64 string in "images".
+    const reply = new Response(JSON.stringify({ images: ["aGVsbG8="] }), {
+      status: 200,
+      headers: { "Content-Type": "application/json" },
+    });
+    const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(reply);
+    const fn = createStableDiffusionCppImageGenerateRunFn({});
+    const seen: any[] = [];
+
+    await fn({ prompt: "draw a cat" } as any, model, undefined as any, (e: any) => seen.push(e));
+
+    expect(String(fetchSpy.mock.calls[0]![0])).toBe("http://localhost:8080/txt2img");
+    expect(seen.some((e) => e.type === "snapshot")).toBe(true);
+    expect(seen.at(-1)!.type).toBe("finish");
+  });
+
+  it("uses the OpenAI-compat endpoint when configured at the model level", async () => {
+    const reply = new Response(JSON.stringify({ images: ["aGk="] }), { status: 200 });
+    const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(reply);
+    const fn = createStableDiffusionCppImageGenerateRunFn({});
+    // Same server as the shared fixture, plus an explicit endpoint override.
+    const modelWithEndpoint = {
+      provider_config: {
+        base_url: "http://localhost:8080",
+        model_name: "sd1.5",
+        endpoint: "/v1/images/generations",
+      },
+    } as any;
+
+    await fn({ prompt: "x" } as any, modelWithEndpoint, undefined as any, () => undefined);
+
+    // The configured endpoint, not /txt2img, must appear in the requested URL.
+    const [requested] = fetchSpy.mock.calls[0]!;
+    expect(String(requested)).toBe("http://localhost:8080/v1/images/generations");
+  });
+
+  it("throws on non-2xx with informative message", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("nope", { status: 500 }));
+    const fn = createStableDiffusionCppImageGenerateRunFn({});
+    // Start the run first, then assert on the pending promise.
+    const attempt = fn({ prompt: "x" } as any, model, undefined as any, () => undefined);
+    await expect(attempt).rejects.toThrow(/HTTP 500/);
+  });
+
+  it("throws when response contains no images", async () => {
+    // Arrange: a 200 reply whose "images" array is present but empty.
+    const emptyReply = new Response(JSON.stringify({ images: [] }), { status: 200 });
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(emptyReply);
+    const fn = createStableDiffusionCppImageGenerateRunFn({});
+
+    const attempt = fn({ prompt: "x" } as any, model, undefined as any, () => undefined);
+    await expect(attempt).rejects.toThrow(/no images/);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelInfo.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelInfo.test.ts
new file mode 100644
index 000000000..b39c5bf2b
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelInfo.test.ts
@@ -0,0 +1,56 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createStableDiffusionCppModelInfoRunFn } from "@workglow/stable-diffusion-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+afterEach(() => vi.restoreAllMocks());
+
+describe("createStableDiffusionCppModelInfoRunFn", () => {
+  it("reports is_loaded=true when /v1/models includes the model name", async () => {
+    // The listing contains "m" next to an unrelated id.
+    const listing = JSON.stringify({ data: [{ id: "m" }, { id: "other" }] });
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response(listing, { status: 200 }));
+    const fn = createStableDiffusionCppModelInfoRunFn({});
+    const seen: any[] = [];
+    await fn(
+      { model: "m" } as any,
+      { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any,
+      undefined as any,
+      (e: any) => seen.push(e)
+    );
+    const last = seen.at(-1)!;
+    expect(last.data.is_loaded).toBe(true);
+  });
+
+  it("reports is_loaded=false when server unreachable", async () => {
+    // fetch itself rejects; the run is still expected to finish and emit a result.
+    vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED"));
+    const fn = createStableDiffusionCppModelInfoRunFn({});
+    const seen: any[] = [];
+    await fn(
+      { model: "m" } as any,
+      { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any,
+      undefined as any,
+      (e: any) => seen.push(e)
+    );
+    expect(seen.at(-1)!.data.is_loaded).toBe(false);
+  });
+
+  it("reports is_loaded=false when /v1/models 404s", async () => {
+    // A 404 reply is expected to produce is_loaded=false rather than a thrown error.
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response("", { status: 404 }));
+    const fn = createStableDiffusionCppModelInfoRunFn({});
+    const seen: any[] = [];
+    await fn(
+      { model: "m" } as any,
+      { provider_config: { base_url: "http://localhost:8080", model_name: "m" } } as any,
+      undefined as any,
+      (e: any) => seen.push(e)
+    );
+    expect(seen.at(-1)!.data.is_loaded).toBe(false);
+  });
+});
diff --git a/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelSearch.test.ts b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelSearch.test.ts
new file mode 100644
index 000000000..eb0a85e4d
--- /dev/null
+++ b/packages/test/src/test/ai-provider-api/StableDiffusionCpp_ModelSearch.test.ts
@@ -0,0 +1,72 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { createStableDiffusionCppModelSearchRunFn } from "@workglow/stable-diffusion-server/ai-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+afterEach(() => vi.restoreAllMocks());
+
+describe("createStableDiffusionCppModelSearchRunFn", () => {
+  it("returns [] when no externalUrl set", async () => {
+    // fetch is only observed here, not stubbed: the run must not reach it at all.
+    const fetchSpy = vi.spyOn(globalThis, "fetch");
+    const fn = createStableDiffusionCppModelSearchRunFn({});
+    const seen: any[] = [];
+    await fn({ query: "" } as any, undefined as any, undefined as any, (e: any) => seen.push(e));
+    expect(fetchSpy).not.toHaveBeenCalled();
+    expect(seen.at(-1)!.data.results).toEqual([]);
+  });
+
+  it("returns mapped results from /v1/models when externalUrl set", async () => {
+    const listing = JSON.stringify({ data: [{ id: "loaded-model" }] });
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response(listing, { status: 200 }));
+    const fn = createStableDiffusionCppModelSearchRunFn({ externalUrl: "http://localhost:8080" });
+    const seen: any[] = [];
+    await fn({ query: "" } as any, undefined as any, undefined as any, (e: any) => seen.push(e));
+    // Exactly one hit, whose record names this provider and the configured base URL.
+    const results = seen.at(-1)!.data.results;
+    expect(results).toHaveLength(1);
+    const [hit] = results;
+    expect(hit.id).toBe("loaded-model");
+    expect(hit.record.provider).toBe("LOCAL_STABLE_DIFFUSION_CPP");
+    expect(hit.record.provider_config.base_url).toBe("http://localhost:8080");
+  });
+
+  it("returns [] when fetch fails", async () => {
+    // A rejected fetch is expected to end in an empty result list, not an exception.
+    vi.spyOn(globalThis, "fetch").mockRejectedValue(new Error("ECONNREFUSED"));
+    const fn = createStableDiffusionCppModelSearchRunFn({ externalUrl: "http://localhost:8080" });
+    const seen: any[] = [];
+    await fn({ query: "" } as any, undefined as any, undefined as any, (e: any) => seen.push(e));
+    expect(seen.at(-1)!.data.results).toEqual([]);
+  });
+
+  it("does not fetch public externalUrl values", async () => {
+    // example.com is not a local host, so the run must finish without ever calling fetch.
+    const publicUrl = "https://example.com:8080";
+    const fetchSpy = vi.spyOn(globalThis, "fetch");
+    const fn = createStableDiffusionCppModelSearchRunFn({ externalUrl: publicUrl });
+    const seen: any[] = [];
+
+    await fn({ query: "" } as any, undefined as any, undefined as any, (e: any) => seen.push(e));
+    expect(fetchSpy).not.toHaveBeenCalled();
+    expect(seen.at(-1)!.data.results).toEqual([]);
+  });
+
+  it("filters by query case-insensitively", async () => {
+    // Two models are listed; only "Flux-1" matches the lower-case query "flux".
+    const listing = JSON.stringify({ data: [{ id: "sd-1.5" }, { id: "Flux-1" }] });
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response(listing, { status: 200 }));
+    const fn = createStableDiffusionCppModelSearchRunFn({ externalUrl: "http://localhost:8080" });
+    const seen: any[] = [];
+    const collect = (e: any) => seen.push(e);
+    await fn({ query: "flux" } as any, undefined as any, undefined as any, collect);
+
+    const results = seen.at(-1)!.data.results;
+    const ids = results.map((r: any) => r.id);
+    expect(ids).toEqual(["Flux-1"]);
+  });
+});
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts
index 232b4316c..fb3b2f139 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_Client.ts
@@ -43,10 +43,10 @@ export async function acquireBaseUrl(
 ): Promise<IAcquiredBaseUrl> {
   const modelBaseUrl = model?.provider_config?.base_url;
   if (typeof modelBaseUrl === "string" && modelBaseUrl.length > 0) {
-    return { baseUrl: stripTrailingSlash(modelBaseUrl), release: noopRelease };
+    return { baseUrl: normalizeServerBaseUrl(modelBaseUrl), release: noopRelease };
   }
   if (typeof opts.externalUrl === "string" && opts.externalUrl.length > 0) {
-    return { baseUrl: stripTrailingSlash(opts.externalUrl), release: noopRelease };
+    return { baseUrl: normalizeServerBaseUrl(opts.externalUrl), release: noopRelease };
   }
   if (opts.transport) {
     const modelPath = model?.provider_config?.model_path;
@@ -65,7 +65,7 @@ export async function acquireBaseUrl(
       opts: { ctx },
     });
     return {
-      baseUrl: stripTrailingSlash(handle.url),
+      baseUrl: normalizeServerBaseUrl(handle.url),
       release: () => handle.release(),
     };
   }
@@ -74,8 +74,94 @@ export async function acquireBaseUrl(
   );
 }
 
-function stripTrailingSlash(url: string): string {
-  return url.replace(/\/+$/, "");
+// Validates that `rawUrl` is a credential-free HTTP(S) URL on a local host and
+// returns its origin plus path, dropping query, fragment, and trailing slashes.
+// Throws `Error` when any of those checks fails.
+export function normalizeServerBaseUrl(rawUrl: string): string {
+  const invalidUrlMessage = "LlamaCppServer: base URL must be a valid local HTTP(S) URL.";
+  let parsed: URL;
+  try {
+    parsed = new URL(rawUrl);
+  } catch {
+    throw new Error(invalidUrlMessage);
+  }
+  if (!["http:", "https:"].includes(parsed.protocol)) {
+    throw new Error(invalidUrlMessage);
+  }
+  if (parsed.username !== "" || parsed.password !== "") {
+    throw new Error("LlamaCppServer: base URL must not include credentials.");
+  }
+  if (!isLocalHostname(parsed.hostname)) {
+    throw new Error("LlamaCppServer: base URL must target a local HTTP(S) server.");
+  }
+  let end = parsed.pathname.length;
+  while (end > 1 && parsed.pathname[end - 1] === "/") {
+    end -= 1;
+  }
+  const trimmedPath = parsed.pathname.slice(0, end);
+  return trimmedPath === "/" ? parsed.origin : parsed.origin + trimmedPath;
+}
+
+/** Resolves `endpoint` relative to `baseUrl`, treating the base path as a directory prefix. */
+export function buildServerUrl(baseUrl: string, endpoint: `/${string}`): string {
+  const relativePath = endpoint.startsWith("/") ? endpoint.substring(1) : endpoint;
+  return new URL(relativePath, baseUrl.endsWith("/") ? baseUrl : baseUrl + "/").href;
+}
+
+// Decides whether a URL hostname counts as local: `localhost`, any
+// `*.localhost` name, or an IP literal accepted by the IPv4/IPv6 checks.
+function isLocalHostname(hostname: string): boolean {
+  const normalized = removeIpv6Brackets(hostname.toLowerCase());
+  const isLocalhostName = normalized === "localhost" || normalized.endsWith(".localhost");
+  return isLocalhostName || isLocalIpv4(normalized) || isLocalIpv6(normalized);
+}
+
+// Returns the text between "[" and "]" for a bracketed IPv6 literal
+// (e.g. "[::1]" -> "::1"); any other hostname is returned unchanged.
+function removeIpv6Brackets(hostname: string): string {
+  const isBracketed = hostname.startsWith("[") && hostname.endsWith("]");
+  return isBracketed ? hostname.slice(1, -1) : hostname;
+}
+
+/**
+ * Reports whether `hostname` is a dotted-quad IPv4 address in a loopback,
+ * private (RFC 1918), or link-local range.
+ *
+ * @param hostname - Candidate host text, e.g. "127.0.0.1".
+ * @returns `true` for 10/8, 127/8, 172.16/12, 192.168/16, and 169.254/16;
+ *   `false` for everything else, including text that is not four decimal octets.
+ */
+function isLocalIpv4(hostname: string): boolean {
+  const segments = hostname.split(".");
+  if (segments.length !== 4) {
+    return false;
+  }
+  // Each segment must be one or more ASCII digits with a value of 0-255.
+  const values = segments.map((segment) => (/^[0-9]+$/.test(segment) ? Number(segment) : NaN));
+  if (values.some((value) => !Number.isInteger(value) || value < 0 || value > 255)) {
+    return false;
+  }
+
+  const [a, b] = values;
+  if (a === 10 || a === 127) {
+    return true;
+  }
+  if (a === 172) {
+    return b >= 16 && b <= 31;
+  }
+  if (a === 192) {
+    return b === 168;
+  }
+  return a === 169 && b === 254;
+}
+
+/** True when `hostname` is a lowercase, unbracketed IPv6 loopback, ULA, or link-local address. */
+function isLocalIpv6(hostname: string): boolean {
+  // Match a complete first hextet followed by ":" so DNS names such as
+  // "fdroid.org" and short groups such as "fc::1" (00fc::/16) are rejected.
+  const isUniqueLocal = /^f[cd][0-9a-f]{2}:/.test(hostname); // fc00::/7
+  const isLinkLocal = /^fe[89ab][0-9a-f]:/.test(hostname); // fe80::/10
+  return hostname === "::1" || isUniqueLocal || isLinkLocal;
 }
 
 const noopRelease = async (): Promise<void> => {};
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts
index f2801948b..d732b2c00 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelInfo.ts
@@ -5,7 +5,11 @@
  */
 
 import type { AiProviderRunFn, ModelInfoTaskInput, ModelInfoTaskOutput } from "@workglow/ai";
-import { acquireBaseUrl, type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client";
+import {
+  acquireBaseUrl,
+  buildServerUrl,
+  type ILlamaCppServerProviderOptions,
+} from "./LlamaCppServer_Client";
 import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
 import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil";
 
@@ -26,7 +30,7 @@ export function createLlamaCppServerModelInfoStream(
         try {
           const { baseUrl, release } = await acquire(model, opts);
           try {
-            const res = await fetch(`${baseUrl}/props`, { signal });
+            const res = await fetch(buildServerUrl(baseUrl, "/props"), { signal });
             if (res.ok) {
               const props = (await res.json()) as {
                 default_generation_settings?: { n_embd?: number };
@@ -64,7 +68,7 @@ export function createLlamaCppServerModelInfoStream(
     try {
       const { baseUrl, release } = await acquire(model, opts);
       try {
-        const res = await fetch(`${baseUrl}/v1/models`, { signal });
+        const res = await fetch(buildServerUrl(baseUrl, "/v1/models"), { signal });
         if (res.ok) {
           const body = (await res.json()) as { data?: Array<{ id?: string }> };
           is_loaded = !!body.data?.some((m) => m.id === expectedName);
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
index 5e97e4acb..36517dfd5 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ModelSearch.ts
@@ -6,7 +6,11 @@
 
 import type { AiProviderRunFn, ModelSearchTaskInput, ModelSearchTaskOutput } from "@workglow/ai";
 import { filterModelSearchResultsByQuery } from "@workglow/ai/provider-utils";
-import type { ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client";
+import {
+  buildServerUrl,
+  normalizeServerBaseUrl,
+  type ILlamaCppServerProviderOptions,
+} from "./LlamaCppServer_Client";
 import { LOCAL_LLAMACPP_SERVER } from "./LlamaCppServer_Constants";
 import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
 
@@ -24,9 +28,9 @@ export function createLlamaCppServerModelSearchStream(
       emit({ type: "finish", data: { results: [] } });
       return;
     }
-    const baseUrl = opts.externalUrl.replace(/\/+$/, "");
     try {
-      const res = await fetch(`${baseUrl}/v1/models`, { signal });
+      const baseUrl = normalizeServerBaseUrl(opts.externalUrl);
+      const res = await fetch(buildServerUrl(baseUrl, "/v1/models"), { signal });
       if (!res.ok) {
         emit({ type: "finish", data: { results: [] } });
         return;
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts
index 7f242c632..b8004cc5c 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextEmbedding.ts
@@ -9,7 +9,11 @@ import type {
   TextEmbeddingTaskInput,
   TextEmbeddingTaskOutput,
 } from "@workglow/ai";
-import { acquireBaseUrl, type ILlamaCppServerProviderOptions } from "./LlamaCppServer_Client";
+import {
+  acquireBaseUrl,
+  buildServerUrl,
+  type ILlamaCppServerProviderOptions,
+} from "./LlamaCppServer_Client";
 import type { LlamaCppServerModelConfig } from "./LlamaCppServer_ModelSchema";
 import { getLlamaCppServerModelName } from "./LlamaCppServer_ModelUtil";
 
@@ -32,7 +36,7 @@ export function createLlamaCppServerTextEmbeddingStream(
     });
     const { baseUrl, release } = await acquire(model, opts);
     try {
-      const response = await fetch(`${baseUrl}/v1/embeddings`, {
+      const response = await fetch(buildServerUrl(baseUrl, "/v1/embeddings"), {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body,
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts
index 29c674674..014685ad4 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextGeneration.ts
@@ -11,6 +11,7 @@ import type {
 } from "@workglow/ai";
 import {
   acquireBaseUrl,
+  buildServerUrl,
   readChatCompletionDeltas,
   type ILlamaCppServerProviderOptions,
 } from "./LlamaCppServer_Client";
@@ -77,7 +78,7 @@ export function createLlamaCppServerTextGenerationStream(
     const { baseUrl, release } = await acquire(model, opts);
     try {
       signal?.throwIfAborted?.();
-      const response = await fetch(`${baseUrl}/v1/chat/completions`, {
+      const response = await fetch(buildServerUrl(baseUrl, "/v1/chat/completions"), {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body,
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts
index c0266ad82..35bab3425 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextRewriter.ts
@@ -7,6 +7,7 @@
 import type { AiProviderRunFn, TextRewriterTaskInput, TextRewriterTaskOutput } from "@workglow/ai";
 import {
   acquireBaseUrl,
+  buildServerUrl,
   readChatCompletionDeltas,
   type ILlamaCppServerProviderOptions,
 } from "./LlamaCppServer_Client";
@@ -31,7 +32,7 @@ export function createLlamaCppServerTextRewriterStream(
     });
     const { baseUrl, release } = await acquire(model, opts);
     try {
-      const response = await fetch(`${baseUrl}/v1/chat/completions`, {
+      const response = await fetch(buildServerUrl(baseUrl, "/v1/chat/completions"), {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body,
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts
index 14c914cd5..28286bdbd 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_TextSummary.ts
@@ -7,6 +7,7 @@
 import type { AiProviderRunFn, TextSummaryTaskInput, TextSummaryTaskOutput } from "@workglow/ai";
 import {
   acquireBaseUrl,
+  buildServerUrl,
   readChatCompletionDeltas,
   type ILlamaCppServerProviderOptions,
 } from "./LlamaCppServer_Client";
@@ -31,7 +32,7 @@ export function createLlamaCppServerTextSummaryStream(
     });
     const { baseUrl, release } = await acquire(model, opts);
     try {
-      const response = await fetch(`${baseUrl}/v1/chat/completions`, {
+      const response = await fetch(buildServerUrl(baseUrl, "/v1/chat/completions"), {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body,
diff --git a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts
index 7f4575234..686275e85 100644
--- a/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts
+++ b/providers/llamacpp-server/src/ai/common/LlamaCppServer_ToolCalling.ts
@@ -20,6 +20,7 @@ import {
 import { parsePartialJson } from "@workglow/util/worker";
 import {
   acquireBaseUrl,
+  buildServerUrl,
   readChatCompletionDeltas,
   type ILlamaCppServerProviderOptions,
 } from "./LlamaCppServer_Client";
@@ -57,7 +58,7 @@ export function createLlamaCppServerToolCallingStream(
     });
     const { baseUrl, release } = await acquire(model, opts);
     try {
-      const response = await fetch(`${baseUrl}/v1/chat/completions`, {
+      const response = await fetch(buildServerUrl(baseUrl, "/v1/chat/completions"), {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body,
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts
index 205be3448..29bbe9c07 100644
--- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_Client.ts
@@ -49,10 +49,10 @@ export async function acquireBaseUrl(
 ): Promise<IAcquiredBaseUrl> {
   const modelBaseUrl = model?.provider_config?.base_url;
   if (typeof modelBaseUrl === "string" && modelBaseUrl.length > 0) {
-    return { baseUrl: stripTrailingSlash(modelBaseUrl), release: noopRelease };
+    return { baseUrl: normalizeServerBaseUrl(modelBaseUrl), release: noopRelease };
   }
   if (typeof opts.externalUrl === "string" && opts.externalUrl.length > 0) {
-    return { baseUrl: stripTrailingSlash(opts.externalUrl), release: noopRelease };
+    return { baseUrl: normalizeServerBaseUrl(opts.externalUrl), release: noopRelease };
   }
   if (opts.transport) {
     const modelPath = model?.provider_config?.model_path;
@@ -67,7 +67,7 @@ export async function acquireBaseUrl(
       opts: {},
     });
     return {
-      baseUrl: stripTrailingSlash(handle.url),
+      baseUrl: normalizeServerBaseUrl(handle.url),
       release: () => handle.release(),
     };
   }
@@ -76,8 +76,94 @@ export async function acquireBaseUrl(
   );
 }
 
-function stripTrailingSlash(url: string): string {
-  return url.replace(/\/+$/, "");
+// Validates that `rawUrl` is a credential-free HTTP(S) URL on a local host and
+// returns its origin plus path, dropping query, fragment, and trailing slashes.
+// Throws `Error` when any of those checks fails.
+export function normalizeServerBaseUrl(rawUrl: string): string {
+  const invalidUrlMessage = "StableDiffusionCpp: base URL must be a valid local HTTP(S) URL.";
+  let parsed: URL;
+  try {
+    parsed = new URL(rawUrl);
+  } catch {
+    throw new Error(invalidUrlMessage);
+  }
+  if (!["http:", "https:"].includes(parsed.protocol)) {
+    throw new Error(invalidUrlMessage);
+  }
+  if (parsed.username !== "" || parsed.password !== "") {
+    throw new Error("StableDiffusionCpp: base URL must not include credentials.");
+  }
+  if (!isLocalHostname(parsed.hostname)) {
+    throw new Error("StableDiffusionCpp: base URL must target a local HTTP(S) server.");
+  }
+  let end = parsed.pathname.length;
+  while (end > 1 && parsed.pathname[end - 1] === "/") {
+    end -= 1;
+  }
+  const trimmedPath = parsed.pathname.slice(0, end);
+  return trimmedPath === "/" ? parsed.origin : parsed.origin + trimmedPath;
+}
+
+/** Resolves `endpoint` relative to `baseUrl`, treating the base path as a directory prefix. */
+export function buildServerUrl(baseUrl: string, endpoint: `/${string}`): string {
+  const relativePath = endpoint.startsWith("/") ? endpoint.substring(1) : endpoint;
+  return new URL(relativePath, baseUrl.endsWith("/") ? baseUrl : baseUrl + "/").href;
+}
+
+// Decides whether a URL hostname counts as local: `localhost`, any
+// `*.localhost` name, or an IP literal accepted by the IPv4/IPv6 checks.
+function isLocalHostname(hostname: string): boolean {
+  const normalized = removeIpv6Brackets(hostname.toLowerCase());
+  const isLocalhostName = normalized === "localhost" || normalized.endsWith(".localhost");
+  return isLocalhostName || isLocalIpv4(normalized) || isLocalIpv6(normalized);
+}
+
+// Returns the text between "[" and "]" for a bracketed IPv6 literal
+// (e.g. "[::1]" -> "::1"); any other hostname is returned unchanged.
+function removeIpv6Brackets(hostname: string): string {
+  const isBracketed = hostname.startsWith("[") && hostname.endsWith("]");
+  return isBracketed ? hostname.slice(1, -1) : hostname;
+}
+
+/**
+ * Reports whether `hostname` is a dotted-quad IPv4 address in a loopback,
+ * private (RFC 1918), or link-local range.
+ *
+ * @param hostname - Candidate host text, e.g. "127.0.0.1".
+ * @returns `true` for 10/8, 127/8, 172.16/12, 192.168/16, and 169.254/16;
+ *   `false` for everything else, including text that is not four decimal octets.
+ */
+function isLocalIpv4(hostname: string): boolean {
+  const segments = hostname.split(".");
+  if (segments.length !== 4) {
+    return false;
+  }
+  // Each segment must be one or more ASCII digits with a value of 0-255.
+  const values = segments.map((segment) => (/^[0-9]+$/.test(segment) ? Number(segment) : NaN));
+  if (values.some((value) => !Number.isInteger(value) || value < 0 || value > 255)) {
+    return false;
+  }
+
+  const [a, b] = values;
+  if (a === 10 || a === 127) {
+    return true;
+  }
+  if (a === 172) {
+    return b >= 16 && b <= 31;
+  }
+  if (a === 192) {
+    return b === 168;
+  }
+  return a === 169 && b === 254;
+}
+
+/** True when `hostname` is a lowercase, unbracketed IPv6 loopback, ULA, or link-local address. */
+function isLocalIpv6(hostname: string): boolean {
+  // Match a complete first hextet followed by ":" so DNS names such as
+  // "fdroid.org" and short groups such as "fc::1" (00fc::/16) are rejected.
+  const isUniqueLocal = /^f[cd][0-9a-f]{2}:/.test(hostname); // fc00::/7
+  const isLinkLocal = /^fe[89ab][0-9a-f]:/.test(hostname); // fe80::/10
+  return hostname === "::1" || isUniqueLocal || isLinkLocal;
 }
 
 const noopRelease = async (): Promise<void> => {};
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts
index cc4dbac24..0e959333d 100644
--- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageEdit.ts
@@ -8,6 +8,7 @@ import type { AiProviderRunFn, ImageEditTaskInput, ImageEditTaskOutput } from "@
 import { imageValueToPngBytes, pngBytesToImageValue } from "@workglow/ai/provider-utils";
 import {
   acquireBaseUrl,
+  buildServerUrl,
   decodeBase64Png,
   encodeBytesToBase64,
   type IStableDiffusionCppProviderOptions,
@@ -47,7 +48,7 @@ export function createStableDiffusionCppImageEditRunFn(
     const { baseUrl, release } = await acquire(model, opts);
     try {
       signal?.throwIfAborted?.();
-      const response = await fetch(`${baseUrl}/img2img`, {
+      const response = await fetch(buildServerUrl(baseUrl, "/img2img"), {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body,
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts
index fa2666069..52850db64 100644
--- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ImageGenerate.ts
@@ -12,6 +12,7 @@ import type {
 import { pngBytesToImageValue } from "@workglow/ai/provider-utils";
 import {
   acquireBaseUrl,
+  buildServerUrl,
   decodeBase64Png,
   type IStableDiffusionCppProviderOptions,
 } from "./StableDiffusionCpp_Client";
@@ -37,8 +38,9 @@ export function createStableDiffusionCppImageGenerateRunFn(
   return async (input, model, signal, emit) => {
     signal?.throwIfAborted?.();
 
-    const endpoint =
-      model?.provider_config?.endpoint ?? opts.endpoint ?? STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT;
+    const endpoint = resolveEndpoint(
+      model?.provider_config?.endpoint ?? opts.endpoint ?? STABLE_DIFFUSION_CPP_DEFAULT_ENDPOINT
+    );
     const modelName = getStableDiffusionCppModelName(model);
 
     const body = JSON.stringify({
@@ -49,7 +51,7 @@ export function createStableDiffusionCppImageGenerateRunFn(
     const { baseUrl, release } = await acquire(model, opts);
     try {
       signal?.throwIfAborted?.();
-      const response = await fetch(`${baseUrl}${endpoint}`, {
+      const response = await fetch(buildServerUrl(baseUrl, endpoint), {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body,
@@ -75,3 +77,10 @@ export function createStableDiffusionCppImageGenerateRunFn(
     }
   };
 }
+
+function resolveEndpoint(endpoint: string): "/txt2img" | "/v1/images/generations" {
+  if (endpoint !== "/txt2img" && endpoint !== "/v1/images/generations") {
+    throw new Error(`StableDiffusionCpp: unsupported image-generation endpoint ${endpoint}`);
+  }
+  return endpoint; // the guard above leaves only the two supported routes
+}
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts
index 03726b6ad..7460cd973 100644
--- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelInfo.ts
@@ -7,6 +7,7 @@
 import type { AiProviderRunFn, ModelInfoTaskInput, ModelInfoTaskOutput } from "@workglow/ai";
 import {
   acquireBaseUrl,
+  buildServerUrl,
   type IStableDiffusionCppProviderOptions,
 } from "./StableDiffusionCpp_Client";
 import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
@@ -26,7 +27,7 @@ export function createStableDiffusionCppModelInfoRunFn(
     try {
       const { baseUrl, release } = await acquire(model, opts);
       try {
-        const res = await fetch(`${baseUrl}/v1/models`, { signal });
+        const res = await fetch(buildServerUrl(baseUrl, "/v1/models"), { signal });
         if (res.ok) {
           const body = (await res.json()) as { data?: Array<{ id?: string }> };
           is_loaded = !!body.data?.some((m) => m.id === expectedName);
diff --git a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts
index bba4b0e4f..e27e14402 100644
--- a/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts
+++ b/providers/stable-diffusion-server/src/ai/common/StableDiffusionCpp_ModelSearch.ts
@@ -6,7 +6,11 @@
 
 import type { AiProviderRunFn, ModelSearchTaskInput, ModelSearchTaskOutput } from "@workglow/ai";
 import { filterModelSearchResultsByQuery } from "@workglow/ai/provider-utils";
-import type { IStableDiffusionCppProviderOptions } from "./StableDiffusionCpp_Client";
+import {
+  buildServerUrl,
+  normalizeServerBaseUrl,
+  type IStableDiffusionCppProviderOptions,
+} from "./StableDiffusionCpp_Client";
 import { LOCAL_STABLE_DIFFUSION_CPP } from "./StableDiffusionCpp_Constants";
 import type { StableDiffusionCppModelConfig } from "./StableDiffusionCpp_ModelSchema";
 
@@ -19,9 +23,9 @@ export function createStableDiffusionCppModelSearchRunFn(
       emit({ type: "finish", data: { results: [] } });
       return;
     }
-    const baseUrl = opts.externalUrl.replace(/\/+$/, "");
     try {
-      const res = await fetch(`${baseUrl}/v1/models`, { signal });
+      const baseUrl = normalizeServerBaseUrl(opts.externalUrl);
+      const res = await fetch(buildServerUrl(baseUrl, "/v1/models"), { signal });
       if (!res.ok) {
         emit({ type: "finish", data: { results: [] } });
         return;

From ec5cc74d3e28c7ebad60a8cbebfb46153a3e4b3d Mon Sep 17 00:00:00 2001
From: Steven Roussey <sroussey@gmail.com>
Date: Sat, 23 May 2026 23:07:36 +0000
Subject: [PATCH 8/8] =?UTF-8?q?docs(stable-diffusion-server):=20README=20?=
 =?UTF-8?q?=E2=80=94=20install,=20quickstart,=20capability=20table,=20brow?=
 =?UTF-8?q?ser=20note?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 providers/stable-diffusion-server/README.md | 125 ++++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 providers/stable-diffusion-server/README.md

diff --git a/providers/stable-diffusion-server/README.md b/providers/stable-diffusion-server/README.md
new file mode 100644
index 000000000..5861874c9
--- /dev/null
+++ b/providers/stable-diffusion-server/README.md
@@ -0,0 +1,125 @@
+# `@workglow/stable-diffusion-server`
+
+OpenAI-compatible HTTP client for an upstream
+[`stable-diffusion.cpp`](https://github.com/leejet/stable-diffusion.cpp) server.
+
+This package **does not bundle stable-diffusion.cpp**. It speaks to a
+running `sd-server` process — either one you start yourself
+(`externalUrl` mode) or one acquired through an `IBackendsTransport`
+(`transport` mode, used by the Workglow Builder's broker).
+
+## Install
+
+```bash
+bun add @workglow/stable-diffusion-server
+```
+
+You also need `@workglow/ai`, `@workglow/task-graph`, `@workglow/storage`,
+`@workglow/job-queue`, and `@workglow/util` (peer dependencies).
+
+## Quickstart — `externalUrl` mode
+
+Start `sd-server` yourself, then point the provider at it:
+
+```bash
+sd-server -m ./models/sd-1.5.gguf --port 7860 --listen
+```
+
+```ts
+import { registerStableDiffusionCppInline } from "@workglow/stable-diffusion-server/ai-runtime";
+
+await registerStableDiffusionCppInline({
+  externalUrl: "http://localhost:7860",
+});
+```
+
+The provider is now visible to the registry as `LOCAL_STABLE_DIFFUSION_CPP`.
+
+## Quickstart — `transport` mode (Electron + broker)
+
+```ts
+import { registerStableDiffusionCppInline } from "@workglow/stable-diffusion-server/ai-runtime";
+
+await registerStableDiffusionCppInline({
+  transport: backendsTransport, // your IBackendsTransport implementation
+  endpoint: "/txt2img",
+});
+```
+
+In transport mode each model record must include
+`provider_config.model_path` — the absolute path to the model file. The
+broker spawns one `sd-server` per `modelPath`, shared by refcount.
+
+## Model record shape
+
+```ts
+{
+  model_id: "sd-1.5",
+  provider: "LOCAL_STABLE_DIFFUSION_CPP",
+  provider_config: {
+    model_path: "/abs/path/to/sd-1.5.gguf",        // required for transport mode
+    model_name: "sd-1.5",                           // optional; sent as OpenAI `model` field
+    base_url: "http://localhost:7860",              // optional override; must be a local http(s) URL
+    endpoint: "/txt2img",                           // optional per-record endpoint override
+  },
+  capabilities: [],
+  metadata: {},
+}
+```
+
+## Supported capabilities
+
+| Capability | Endpoint | Notes |
+|---|---|---|
+| `image.generation` | `POST /txt2img` (or `POST /v1/images/generations`) | txt2img — endpoint flavor configurable, see below |
+| `image.editing` | `POST /img2img` | img2img with base64-encoded init image |
+| `model.info` | `GET /v1/models` | Reports `is_loaded` when the server lists the model's expected name |
+| `model.search` | `GET /v1/models` | externalUrl mode only — see below |
+
+### Endpoint flavor: `/txt2img` vs `/v1/images/generations`
+
+`image.generation` supports two request shapes, selectable per record
+(via `provider_config.endpoint`) or per provider (via the
+`registerStableDiffusionCpp*({ endpoint })` option):
+
+- **`/txt2img`** — the conventional stable-diffusion.cpp HTTP API.
+  Defaults to this if neither model nor provider sets one.
+- **`/v1/images/generations`** — used by OpenAI-compatible sd.cpp
+  builds. Sends `model`, `prompt`, `n`, `size` in the OpenAI request
+  shape; response is parsed as `data[].b64_json`.
+
+`image.editing` always uses `/img2img` regardless of the txt2img
+endpoint flavor.
+
+### Why `model.search` returns `[]` in transport mode
+
+`transport.ensureRunning` requires a `modelPath`, which is what
+`model.search` is meant to help the user pick. The broker's catalog of
+installed models is the Builder UI's concern, not the provider's. In
+`externalUrl` mode `GET /v1/models` works and returns whatever the
+server enumerates.
+
+## Registration shapes
+
+Three registration entry points, all sharing the same options
+(`{ transport?, externalUrl?, endpoint? }`):
+
+- **`registerStableDiffusionCppInline(options)`** — main-thread inline.
+  Primarily used in tests and any single-thread scenario.
+- **`registerStableDiffusionCppWorker(options)`** — called inside a
+  worker runtime. This is the primary production path. The worker
+  constructs its own `IBackendsTransport` (e.g.,
+  `MessagePortBackendsTransport`) and passes it here directly — no port
+  transfer happens.
+- **`registerStableDiffusionCpp({ worker })`** — main-thread proxy that
+  forwards jobs to a worker. The actual run-fns and transport live in
+  the worker; this side only exposes the provider identifier to the
+  registry.
+
+## Browser
+
+`@workglow/stable-diffusion-server/ai` resolves to a browser bundle that
+uses the exact same source as the node bundle. Pure `fetch` works the
+same in both. In a plain browser there is no broker to construct an
+`IBackendsTransport` against, so practical use is `externalUrl` mode;
+nothing in the code forbids passing a custom transport if one exists.