Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ and this project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.ht
- Workflow builder controls for undo/redo and explicit edge disconnect.
- Web recorder stop endpoint (`/api/recorders/web/stop`) and recorder navigation event capture.
- Recorder draft review panel with reorder/edit/skip controls before inserting recorded steps.
- Autopilot plan diagnostics: overall confidence score, node-level insights, and fallback template options.

### Changed
- CI now includes browser smoke validation (`Web E2E Smoke`).
- Web editor keyboard shortcuts now include undo/redo and selection-aware delete behavior.
- Web recorder now follows capture -> review -> insert flow instead of immediate node injection.
- Autopilot now requires explicit confirm-before-create flow and uses richer starter templates for vague prompts.

## [1.0.7] - 2026-02-13

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ It combines a drag-and-drop workflow studio, resilient execution, AI-assisted au
- Core editor UX: undo/redo, duplicate, edge disconnect, auto-layout, and JSON import/export
- Web automation (Playwright) and desktop automation (agent service)
- Recorder flows for web and desktop action capture with review-before-insert draft editing
- Autopilot workflow generation from natural-language prompts
- Autopilot workflow generation from natural-language prompts with confidence scoring and confirm-before-create review
- AI nodes: `transform_llm`, `document_understanding`, `clipboard_ai_transfer`
- Integrations (`http_api`, `postgresql`, `mysql`, `mongodb`, `google_sheets`, `airtable`, `s3`)
- Orchestrator queue with attended/unattended robots and dispatch lifecycle
Expand Down
17 changes: 15 additions & 2 deletions apps/server/src/lib/autopilot.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,29 @@ test("buildAutopilotPlan generates web + api + ai chain from natural language pr
assert.ok(nodeTypes.includes("transform_llm"));
assert.ok(nodeTypes.includes("http_request"));
assert.equal(plan.definition.edges.length, plan.definition.nodes.length - 1);
assert.ok(plan.confidence > 0.6);
assert.equal(plan.requiresConfirmation, true);
assert.ok(plan.nodeInsights.some((insight) => insight.nodeType === "http_request"));
});

// A prompt with no recognizable intent should yield a starter template rather
// than a bare start node. Every template produced by buildFallbackNodes has a
// start node plus at least three steps, hence the ">= 4" bound.
test("buildAutopilotPlan falls back to starter workflow for vague prompts", () => {
  const plan = buildAutopilotPlan("do stuff");
  const nodeTypes = plan.definition.nodes.map((node) => String(node.data?.type || ""));
  assert.ok(nodeTypes.length >= 4);
  assert.ok(plan.fallbackUsed);
  assert.ok(Boolean(plan.fallbackTemplateId));
  // Each fallback template emits a warning containing the phrase "starter template".
  assert.ok(plan.warnings.some((warning) => warning.toLowerCase().includes("starter template")));
  // All known templates are offered so the caller can present alternatives.
  assert.ok(plan.fallbackOptions.length >= 3);
});

// Prompts touching several planned feature areas should surface at least two warnings.
test("buildAutopilotPlan returns warnings for planned feature areas", () => {
  const { warnings } = buildAutopilotPlan("process SAP invoice PDF with clipboard ai");
  const warningCount = warnings.length;
  assert.ok(warningCount >= 2);
});

// Checks that a web prompt yields an insight for the navigation node and that
// the insight carries a numeric confidence.
// NOTE(review): despite the title, nothing here asserts on per-node warnings.
test("buildAutopilotPlan includes per-node warnings for placeholder heavy steps", () => {
  const { nodeInsights } = buildAutopilotPlan("simple web workflow");
  const navigateInsight = nodeInsights.find(({ nodeType }) => nodeType === "playwright_navigate");
  assert.ok(navigateInsight);
  assert.ok(typeof navigateInsight?.confidence === "number");
});
282 changes: 273 additions & 9 deletions apps/server/src/lib/autopilot.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
import type { WorkflowDefinition, WorkflowNode } from "./types.js";

/**
 * Per-node diagnostic attached to a generated plan, as produced by
 * `buildNodeInsights`.
 */
type AutopilotNodeInsight = {
// Id of the workflow node this insight describes.
nodeId: string;
// Value of the node's `data.type`, or an empty string when it is missing.
nodeType: string;
// Value of the node's `data.label`, falling back to the node id.
label: string;
// Two-decimal score: 0.99 for the start node, otherwise clamped to [0.25, 0.98].
confidence: number;
// Short human-readable explanation of why the node was generated.
reason: string;
// Review hints for this node (e.g. placeholder URL or generic selector); may be empty.
warnings: string[];
};

/**
 * Describes one starter template that can be used when a prompt is too broad.
 * Instances live in `FALLBACK_OPTIONS`.
 */
type AutopilotFallbackOption = {
// Template identifier; `buildFallbackNodes` switches on this value.
id: string;
// Display name of the template.
name: string;
// One-sentence summary of the steps the template contains.
description: string;
// Explanation of which kind of prompt the template suits best.
reason: string;
};

/**
 * Result returned by `buildAutopilotPlan`: the draft workflow plus the
 * diagnostics needed to review it before creation.
 */
type PlanResult = {
// Caller-preferred name when provided, otherwise a name derived from the prompt.
name: string;
// Description string that embeds the trimmed prompt.
description: string;
// Capability tags collected while building the plan.
capabilities: string[];
// Plan-level warnings, including the fallback-template notice when one was used.
warnings: string[];
// Overall score from `computePlanConfidence`, two decimals, clamped to [0.2, 0.97].
confidence: number;
// `buildAutopilotPlan` always sets this to true.
requiresConfirmation: boolean;
// True when no prompt intent produced nodes and a starter template was substituted.
fallbackUsed: boolean;
// Id of the substituted starter template; left undefined when no fallback was used.
fallbackTemplateId?: string;
// The full `FALLBACK_OPTIONS` list, returned regardless of whether a fallback was used.
fallbackOptions: AutopilotFallbackOption[];
// One insight per node in `definition.nodes`.
nodeInsights: AutopilotNodeInsight[];
// The generated workflow definition (nodes and edges).
definition: WorkflowDefinition;
};

Expand All @@ -14,6 +36,27 @@ const EXECUTION_DEFAULTS = {
defaultNodeTimeoutMs: 30000
};

/**
 * Starter templates offered when a prompt is too broad to plan from.
 *
 * Order matters: `chooseFallbackTemplate` selects entries by array position
 * (0 = web intake, 1 = data cleanup, 2 = API sync), so reordering this list
 * changes which template is picked. The `id` values must also stay in sync
 * with the branches in `buildFallbackNodes`.
 */
const FALLBACK_OPTIONS: AutopilotFallbackOption[] = [
// Index 0: default choice when no data or API keyword is found.
{
id: "web_intake_ai_review",
name: "Web Intake + AI Review",
description: "Navigate, extract page content, transform with AI, and route for approval.",
reason: "Best for broad web-based operational tasks."
},
// Index 1: chosen for data-oriented keywords.
{
id: "data_cleanup_review",
name: "Data Cleanup + Validation",
description: "Import CSV, normalize values with AI, validate output, and add approval.",
reason: "Best for generic data wrangling prompts."
},
// Index 2: chosen for integration-oriented keywords.
{
id: "api_sync_starter",
name: "API Sync Starter",
description: "Set run context, send API request, and require human approval.",
reason: "Best for broad integration and handoff requests."
}
];

function compactName(rawPrompt: string) {
const cleaned = rawPrompt
.replace(/[^a-zA-Z0-9\s-]/g, " ")
Expand All @@ -37,11 +80,225 @@ function makeNode(id: string, x: number, data: Record<string, unknown>): Workflo
};
}

/**
 * Rounds a number to two decimal places.
 *
 * The value is scaled to hundredths, rounded with `Math.round`, and scaled
 * back, so ties follow `Math.round` semantics (toward positive infinity).
 */
function round2(value: number) {
  const hundredths = Math.round(value * 100);
  return hundredths / 100;
}

/**
 * Builds one diagnostic insight per workflow node.
 *
 * The start node always scores 0.99. Every other node starts at 0.7 (minus
 * 0.12 when a fallback template was used), gains a type-specific boost when
 * the prompt contains a keyword associated with that node type, and otherwise
 * may receive a "needs review" warning. `set_variable` nodes are capped at
 * 0.62. The final score is clamped to [0.25, 0.98] and rounded to two decimals.
 *
 * NOTE(review): the fallback reason text is replaced by the type-specific
 * reason for every node type handled below, so it only survives for node
 * types this function does not recognize.
 *
 * @param nodes Workflow nodes to describe, in plan order.
 * @param prompt Prompt text used for keyword matching (the caller passes it lowercased).
 * @param fallbackUsed Whether the nodes came from a fallback starter template.
 * @returns Insights in the same order as `nodes`.
 */
function buildNodeInsights(nodes: WorkflowNode[], prompt: string, fallbackUsed: boolean): AutopilotNodeInsight[] {
  type InsightRule = {
    keywords: string[];
    boost: number;
    reason: string;
    // Warning added when none of the keywords are present; omitted for types without one.
    missWarning?: string;
  };

  // Fill and click nodes share a single rule.
  const formRule: InsightRule = {
    keywords: ["form", "login", "sign in", "fill"],
    boost: 0.13,
    reason: "Form interaction inferred from prompt.",
    missWarning: "Recorded selectors/values should be reviewed."
  };

  // A Map (not a plain object) so odd type strings never hit prototype keys.
  const rulesByType = new Map<string, InsightRule>([
    [
      "playwright_navigate",
      {
        keywords: ["web", "website", "browser", "url", "portal"],
        boost: 0.16,
        reason: "Web navigation step inferred from prompt.",
        missWarning: "Target URL is placeholder and should be refined."
      }
    ],
    [
      "playwright_extract",
      {
        keywords: ["extract", "scrape", "table", "screen scrape", "data scrape"],
        boost: 0.16,
        reason: "Data extraction step inferred from prompt.",
        missWarning: "Selector is generic and may need adjustment."
      }
    ],
    ["playwright_fill", formRule],
    ["playwright_click", formRule],
    [
      "http_request",
      {
        keywords: ["api", "webhook", "http", "endpoint"],
        boost: 0.15,
        reason: "Integration call inferred from prompt.",
        missWarning: "API URL/body are starter placeholders."
      }
    ],
    [
      "transform_llm",
      {
        keywords: ["ai", "classify", "summarize", "clean", "understand"],
        boost: 0.14,
        reason: "AI transformation inferred from prompt."
      }
    ],
    [
      "data_import_csv",
      {
        keywords: ["csv", "excel", "spreadsheet"],
        boost: 0.14,
        reason: "Structured file ingestion inferred from prompt."
      }
    ],
    [
      "manual_approval",
      {
        keywords: ["approval", "review", "human"],
        boost: 0.14,
        reason: "Human checkpoint inferred from prompt/risk profile."
      }
    ],
    [
      "submit_guard",
      {
        keywords: ["validate", "guard", "check"],
        boost: 0.12,
        reason: "Validation guard inferred from prompt."
      }
    ]
  ]);

  const insights: AutopilotNodeInsight[] = [];

  for (const node of nodes) {
    const nodeType = String(node.data?.type || "");
    const label = String(node.data?.label || node.id);

    // The entry node is structural, so it gets a fixed near-certain score.
    if (nodeType === "start") {
      insights.push({
        nodeId: node.id,
        nodeType,
        label,
        confidence: 0.99,
        reason: "Mandatory workflow entry node.",
        warnings: []
      });
      continue;
    }

    let score = 0.7;
    let explanation = "Generated from recognized prompt intent.";
    const nodeWarnings: string[] = [];

    if (fallbackUsed) {
      score -= 0.12;
      explanation = "Generated from fallback template because prompt intent was broad.";
    }

    const rule = rulesByType.get(nodeType);
    if (rule !== undefined) {
      if (hasAny(prompt, rule.keywords)) {
        score += rule.boost;
      } else if (rule.missWarning !== undefined) {
        nodeWarnings.push(rule.missWarning);
      }
      explanation = rule.reason;
    }

    // Context-seeding nodes never score above 0.62, whatever the prompt says.
    if (nodeType === "set_variable") {
      score = Math.min(score, 0.62);
      explanation = "Context bootstrap used to seed starter workflow.";
    }

    const clamped = Math.max(0.25, Math.min(0.98, score));
    insights.push({
      nodeId: node.id,
      nodeType,
      label,
      confidence: round2(clamped),
      reason: explanation,
      warnings: nodeWarnings
    });
  }

  return insights;
}

/**
 * Picks the starter template that best matches a broad prompt.
 *
 * Data-oriented keywords win over integration keywords; when neither group
 * matches, the web intake template (first entry of `FALLBACK_OPTIONS`) is used.
 *
 * @param prompt Prompt text used for keyword matching.
 * @returns The selected entry of `FALLBACK_OPTIONS`.
 */
function chooseFallbackTemplate(prompt: string) {
  const dataKeywords = ["csv", "excel", "spreadsheet", "report", "dataset", "clean", "normalize"];
  const integrationKeywords = ["api", "webhook", "endpoint", "integration", "sync"];

  // Positions refer to FALLBACK_OPTIONS: 1 = data cleanup, 2 = API sync, 0 = web intake.
  const templateIndex = hasAny(prompt, dataKeywords) ? 1 : hasAny(prompt, integrationKeywords) ? 2 : 0;
  return FALLBACK_OPTIONS[templateIndex];
}

/**
 * Materializes the node list, capability tags, and warning for a starter template.
 *
 * Recognized ids are "data_cleanup_review" and "api_sync_starter"; any other
 * id (including "web_intake_ai_review") yields the web intake template.
 * Nodes are laid out left to right at x = 80, 330, 580, ... in 250-unit steps.
 *
 * @param templateId Id of the template to build.
 * @returns The template's nodes (start node first), its capabilities, and a
 *          single warning explaining that a starter template was generated.
 */
function buildFallbackNodes(templateId: string): { nodes: WorkflowNode[]; capabilities: string[]; warnings: string[] } {
  const start = makeNode("start", 80, { type: "start", label: "Start" });

  switch (templateId) {
    case "data_cleanup_review": {
      const importCsv = makeNode("csv", 330, { type: "data_import_csv", label: "Import CSV", outputKey: "csvRows" });
      const normalize = makeNode("llm", 580, {
        type: "transform_llm",
        label: "Normalize Data",
        inputKey: "csvRows",
        outputKey: "normalizedRows",
        strictJson: true
      });
      const validate = makeNode("validate", 830, {
        type: "submit_guard",
        label: "Validate Rows",
        inputKey: "normalizedRows",
        schema: { type: "array" }
      });
      const approval = makeNode("approval", 1080, {
        type: "manual_approval",
        label: "Approve Output",
        message: "Please review normalized rows before submit."
      });
      return {
        nodes: [start, importCsv, normalize, validate, approval],
        capabilities: ["data-import", "ai-transform", "validation", "human-in-the-loop"],
        warnings: ["Prompt was broad; generated a data cleanup starter template."]
      };
    }

    case "api_sync_starter": {
      const setContext = makeNode("set-context", 330, {
        type: "set_variable",
        label: "Set Request Context",
        key: "task",
        value: "autopilot-api-sync"
      });
      const apiCall = makeNode("api", 580, {
        type: "http_request",
        label: "Send API Request",
        method: "POST",
        url: "https://example.com/api",
        body: { task: "{{task}}" }
      });
      const approval = makeNode("approval", 830, {
        type: "manual_approval",
        label: "Review API Response",
        message: "Confirm API output before downstream actions."
      });
      return {
        nodes: [start, setContext, apiCall, approval],
        capabilities: ["api-integration", "orchestration", "human-in-the-loop"],
        warnings: ["Prompt was broad; generated an API sync starter template."]
      };
    }

    // Unknown ids deliberately fall through to the web intake template.
    default: {
      const navigate = makeNode("navigate", 330, {
        type: "playwright_navigate",
        label: "Navigate",
        url: "https://example.com"
      });
      const extract = makeNode("extract", 580, {
        type: "playwright_extract",
        label: "Extract Data",
        selector: "main, table, .content",
        saveAs: "pageData"
      });
      const summarize = makeNode("llm", 830, {
        type: "transform_llm",
        label: "Summarize Data",
        inputKey: "pageData",
        outputKey: "summary",
        strictJson: true
      });
      const approval = makeNode("approval", 1080, {
        type: "manual_approval",
        label: "Approve Summary",
        message: "Review summary before final handoff."
      });
      return {
        nodes: [start, navigate, extract, summarize, approval],
        capabilities: ["web-automation", "scraping", "ai-transform", "human-in-the-loop"],
        warnings: ["Prompt was broad; generated a web intake starter template."]
      };
    }
  }
}

/**
 * Derives the overall plan confidence from per-node insights and plan signals.
 *
 * Starts from the mean node confidence (0.4 when there are no insights), adds
 * 0.02 per matched prompt signal (capped at 0.2), subtracts 0.03 per warning
 * (capped at 0.2), and subtracts a further 0.06 when a fallback template was
 * used. The result is clamped to [0.2, 0.97] and rounded to two decimals.
 *
 * @param insights Per-node insights whose confidences are averaged.
 * @param warnings Plan-level warnings; only their count is used.
 * @param fallbackUsed Whether a fallback starter template was substituted.
 * @param matchedSignalCount Number of prompt keyword groups that matched.
 * @returns Plan confidence in the range [0.2, 0.97].
 */
function computePlanConfidence(
  insights: AutopilotNodeInsight[],
  warnings: string[],
  fallbackUsed: boolean,
  matchedSignalCount: number
) {
  let meanNodeConfidence = 0.4;
  if (insights.length > 0) {
    let total = 0;
    for (const insight of insights) {
      total += insight.confidence;
    }
    meanNodeConfidence = total / insights.length;
  }

  const signalBonus = Math.min(0.2, matchedSignalCount * 0.02);
  const warningPenalty = Math.min(0.2, warnings.length * 0.03);

  let score = meanNodeConfidence + signalBonus - warningPenalty;
  if (fallbackUsed) {
    score -= 0.06;
  }

  const clamped = Math.max(0.2, Math.min(0.97, score));
  return round2(clamped);
}

export function buildAutopilotPlan(promptRaw: string, preferredName?: string): PlanResult {
const prompt = promptRaw.toLowerCase();
const nodes: WorkflowNode[] = [makeNode("start", 80, { type: "start", label: "Start" })];
const capabilities = new Set<string>();
const warnings: string[] = [];
let fallbackUsed = false;
let fallbackTemplateId: string | undefined;

const coreSignals = [
hasAny(prompt, ["web", "website", "browser", "url", "portal"]),
hasAny(prompt, ["form", "login", "sign in", "fill"]),
hasAny(prompt, ["extract", "scrape", "table", "screen scrape", "data scrape"]),
hasAny(prompt, ["api", "webhook", "http", "endpoint"]),
hasAny(prompt, ["csv", "excel", "spreadsheet"]),
hasAny(prompt, ["ai", "classify", "summarize", "clean", "understand"]),
hasAny(prompt, ["approval", "review", "human"]),
hasAny(prompt, ["validate", "guard", "check"])
];
const matchedSignalCount = coreSignals.filter(Boolean).length;

if (hasAny(prompt, ["web", "website", "browser", "url", "portal"])) {
nodes.push(
Expand Down Expand Up @@ -157,15 +414,13 @@ export function buildAutopilotPlan(promptRaw: string, preferredName?: string): P
}

if (nodes.length === 1) {
nodes.push(
makeNode("set-context", 330, {
type: "set_variable",
label: "Set Context",
key: "task",
value: promptRaw
})
);
capabilities.add("orchestration");
const fallback = chooseFallbackTemplate(prompt);
const fallbackPlan = buildFallbackNodes(fallback.id);
nodes.splice(0, nodes.length, ...fallbackPlan.nodes);
fallbackPlan.capabilities.forEach((item) => capabilities.add(item));
warnings.push(...fallbackPlan.warnings);
fallbackUsed = true;
fallbackTemplateId = fallback.id;
}

const edges = nodes.slice(1).map((node, index) => ({
Expand All @@ -174,11 +429,20 @@ export function buildAutopilotPlan(promptRaw: string, preferredName?: string): P
target: node.id
}));

const nodeInsights = buildNodeInsights(nodes, prompt, fallbackUsed);
const confidence = computePlanConfidence(nodeInsights, warnings, fallbackUsed, matchedSignalCount);

return {
name: preferredName?.trim() || compactName(promptRaw),
description: `Autopilot draft generated from prompt: "${promptRaw.trim()}"`,
capabilities: Array.from(capabilities),
warnings,
confidence,
requiresConfirmation: true,
fallbackUsed,
fallbackTemplateId,
fallbackOptions: FALLBACK_OPTIONS,
nodeInsights,
definition: {
nodes,
edges,
Expand Down
Loading
Loading