From f2990e976cce0d15019b79a80e97e4c07dba9250 Mon Sep 17 00:00:00 2001
From: Jack Brown <jackcbrown89@gmail.com>
Date: Fri, 29 May 2026 11:45:22 -0700
Subject: [PATCH 1/2] Wait out auto-repair chain before failing on invoke
 --wait

When a workflow execution fails, `workflows invoke --wait` no longer counts
it as a failure immediately. If the account has auto-repair enabled and the
workflow is deterministic, the CLI now follows the post-failure event
timeline (summarization -> repair -> re-execution) before deciding the exit
code:

- summarization classifies it as an app issue  -> fail
- summarization is not successful (inconclusive) -> fail
- repair is not successful                       -> fail
- repair succeeds and the re-run passes          -> pass (self-healed)
- repair succeeds but the re-run fails           -> fail

AI-driven workflows and accounts without auto-repair keep the previous
fail-fast behavior. The `--timeout` now covers the whole wait, including the
repair chain.

Adds GET /settings and GET /workflows/{id}/summarizations/{id} to the client,
a pollWorkflowRepairChain() poller, the WorkflowSummarization/Settings types,
and the last_summarization_* fields on WorkflowResource. Also adds the
`skills install` command (src/commands/skills.ts) and registers it in the CLI.
---
 README.md              |  19 ++++-
 src/api/client.ts      | 184 ++++++++++++++++++++++++++++++++++++++++-
 src/api/types.ts       |  34 +++++++-
 src/commands/invoke.ts | 155 ++++++++++++++++++++++++++++++----
 src/commands/skills.ts |  45 ++++++++++
 src/index.ts           |   2 +
 6 files changed, 415 insertions(+), 24 deletions(-)
 create mode 100644 src/commands/skills.ts
diff --git a/README.md b/README.md
index 0db4c09..966ce15 100644
--- a/README.md
+++ b/README.md
@@ -83,12 +83,27 @@ Lark ships [Agent Skills](https://docs.getlark.ai/agents) that teach Claude Code
 npx skills add getlark/skills
 ```
 
+Or, equivalently, via the `getlark` CLI:
+
+```bash
+getlark skills install
+```
+
 Then ask the agent to run the `setup` skill. See the [Agents docs](https://docs.getlark.ai/agents) for the full skill catalog and the opt-in `PostToolUse` hook that validates your branch after every `git commit` or `git push`.
 
 ## CI Pipeline Usage
 
 The `--wait` flag makes it easy to use in CI pipelines. The command will block until the workflow completes and exit with a non-zero code on failure.
 
+If your account has auto-repair enabled for deterministic workflows, a failed execution is not counted as a failure right away. Instead `--wait` follows the repair chain before deciding the exit code:
+
+- If the failure summarization classifies it as an **app issue**, or the summarization itself does not complete successfully (inconclusive), the workflow **fails**.
+- If the repair job itself **fails** or is cancelled, the workflow **fails**.
+- If the repair succeeds and the automatic re-run **passes**, the workflow **passes** (the test self-healed) and is reported as auto-repaired.
+- If the repair succeeds but the re-run **fails**, the workflow **fails**.
+
+Workflows in `ai_driven` mode, and accounts without auto-repair enabled, keep the previous fail-fast behavior. The `--timeout` covers the whole wait, including any repair chain.
+
 ### GitHub Actions Example
 
 Set the `GETLARK_API_KEY` environment variable in GitHub Actions secrets.
@@ -230,8 +245,8 @@ getlark workflows invoke --group-name "Checkout Flow" --wait
 | `--all`                    | Invoke all workflows                                                             |
 | `--group-id <groupId>`     | Invoke all workflows in a group (by group ID)                                    |
 | `--group-name <groupName>` | Invoke all workflows in a group (by group name)                                  |
-| `--wait`                   | Wait for the execution to finish (successfully or unsuccessfully) before exiting |
-| `--timeout <seconds>`      | Maximum time to wait in seconds (default: 600, requires `--wait`)                |
+| `--wait`                   | Wait for the execution to finish before exiting. When auto-repair is enabled, also waits out the repair chain for failed deterministic workflows (see [CI Pipeline Usage](#ci-pipeline-usage)) |
+| `--timeout <seconds>`      | Maximum time to wait in seconds (default: 600, requires `--wait`); covers the repair chain too |
 | `--verbose`                | Print verbose output (includes logs)                                             |
 
 One of `--workflow-ids`, `--all`, `--group-id`, or `--group-name` is required.
diff --git a/src/api/client.ts b/src/api/client.ts
index a7c02d6..9f44a36 100644
--- a/src/api/client.ts
+++ b/src/api/client.ts
@@ -14,11 +14,13 @@ import type {
   ListWorkflowGroupsResponse,
   ListWorkflowRepairsResponse,
   ListWorkflowsResponse,
+  SettingsResource,
   WorkflowExecutionResource,
   WorkflowGenerationResource,
   WorkflowGroupResource,
   WorkflowRepairResource,
   WorkflowResource,
+  WorkflowSummarizationResource,
 } from "./types.js";
 
 export class TimeoutError extends Error {
@@ -37,6 +39,38 @@ export interface PollOptions {
   ) => void | Promise<void>;
 }
 
+/** The stage of the post-failure repair chain currently being awaited. */
+export type RepairChainStage = "summarization" | "repair" | "re-execution";
+
+export interface RepairChainPollOptions {
+  timeoutMs: number;
+  pollIntervalMs: number;
+  onPoll?: (stage: RepairChainStage, elapsedMs: number) => void | Promise<void>;
+}
+
+/**
+ * The verdict of waiting out the auto-repair chain that follows a failed
+ * execution. `repaired` means the test self-healed (repair succeeded and the
+ * re-run passed); every `failure` reason counts as a genuine failure.
+ */
+export type RepairChainOutcome =
+  | {
+      result: "success";
+      reason: "repaired";
+      executionId: string;
+      summary: string | null;
+    }
+  | {
+      result: "failure";
+      reason:
+        | "app_issue"
+        | "summarization_failed"
+        | "repair_failed"
+        | "reexecution_failed";
+      executionId: string;
+      summary: string | null;
+    };
+
 export class GetLarkClient {
   private baseUrl: string;
   private apiKey: string;
@@ -312,6 +346,24 @@ export class GetLarkClient {
     return this.request<ListWorkflowEventsResponse>("GET", path);
   }
 
+  // ── Summarizations ─────────────────────────────────────────
+
+  async getWorkflowSummarization(
+    workflowId: string,
+    summarizationId: string,
+  ): Promise<WorkflowSummarizationResource> {
+    return this.request<WorkflowSummarizationResource>(
+      "GET",
+      `/workflows/${workflowId}/summarizations/${summarizationId}`,
+    );
+  }
+
+  // ── Settings ───────────────────────────────────────────────
+
+  async getSettings(): Promise<SettingsResource> {
+    return this.request<SettingsResource>("GET", "/settings");
+  }
+
   // ── Secret Contexts ────────────────────────────────────────
 
   async listSecretContexts(): Promise<ListSecretContextsResponse> {
@@ -475,6 +527,10 @@ export class GetLarkClient {
 
   // ── Polling ────────────────────────────────────────────────
 
+  private sleep(ms: number): Promise<void> {
+    return new Promise<void>((resolve) => setTimeout(resolve, ms));
+  }
+
   async pollWorkflowExecution(
     workflowId: string,
     executionId: string,
@@ -483,9 +539,6 @@ export class GetLarkClient {
     const { timeoutMs, pollIntervalMs, onPoll } = options;
     const startTime = Date.now();
 
-    const sleep = (ms: number) =>
-      new Promise<void>((resolve) => setTimeout(resolve, ms));
-
     const terminalStatuses = new Set(["success", "failure", "cancelled"]);
 
     while (true) {
@@ -507,7 +560,130 @@ export class GetLarkClient {
         );
       }
 
-      await sleep(pollIntervalMs);
+      await this.sleep(pollIntervalMs);
+    }
+  }
+
+  /**
+   * After an execution fails, an account with auto-repair enabled will run a
+   * summarization, an optional repair, and a follow-up re-execution. This
+   * follows that chain via the workflow event timeline and returns a verdict:
+   *
+   *   summarization "app_issue"        → failure (genuine app defect)
+   *   summarization not successful     → failure (inconclusive)
+   *   repair not successful            → failure
+   *   re-execution failure             → failure
+   *   re-execution success             → success (test self-healed)
+   */
+  async pollWorkflowRepairChain(
+    workflowId: string,
+    failedExecution: WorkflowExecutionResource,
+    options: RepairChainPollOptions,
+  ): Promise<RepairChainOutcome> {
+    const { timeoutMs, pollIntervalMs, onPoll } = options;
+    const startTime = Date.now();
+
+    const terminalStatuses = new Set(["success", "failure", "cancelled"]);
+    const failedAt = new Date(failedExecution.created_at).getTime();
+    const at = (ts: string | null) => (ts ? new Date(ts).getTime() : 0);
+
+    let stage: RepairChainStage = "summarization";
+
+    while (true) {
+      // Events are returned newest-first; reorder the events that belong to
+      // this failure (created after the failed execution) oldest-first so we
+      // can walk the chain in the order it happened.
+      const { workflow_events } = await this.listWorkflowEvents(workflowId, {
+        limit: 50,
+      });
+      const chain = workflow_events
+        .filter((e) => at(e.created_at) > failedAt)
+        .sort((a, b) => at(a.created_at) - at(b.created_at));
+
+      const elapsedMs = Date.now() - startTime;
+      await onPoll?.(stage, elapsedMs);
+
+      const summ = chain.find((e) => e.event_type === "summarization");
+      if (summ && terminalStatuses.has(summ.status)) {
+        if (summ.status !== "success") {
+          return {
+            result: "failure",
+            reason: "summarization_failed",
+            executionId: failedExecution.id,
+            summary: null,
+          };
+        }
+        const detail = await this.getWorkflowSummarization(workflowId, summ.id);
+        // Make sure this summarization is the one for our failed execution and
+        // not a newer one that raced ahead; if not, keep waiting for ours.
+        if (detail.workflow_execution_id !== failedExecution.id) {
+          if (elapsedMs >= timeoutMs) {
+            throw new TimeoutError(
+              `Timed out after ${Math.round(timeoutMs / 1000)}s waiting for repair of execution ${failedExecution.id} (stage: ${stage})`,
+            );
+          }
+          await this.sleep(pollIntervalMs);
+          continue;
+        }
+        if (detail.category === "app_issue") {
+          return {
+            result: "failure",
+            reason: "app_issue",
+            executionId: failedExecution.id,
+            summary: detail.summary,
+          };
+        }
+        // A test-side issue: an auto-repair should follow.
+        stage = "repair";
+
+        const repair = chain.find(
+          (e) =>
+            e.event_type === "repair" && at(e.created_at) >= at(summ.created_at),
+        );
+        if (repair && terminalStatuses.has(repair.status)) {
+          if (repair.status !== "success") {
+            return {
+              result: "failure",
+              reason: "repair_failed",
+              executionId: failedExecution.id,
+              summary: null,
+            };
+          }
+          // The repair succeeded; the backend re-runs the test.
+          stage = "re-execution";
+
+          const reExecution = chain.find(
+            (e) =>
+              e.event_type === "execution" &&
+              repair.stopped_at !== null &&
+              at(e.created_at) >= at(repair.stopped_at),
+          );
+          if (reExecution && terminalStatuses.has(reExecution.status)) {
+            if (reExecution.status === "success") {
+              return {
+                result: "success",
+                reason: "repaired",
+                executionId: reExecution.id,
+                summary: detail.summary,
+              };
+            }
+            return {
+              result: "failure",
+              reason: "reexecution_failed",
+              executionId: reExecution.id,
+              summary: detail.summary,
+            };
+          }
+        }
+      }
+
+      if (elapsedMs >= timeoutMs) {
+        throw new TimeoutError(
+          `Timed out after ${Math.round(timeoutMs / 1000)}s waiting for repair of execution ${failedExecution.id} (stage: ${stage})`,
+        );
+      }
+
+      await this.sleep(pollIntervalMs);
     }
   }
 }
diff --git a/src/api/types.ts b/src/api/types.ts
index 7c0bec7..7de9b47 100644
--- a/src/api/types.ts
+++ b/src/api/types.ts
@@ -33,6 +33,10 @@ export interface WorkflowResource {
   last_repair_started_at: string | null;
   last_repair_stopped_at: string | null;
   last_repair_result_type: "success" | "failure" | "cancelled" | null;
+  last_summarization_id: string | null;
+  last_summarization_started_at: string | null;
+  last_summarization_stopped_at: string | null;
+  last_summarization_result_type: "success" | "failure" | "cancelled" | null;
   schedule: string | null;
   group_id: string | null;
   next_execution_at: string | null;
@@ -63,7 +67,8 @@ export interface WorkflowArtifactResource {
     | "video"
     | "javascript"
     | "python"
-    | "shellscript";
+    | "shellscript"
+    | "other";
   filename: string;
   presigned_url: string;
   presigned_url_expires_at: string;
@@ -130,6 +135,24 @@ export interface WorkflowRepairResource {
   updated_at: string;
 }
 
+export interface WorkflowSummarizationResource {
+  id: string;
+  workflow_id: string;
+  workflow_execution_id: string;
+  status: "pending" | "running" | "success" | "failure" | "cancelled";
+  // "app_issue" means the failure is a genuine defect in the app under test;
+  // any other category (e.g. "test_issue") indicates a test-side issue that
+  // an auto-repair may be able to fix.
+  category: "test_issue" | "app_issue";
+  started_at: string | null;
+  stopped_at: string | null;
+  summary: string | null;
+  secret_contexts: string[] | null;
+  artifacts: WorkflowArtifactResource[];
+  created_at: string;
+  updated_at: string;
+}
+
 export interface ListedWorkflowRepairResource {
   id: string;
   workflow_id: string;
@@ -149,7 +172,7 @@ export interface ListWorkflowRepairsResponse {
 export interface ListedWorkflowEventResource {
   id: string;
   workflow_id: string;
-  event_type: "generation" | "execution" | "repair";
+  event_type: "generation" | "execution" | "repair" | "summarization";
   status: "pending" | "running" | "success" | "failure" | "cancelled";
   started_at: string | null;
   stopped_at: string | null;
@@ -174,6 +197,13 @@ export interface ListWorkflowGroupsResponse {
   has_more: boolean;
 }
 
+export interface SettingsResource {
+  tasks_enabled: boolean;
+  deterministic_workflow_enabled: boolean;
+  auto_repair_deterministic_workflows_enabled: boolean;
+  qa_report_enabled: boolean;
+}
+
 export type JobType = "workflow_import";
 
 export type JobStatus =
diff --git a/src/commands/invoke.ts b/src/commands/invoke.ts
index 0868642..b752ef2 100644
--- a/src/commands/invoke.ts
+++ b/src/commands/invoke.ts
@@ -2,11 +2,25 @@ import { Option, type Command } from "commander";
 import { GetLarkClient, TimeoutError } from "../api/client.js";
 import { getConfig } from "../config.js";
 import type {
-  WorkflowExecutionResource,
   WorkflowResource,
   WorkflowGroupResource,
 } from "../api/types.js";
 
+/**
+ * The resolved result of invoking a single workflow, including the verdict of
+ * the auto-repair chain when a failed execution was given the chance to heal.
+ */
+interface WorkflowOutcome {
+  workflowId: string;
+  executionId: string;
+  result: "pending" | "running" | "success" | "failure" | "cancelled";
+  /** The execution failed but was auto-repaired and passed on re-run. */
+  repaired: boolean;
+  /** Summarization classified the failure as a genuine app defect. */
+  appIssue: boolean;
+  summary: string | null;
+}
+
 const PAGE_SIZE = 100;
 
 async function fetchAllWorkflows(
@@ -70,19 +84,30 @@ async function invokeWorkflow(
   wait: boolean,
   timeoutSeconds: number,
   verbose: boolean,
-): Promise<WorkflowExecutionResource> {
+  autoRepairEnabled: boolean,
+): Promise<WorkflowOutcome> {
   const execution = await client.invokeWorkflow(workflowId);
   if (!wait) {
-    return execution;
+    return {
+      workflowId,
+      executionId: execution.id,
+      result: execution.status,
+      repaired: false,
+      appIssue: false,
+      summary: execution.summary,
+    };
   }
 
+  // A single deadline covers the whole wait — the execution plus, if it fails,
+  // the auto-repair chain that may follow.
+  const deadline = Date.now() + timeoutSeconds * 1000;
   let logOffset = 0;
 
   const finalExecution = await client.pollWorkflowExecution(
     workflowId,
     execution.id,
     {
-      timeoutMs: timeoutSeconds * 1000,
+      timeoutMs: deadline - Date.now(),
       pollIntervalMs: POLL_INTERVAL_MS,
       onPoll: async (exec, elapsedMs) => {
         if (!verbose) {
@@ -113,7 +138,74 @@ async function invokeWorkflow(
     },
   );
 
-  return finalExecution;
+  const outcome: WorkflowOutcome = {
+    workflowId,
+    executionId: finalExecution.id,
+    result: finalExecution.status,
+    repaired: false,
+    appIssue: false,
+    summary: finalExecution.summary,
+  };
+
+  if (finalExecution.status !== "failure") {
+    return outcome;
+  }
+
+  // The execution failed. Unless the account has auto-repair enabled, a failure
+  // is final — preserve the original fail-fast behavior.
+  if (!autoRepairEnabled) {
+    return outcome;
+  }
+
+  // Auto-repair only applies to deterministic workflows, so fail immediately
+  // for AI-driven ones rather than waiting for a repair that never comes.
+  const workflow = await client.getWorkflow(workflowId);
+  if (workflow.mode !== "deterministic") {
+    return outcome;
+  }
+
+  // If execution polling already consumed the deadline, treat the failure as
+  // final rather than entering the repair chain only to time out immediately.
+  const remainingMs = deadline - Date.now();
+  if (remainingMs <= 0) {
+    return outcome;
+  }
+
+  if (verbose) {
+    logForWorkflow(
+      workflowId,
+      "Execution failed; waiting for summarization/auto-repair to settle...",
+    );
+  }
+
+  const verdict = await client.pollWorkflowRepairChain(
+    workflowId,
+    finalExecution,
+    {
+      timeoutMs: remainingMs,
+      pollIntervalMs: POLL_INTERVAL_MS,
+      onPoll: (stage, elapsedMs) => {
+        if (!verbose) {
+          return;
+        }
+        logForWorkflow(
+          workflowId,
+          "Repair stage: %s (%s elapsed)",
+          stage,
+          formatElapsed(elapsedMs),
+        );
+      },
+    },
+  );
+
+  return {
+    workflowId,
+    executionId: verdict.executionId,
+    result: verdict.result,
+    repaired: verdict.result === "success",
+    appIssue: verdict.reason === "app_issue",
+    summary: verdict.summary ?? finalExecution.summary,
+  };
 }
 
 export function registerInvokeCommand(
@@ -235,6 +327,26 @@ export function registerInvokeCommand(
             process.exit(3);
           }
 
+          // When waiting, a failed execution may be auto-repaired before it
+          // counts as a real failure. Check the account setting once up front;
+          // if it can't be read, fall back to fail-fast behavior.
+          let autoRepairEnabled = false;
+          if (cmdOpts.wait) {
+            try {
+              const settings = await client.getSettings();
+              autoRepairEnabled =
+                settings.auto_repair_deterministic_workflows_enabled;
+            } catch (err) {
+              if (verbose) {
+                const message =
+                  err instanceof Error ? err.message : String(err);
+                console.error(
+                  `Warning: could not read settings (${message}); treating failures as final.`,
+                );
+              }
+            }
+          }
+
           const workflowExecutionPromises = workflowIds.map((workflowId) =>
             invokeWorkflow(
               client,
@@ -242,6 +354,7 @@ export function registerInvokeCommand(
               cmdOpts.wait ?? false,
               timeoutSeconds,
               verbose,
+              autoRepairEnabled,
             ),
           );
 
@@ -257,7 +370,7 @@ export function registerInvokeCommand(
             });
           }
 
-          let workflowExecutionResults: PromiseSettledResult<WorkflowExecutionResource>[] =
+          let workflowExecutionResults: PromiseSettledResult<WorkflowOutcome>[] =
             [];
           if (timeoutPromise) {
             const result = await Promise.race([
@@ -281,20 +394,30 @@ export function registerInvokeCommand(
           const cancelledWorkflowIds: string[] = [];
           for (const result of workflowExecutionResults) {
             if (result.status === "fulfilled") {
-              if (result.value.status === "success") {
-                console.log(
-                  `Workflow ${result.value.workflow_id} executed successfully. Execution ID: ${result.value.id}`,
-                );
-              } else if (result.value.status === "failure") {
+              const outcome = result.value;
+              if (outcome.result === "success") {
+                if (outcome.repaired) {
+                  console.log(
+                    `Workflow ${outcome.workflowId} failed but was auto-repaired and passed on re-run. Execution ID: ${outcome.executionId}`,
+                  );
+                } else {
+                  console.log(
+                    `Workflow ${outcome.workflowId} executed successfully. Execution ID: ${outcome.executionId}`,
+                  );
+                }
+              } else if (outcome.result === "failure") {
+                const label = outcome.appIssue
+                  ? "executed with failure (app issue)"
+                  : "executed with failure";
                 console.error(
-                  `Workflow ${result.value.workflow_id} executed with failure. Execution ID: ${result.value.id}. Summary: ${result.value.summary}`,
+                  `Workflow ${outcome.workflowId} ${label}. Execution ID: ${outcome.executionId}. Summary: ${outcome.summary}`,
                 );
-                failedWorkflowIds.push(result.value.workflow_id);
-              } else if (result.value.status === "cancelled") {
+                failedWorkflowIds.push(outcome.workflowId);
+              } else if (outcome.result === "cancelled") {
                 console.error(
-                  `Workflow ${result.value.workflow_id} was cancelled. Execution ID: ${result.value.id}`,
+                  `Workflow ${outcome.workflowId} was cancelled. Execution ID: ${outcome.executionId}`,
                 );
-                cancelledWorkflowIds.push(result.value.workflow_id);
+                cancelledWorkflowIds.push(outcome.workflowId);
               }
             } else {
               console.error(`Error: ${result.reason}`);
diff --git a/src/commands/skills.ts b/src/commands/skills.ts
new file mode 100644
index 0000000..41dd5e2
--- /dev/null
+++ b/src/commands/skills.ts
@@ -0,0 +1,45 @@
+import { spawn } from "node:child_process";
+import type { Command } from "commander";
+
+const SKILLS_PACKAGE = "getlark/skills";
+
+export function registerSkillsCommand(program: Command): void {
+  const skills = program
+    .command("skills")
+    .description(
+      "Install Lark Agent Skills (Claude Code plugin, Cursor, Codex, etc.)",
+    );
+
+  skills
+    .command("install")
+    .description(
+      `Install the Lark skills into the current project via \`npx skills add ${SKILLS_PACKAGE}\`. Works with Claude Code, Cursor, Codex, OpenCode, Windsurf, Gemini CLI, and Copilot.`,
+    )
+    .action(() => {
+      const cmdArgs = ["-y", "skills", "add", SKILLS_PACKAGE];
+
+      const child = spawn("npx", cmdArgs, {
+        stdio: "inherit",
+        shell: false,
+      });
+
+      child.on("error", (err) => {
+        const message = err instanceof Error ? err.message : String(err);
+        console.error(
+          `Error: Failed to run \`npx ${cmdArgs.join(" ")}\`: ${message}`,
+        );
+        console.error(
+          "Make sure Node.js (>= 18) and npx are installed and on your PATH.",
+        );
+        process.exit(1);
+      });
+
+      child.on("exit", (code, signal) => {
+        if (signal) {
+          process.kill(process.pid, signal);
+          return;
+        }
+        process.exit(code ?? 1);
+      });
+    });
+}
diff --git a/src/index.ts b/src/index.ts
index e39cd16..cee4909 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -15,6 +15,7 @@ import { registerEventsCommand } from "./commands/events.js";
 import { registerSecretContextsCommand } from "./commands/secret-contexts.js";
 import { registerWorkflowGroupsCommand } from "./commands/workflow-groups.js";
 import { registerJobsCommand } from "./commands/jobs.js";
+import { registerSkillsCommand } from "./commands/skills.js";
 import { registerLoginCommand } from "./commands/login.js";
 import { registerLogoutCommand } from "./commands/logout.js";
 import { registerConfigCommand } from "./commands/config.js";
@@ -61,6 +62,7 @@ registerEventsCommand(workflows, program);
 registerSecretContextsCommand(program);
 registerWorkflowGroupsCommand(program);
 registerJobsCommand(program);
+registerSkillsCommand(program);
 
 registerLoginCommand(program);
 registerLogoutCommand(program);

From c6b1fc15872e0005798ff10b3ae2cd59f77758ca Mon Sep 17 00:00:00 2001
From: Jack Brown <jackcbrown89@gmail.com>
Date: Fri, 29 May 2026 13:35:35 -0700
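Subject: [PATCH 2/2] Address review feedback on repair-chain polling

- pollWorkflowRepairChain: scope the event window to the failed execution's
  stopped_at (falling back to created_at) instead of created_at.
- pollWorkflowRepairChain: check every terminal summarization in the window
  and match on workflow_execution_id before branching on its status, so a
  summarization that belongs to another execution cannot fail this chain.
- pollWorkflowRepairChain: when the re-execution fails, report the
  re-execution's own summary rather than the original summarization text.
- invoke: drop the outer Promise.race timeout; exit with 2 if any workflow
  timed out and with 3 if a workflow rejected with any other error, instead
  of falling through to exit 0.
- skills install: spawn npx through the shell on Windows so npx.cmd resolves.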

---
 src/api/client.ts      | 58 ++++++++++++++++++++++++++++++------------
 src/commands/invoke.ts | 57 +++++++++++++++++++----------------------
 src/commands/skills.ts |  5 +++-
 3 files changed, 72 insertions(+), 48 deletions(-)

diff --git a/src/api/client.ts b/src/api/client.ts
index 9f44a36..87bddce 100644
--- a/src/api/client.ts
+++ b/src/api/client.ts
@@ -8,6 +8,7 @@ import type {
   JobResource,
   JobStatus,
   JobValidationReport,
+  ListedWorkflowEventResource,
   ListJobsResponse,
   ListSecretContextsResponse,
   ListWorkflowEventsResponse,
@@ -584,8 +585,13 @@ export class GetLarkClient {
     const startTime = Date.now();
 
     const terminalStatuses = new Set(["success", "failure", "cancelled"]);
-    const failedAt = new Date(failedExecution.created_at).getTime();
     const at = (ts: string | null) => (ts ? new Date(ts).getTime() : 0);
+    // The repair chain only starts once the execution actually fails, so scope
+    // the event window to the failure time (stopped_at), falling back to
+    // created_at only if the execution never recorded a stop time. Using
+    // created_at would widen the window to when the execution was triggered and
+    // can pull in unrelated events for long-running executions.
+    const failedAt = at(failedExecution.stopped_at ?? failedExecution.created_at);
 
     let stage: RepairChainStage = "summarization";
 
@@ -603,8 +609,33 @@ export class GetLarkClient {
       const elapsedMs = Date.now() - startTime;
       await onPoll?.(stage, elapsedMs);
 
-      const summ = chain.find((e) => e.event_type === "summarization");
-      if (summ && terminalStatuses.has(summ.status)) {
+      // There may be several terminal summarization events in the window (e.g.
+      // a newer chain that raced ahead, or stale ones). The oldest is not
+      // necessarily ours, so consider every candidate and match on
+      // workflow_execution_id rather than acting on the first one we find — and
+      // verify ownership BEFORE branching on status, so a failed summarization
+      // belonging to a different execution can't wrongly fail our chain.
+      let summ: ListedWorkflowEventResource | undefined;
+      let detail: WorkflowSummarizationResource | undefined;
+      for (const candidate of chain) {
+        if (
+          candidate.event_type !== "summarization" ||
+          !terminalStatuses.has(candidate.status)
+        ) {
+          continue;
+        }
+        const candidateDetail = await this.getWorkflowSummarization(
+          workflowId,
+          candidate.id,
+        );
+        if (candidateDetail.workflow_execution_id === failedExecution.id) {
+          summ = candidate;
+          detail = candidateDetail;
+          break;
+        }
+      }
+      // If we found OUR summarization, branch on its status.
+      if (summ && detail) {
         if (summ.status !== "success") {
           return {
             result: "failure",
@@ -613,18 +644,6 @@ export class GetLarkClient {
             summary: null,
           };
         }
-        const detail = await this.getWorkflowSummarization(workflowId, summ.id);
-        // Make sure this summarization is the one for our failed execution and
-        // not a newer one that raced ahead; if not, keep waiting for ours.
-        if (detail.workflow_execution_id !== failedExecution.id) {
-          if (elapsedMs >= timeoutMs) {
-            throw new TimeoutError(
-              `Timed out after ${Math.round(timeoutMs / 1000)}s waiting for repair of execution ${failedExecution.id} (stage: ${stage})`,
-            );
-          }
-          await this.sleep(pollIntervalMs);
-          continue;
-        }
         if (detail.category === "app_issue") {
           return {
             result: "failure",
@@ -667,11 +686,18 @@ export class GetLarkClient {
                 summary: detail.summary,
               };
             }
+            // Surface the re-execution's OWN failure summary rather than the
+            // summarization's repair-suggestion text, which describes the
+            // original failure and is misleading for a re-execution failure.
+            const reExecutionDetail = await this.getWorkflowExecution(
+              workflowId,
+              reExecution.id,
+            );
             return {
               result: "failure",
               reason: "reexecution_failed",
               executionId: reExecution.id,
-              summary: detail.summary,
+              summary: reExecutionDetail.summary,
             };
           }
         }
diff --git a/src/commands/invoke.ts b/src/commands/invoke.ts
index b752ef2..1aaef16 100644
--- a/src/commands/invoke.ts
+++ b/src/commands/invoke.ts
@@ -358,40 +358,17 @@ export function registerInvokeCommand(
             ),
           );
 
-          let timeoutPromise: Promise<void> | undefined;
-          if (cmdOpts.timeout) {
-            timeoutPromise = new Promise((resolve) => {
-              setTimeout(
-                () => {
-                  resolve();
-                },
-                parseInt(cmdOpts.timeout!, 10) * 1000,
-              );
-            });
-          }
-
-          let workflowExecutionResults: PromiseSettledResult<WorkflowOutcome>[] =
-            [];
-          if (timeoutPromise) {
-            const result = await Promise.race([
-              Promise.allSettled(workflowExecutionPromises),
-              timeoutPromise,
-            ]);
-            if (!result) {
-              console.error(
-                "Timed out waiting for workflow executions to complete",
-              );
-              process.exit(2);
-            }
-            workflowExecutionResults = result;
-          } else {
-            workflowExecutionResults = await Promise.allSettled(
-              workflowExecutionPromises,
-            );
-          }
+          // The per-execution deadline inside invokeWorkflow is authoritative
+          // and throws TimeoutError regardless of whether --timeout was passed,
+          // so there's no need for an outer race here.
+          const workflowExecutionResults = await Promise.allSettled(
+            workflowExecutionPromises,
+          );
 
           const failedWorkflowIds: string[] = [];
           const cancelledWorkflowIds: string[] = [];
+          let timedOut = false;
+          let unexpectedError = false;
           for (const result of workflowExecutionResults) {
             if (result.status === "fulfilled") {
               const outcome = result.value;
@@ -420,10 +397,22 @@ export function registerInvokeCommand(
                 cancelledWorkflowIds.push(outcome.workflowId);
               }
             } else {
+              if (result.reason instanceof TimeoutError) {
+                timedOut = true;
+              } else {
+                unexpectedError = true;
+              }
               console.error(`Error: ${result.reason}`);
             }
           }
 
+          // A timeout takes priority over other outcomes: the documented
+          // contract is exit code 2, and without this a timed-out --wait run
+          // would otherwise fall through to exit 0.
+          if (timedOut) {
+            process.exit(2);
+          }
+
           if (cancelledWorkflowIds.length > 0) {
             console.error(
               `Workflows cancelled: ${cancelledWorkflowIds.join(", ")}`,
@@ -441,6 +430,12 @@ export function registerInvokeCommand(
             process.exit(1);
           }
 
+          // A non-timeout rejection is an unexpected error; don't let it pass
+          // silently as success.
+          if (unexpectedError) {
+            process.exit(3);
+          }
+
           process.exit(0);
         } catch (error) {
           if (error instanceof TimeoutError) {
diff --git a/src/commands/skills.ts b/src/commands/skills.ts
index 41dd5e2..5d96d26 100644
--- a/src/commands/skills.ts
+++ b/src/commands/skills.ts
@@ -20,7 +20,10 @@ export function registerSkillsCommand(program: Command): void {
 
       const child = spawn("npx", cmdArgs, {
         stdio: "inherit",
-        shell: false,
+        // On Windows npx resolves to npx.cmd, which spawn can't find with
+        // shell: false (ENOENT). cmdArgs are static, so using the shell on
+        // Windows is safe; keep shell: false everywhere else.
+        shell: process.platform === "win32",
       });
 
       child.on("error", (err) => {