Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,27 @@ Lark ships [Agent Skills](https://docs.getlark.ai/agents) that teach Claude Code
npx skills add getlark/skills
```

Or, equivalently, via the `getlark` CLI:

```bash
getlark skills install
```

Then ask the agent to run the `setup` skill. See the [Agents docs](https://docs.getlark.ai/agents) for the full skill catalog and the opt-in `PostToolUse` hook that validates your branch after every `git commit` or `git push`.

## CI Pipeline Usage

The `--wait` flag makes the CLI easy to use in CI pipelines. The command will block until the workflow completes and exit with a non-zero code on failure.

If your account has auto-repair enabled for deterministic workflows, a failed execution is not counted as a failure right away. Instead, `--wait` follows the repair chain before deciding the exit code:

- If the failure summarization itself does not complete successfully, the workflow **fails**.
- If the failure summarization classifies it as an **app issue**, the workflow **fails**.
- If the repair job itself **fails**, the workflow **fails**.
- If the repair succeeds and the automatic re-run **passes**, the workflow **passes** (the test self-healed) and is reported as auto-repaired.
- If the repair succeeds but the re-run **fails**, the workflow **fails**.

Workflows in `ai_driven` mode, and accounts without auto-repair enabled, keep the previous fail-fast behavior. The `--timeout` covers the whole wait, including any repair chain.

### GitHub Actions Example

Set the `GETLARK_API_KEY` environment variable in GitHub Actions secrets.
Expand Down Expand Up @@ -230,8 +245,8 @@ getlark workflows invoke --group-name "Checkout Flow" --wait
| `--all` | Invoke all workflows |
| `--group-id <groupId>` | Invoke all workflows in a group (by group ID) |
| `--group-name <groupName>` | Invoke all workflows in a group (by group name) |
| `--wait` | Wait for the execution to finish (successfully or unsuccessfully) before exiting |
| `--timeout <seconds>` | Maximum time to wait in seconds (default: 600, requires `--wait`) |
| `--wait` | Wait for the execution to finish before exiting. When auto-repair is enabled, also waits out the repair chain for failed deterministic workflows (see [CI Pipeline Usage](#ci-pipeline-usage)) |
| `--timeout <seconds>` | Maximum time to wait in seconds (default: 600, requires `--wait`); covers the repair chain too |
| `--verbose` | Print verbose output (includes logs) |

One of `--workflow-ids`, `--all`, `--group-id`, or `--group-name` is required.
Expand Down
210 changes: 206 additions & 4 deletions src/api/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,20 @@ import type {
JobResource,
JobStatus,
JobValidationReport,
ListedWorkflowEventResource,
ListJobsResponse,
ListSecretContextsResponse,
ListWorkflowEventsResponse,
ListWorkflowGroupsResponse,
ListWorkflowRepairsResponse,
ListWorkflowsResponse,
SettingsResource,
WorkflowExecutionResource,
WorkflowGenerationResource,
WorkflowGroupResource,
WorkflowRepairResource,
WorkflowResource,
WorkflowSummarizationResource,
} from "./types.js";

export class TimeoutError extends Error {
Expand All @@ -37,6 +40,38 @@ export interface PollOptions {
) => void | Promise<void>;
}

/**
 * The stage of the post-failure repair chain currently being awaited.
 *
 * The chain is walked in this order: `"summarization"` first, then
 * `"repair"` once a successful non-app-issue summarization is found, then
 * `"re-execution"` once the repair has succeeded.
 */
export type RepairChainStage = "summarization" | "repair" | "re-execution";

/** Options controlling how the post-failure repair chain is polled. */
export interface RepairChainPollOptions {
/** Total time budget in milliseconds; once exceeded without a verdict, polling throws a `TimeoutError`. */
timeoutMs: number;
/** Delay in milliseconds between two consecutive polls of the event timeline. */
pollIntervalMs: number;
/**
 * Optional callback invoked (and awaited) once per poll with the stage being
 * awaited and the time elapsed since polling started.
 */
onPoll?: (stage: RepairChainStage, elapsedMs: number) => void | Promise<void>;
}

/**
 * The verdict of waiting out the auto-repair chain that follows a failed
 * execution. `repaired` means the test self-healed (repair succeeded and the
 * re-run passed); every `failure` reason counts as a genuine failure.
 *
 * The union is discriminated on `result`, so callers can narrow `reason` by
 * checking `result` first.
 */
export type RepairChainOutcome =
| {
result: "success";
reason: "repaired";
// ID of the re-execution that passed (not the originally failed execution).
executionId: string;
// Summary text taken from the summarization of the original failure.
summary: string | null;
}
| {
result: "failure";
// app_issue:            the summarization classified the failure as an app defect.
// summarization_failed: the summarization ended in a non-success terminal status.
// repair_failed:        the repair ended in a non-success terminal status.
// reexecution_failed:   the repair succeeded but the re-run did not pass.
reason:
| "app_issue"
| "summarization_failed"
| "repair_failed"
| "reexecution_failed";
// The re-execution's ID for "reexecution_failed"; otherwise the ID of the
// originally failed execution.
executionId: string;
// The summarization's summary for "app_issue", the re-execution's own
// summary for "reexecution_failed", and null for the other reasons.
summary: string | null;
};

export class GetLarkClient {
private baseUrl: string;
private apiKey: string;
Expand Down Expand Up @@ -312,6 +347,24 @@ export class GetLarkClient {
return this.request<ListWorkflowEventsResponse>("GET", path);
}

// ── Summarizations ─────────────────────────────────────────

/**
 * Fetches a single summarization of a workflow.
 *
 * @param workflowId - ID of the workflow the summarization belongs to.
 * @param summarizationId - ID of the summarization to fetch.
 * @returns The summarization resource returned by the API.
 */
async getWorkflowSummarization(
  workflowId: string,
  summarizationId: string,
): Promise<WorkflowSummarizationResource> {
  const path = `/workflows/${workflowId}/summarizations/${summarizationId}`;
  return this.request<WorkflowSummarizationResource>("GET", path);
}

// ── Settings ───────────────────────────────────────────────

/**
 * Fetches the settings resource exposed at `/settings`.
 *
 * @returns The settings resource returned by the API.
 */
async getSettings(): Promise<SettingsResource> {
  const settings = await this.request<SettingsResource>("GET", "/settings");
  return settings;
}

// ── Secret Contexts ────────────────────────────────────────

async listSecretContexts(): Promise<ListSecretContextsResponse> {
Expand Down Expand Up @@ -475,6 +528,10 @@ export class GetLarkClient {

// ── Polling ────────────────────────────────────────────────

/**
 * Returns a promise that resolves after `ms` milliseconds. Shared by the
 * polling loops to wait between two polls.
 */
private sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}

async pollWorkflowExecution(
workflowId: string,
executionId: string,
Expand All @@ -483,9 +540,6 @@ export class GetLarkClient {
const { timeoutMs, pollIntervalMs, onPoll } = options;
const startTime = Date.now();

const sleep = (ms: number) =>
new Promise<void>((resolve) => setTimeout(resolve, ms));

const terminalStatuses = new Set(["success", "failure", "cancelled"]);

while (true) {
Expand All @@ -507,7 +561,155 @@ export class GetLarkClient {
);
}

await sleep(pollIntervalMs);
await this.sleep(pollIntervalMs);
}
}

/**
 * Waits out the auto-repair chain that follows a failed execution and turns
 * it into a pass/fail verdict.
 *
 * After an execution fails, an account with auto-repair enabled will run a
 * summarization, an optional repair, and a follow-up re-execution. This
 * follows that chain via the workflow event timeline and returns a verdict:
 *
 *   summarization "app_issue"      → failure (genuine app defect)
 *   summarization not successful   → failure (inconclusive)
 *   repair not successful          → failure
 *   re-execution not successful    → failure
 *   re-execution success           → success (test self-healed)
 *
 * @param workflowId - Workflow whose event timeline is polled.
 * @param failedExecution - The failed execution whose repair chain is followed.
 * @param options - Time budget, poll interval and optional per-poll callback.
 * @returns The verdict of the repair chain.
 * @throws TimeoutError when no verdict is reached within `options.timeoutMs`.
 */
async pollWorkflowRepairChain(
  workflowId: string,
  failedExecution: WorkflowExecutionResource,
  options: RepairChainPollOptions,
): Promise<RepairChainOutcome> {
  const { timeoutMs, pollIntervalMs, onPoll } = options;
  const startTime = Date.now();

  const terminalStatuses = new Set(["success", "failure", "cancelled"]);
  const at = (ts: string | null) => (ts ? new Date(ts).getTime() : 0);
  // The repair chain only starts once the execution actually fails, so scope
  // the event window to the failure time (stopped_at), falling back to
  // created_at only if the execution never recorded a stop time. Using
  // created_at would widen the window to when the execution was triggered and
  // can pull in unrelated events for long-running executions.
  const failedAt = at(failedExecution.stopped_at ?? failedExecution.created_at);

  // Terminal summarizations already confirmed to belong to a different
  // execution. Which execution a summarization belongs to does not depend on
  // when we ask, so remember the answer instead of re-fetching every poll.
  const foreignSummarizationIds = new Set<string>();

  let stage: RepairChainStage = "summarization";

  while (true) {
    // Events are returned newest-first; reorder the events that belong to
    // this failure oldest-first so we can walk the chain in the order it
    // happened.
    const { workflow_events } = await this.listWorkflowEvents(workflowId, {
      limit: 50,
    });
    const chain = workflow_events
      .filter(
        (e) =>
          // Inclusive bound: a summarization created in the same millisecond
          // as the failure must stay in the window, otherwise it is never
          // seen and the wait can only end in a timeout. The failed execution
          // itself is excluded explicitly so the inclusive bound cannot pull
          // it into its own chain.
          at(e.created_at) >= failedAt &&
          !(e.event_type === "execution" && e.id === failedExecution.id),
      )
      .sort((a, b) => at(a.created_at) - at(b.created_at));

    const elapsedMs = Date.now() - startTime;
    await onPoll?.(stage, elapsedMs);

    // There may be several terminal summarization events in the window (e.g.
    // a newer chain that raced ahead, or stale ones). The oldest is not
    // necessarily ours, so consider every candidate and match on
    // workflow_execution_id rather than acting on the first one we find — and
    // verify ownership BEFORE branching on status, so a failed summarization
    // belonging to a different execution can't wrongly fail our chain.
    let owned:
      | {
          event: ListedWorkflowEventResource;
          detail: WorkflowSummarizationResource;
        }
      | undefined;
    for (const candidate of chain) {
      if (
        candidate.event_type !== "summarization" ||
        !terminalStatuses.has(candidate.status) ||
        foreignSummarizationIds.has(candidate.id)
      ) {
        continue;
      }
      const candidateDetail = await this.getWorkflowSummarization(
        workflowId,
        candidate.id,
      );
      if (candidateDetail.workflow_execution_id === failedExecution.id) {
        owned = { event: candidate, detail: candidateDetail };
        break;
      }
      foreignSummarizationIds.add(candidate.id);
    }

    // If we found OUR summarization, branch on its status.
    if (owned) {
      const { event: summ, detail } = owned;

      if (summ.status !== "success") {
        return {
          result: "failure",
          reason: "summarization_failed",
          executionId: failedExecution.id,
          summary: null,
        };
      }
      if (detail.category === "app_issue") {
        return {
          result: "failure",
          reason: "app_issue",
          executionId: failedExecution.id,
          summary: detail.summary,
        };
      }
      // A test-side issue: an auto-repair should follow.
      stage = "repair";

      const repair = chain.find(
        (e) =>
          e.event_type === "repair" && at(e.created_at) >= at(summ.created_at),
      );
      if (repair && terminalStatuses.has(repair.status)) {
        if (repair.status !== "success") {
          return {
            result: "failure",
            reason: "repair_failed",
            executionId: failedExecution.id,
            summary: null,
          };
        }
        // The repair succeeded; the backend re-runs the test.
        stage = "re-execution";

        // Without a recorded stop time for the repair we cannot tell which
        // execution is the re-run, so keep polling until it shows up.
        const repairStoppedAt = repair.stopped_at;
        const reExecution =
          repairStoppedAt === null
            ? undefined
            : chain.find(
                (e) =>
                  e.event_type === "execution" &&
                  at(e.created_at) >= at(repairStoppedAt),
              );
        if (reExecution && terminalStatuses.has(reExecution.status)) {
          if (reExecution.status === "success") {
            return {
              result: "success",
              reason: "repaired",
              executionId: reExecution.id,
              summary: detail.summary,
            };
          }
          // Surface the re-execution's OWN failure summary rather than the
          // summarization's repair-suggestion text, which describes the
          // original failure and is misleading for a re-execution failure.
          const reExecutionDetail = await this.getWorkflowExecution(
            workflowId,
            reExecution.id,
          );
          return {
            result: "failure",
            reason: "reexecution_failed",
            executionId: reExecution.id,
            summary: reExecutionDetail.summary,
          };
        }
      }
    }

    if (elapsedMs >= timeoutMs) {
      throw new TimeoutError(
        `Timed out after ${Math.round(timeoutMs / 1000)}s waiting for repair of execution ${failedExecution.id} (stage: ${stage})`,
      );
    }

    await this.sleep(pollIntervalMs);
  }
}
}
34 changes: 32 additions & 2 deletions src/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ export interface WorkflowResource {
last_repair_started_at: string | null;
last_repair_stopped_at: string | null;
last_repair_result_type: "success" | "failure" | "cancelled" | null;
last_summarization_id: string | null;
last_summarization_started_at: string | null;
last_summarization_stopped_at: string | null;
last_summarization_result_type: "success" | "failure" | "cancelled" | null;
schedule: string | null;
group_id: string | null;
next_execution_at: string | null;
Expand Down Expand Up @@ -63,7 +67,8 @@ export interface WorkflowArtifactResource {
| "video"
| "javascript"
| "python"
| "shellscript";
| "shellscript"
| "other";
filename: string;
presigned_url: string;
presigned_url_expires_at: string;
Expand Down Expand Up @@ -130,6 +135,24 @@ export interface WorkflowRepairResource {
updated_at: string;
}

/**
 * A failure summarization of a workflow execution, as returned by
 * `GET /workflows/{workflowId}/summarizations/{summarizationId}`.
 */
export interface WorkflowSummarizationResource {
id: string;
workflow_id: string;
// ID of the execution this summarization was produced for; used to tell
// which failed execution a summarization belongs to.
workflow_execution_id: string;
status: "pending" | "running" | "success" | "failure" | "cancelled";
// "app_issue" means the failure is a genuine defect in the app under test;
// the other declared category, "test_issue", indicates a test-side issue
// that an auto-repair may be able to fix.
category: "test_issue" | "app_issue";
// NOTE(review): timestamps are presumably ISO-8601 strings — confirm against the API.
started_at: string | null;
stopped_at: string | null;
// Human-readable summary text; may be null.
summary: string | null;
secret_contexts: string[] | null;
artifacts: WorkflowArtifactResource[];
created_at: string;
updated_at: string;
}

export interface ListedWorkflowRepairResource {
id: string;
workflow_id: string;
Expand All @@ -149,7 +172,7 @@ export interface ListWorkflowRepairsResponse {
export interface ListedWorkflowEventResource {
id: string;
workflow_id: string;
event_type: "generation" | "execution" | "repair";
event_type: "generation" | "execution" | "repair" | "summarization";
status: "pending" | "running" | "success" | "failure" | "cancelled";
started_at: string | null;
stopped_at: string | null;
Expand All @@ -174,6 +197,13 @@ export interface ListWorkflowGroupsResponse {
has_more: boolean;
}

/** Settings resource returned by `GET /settings`; each field is a feature flag. */
export interface SettingsResource {
tasks_enabled: boolean;
deterministic_workflow_enabled: boolean;
// Presumably the flag that turns on the post-failure auto-repair chain for
// deterministic workflows — confirm against the caller that reads it.
auto_repair_deterministic_workflows_enabled: boolean;
qa_report_enabled: boolean;
}

/** The kinds of job the API exposes; `"workflow_import"` is the only one declared here. */
export type JobType = "workflow_import";

export type JobStatus =
Expand Down
Loading