Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions .github/workflows/opencode-review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2239,19 +2239,19 @@ jobs:
id: opencode_review_model_pool
if: needs.coverage-evidence.result == 'success'
continue-on-error: true
timeout-minutes: 20
timeout-minutes: 320
env:
STRIX_GITHUB_MODELS_TOKEN: ${{ secrets.STRIX_GITHUB_MODELS_TOKEN || github.token }}
GITHUB_TOKEN: ${{ secrets.STRIX_GITHUB_MODELS_TOKEN || github.token }}
USE_GITHUB_TOKEN: "true"
SHARE: "false"
NPM_CONFIG_IGNORE_SCRIPTS: "true"
NO_COLOR: "1"
OPENCODE_MODEL_CANDIDATES: "github-models/openai/gpt-5-nano"
OPENCODE_MODEL_CANDIDATES: "github-models/openai/gpt-5-chat github-models/openai/gpt-5-mini github-models/openai/gpt-5-nano github-models/openai/o3 github-models/openai/o3-mini github-models/openai/o4-mini github-models/mistral-ai/mistral-medium-2505 github-models/meta/llama-4-maverick-17b-128e-instruct-fp8 github-models/meta/llama-4-scout-17b-16e-instruct"
OPENCODE_MODEL_ATTEMPTS: "1"
OPENCODE_RUN_TIMEOUT_SECONDS: "240"
OPENCODE_RUN_TIMEOUT_SECONDS: "18000"
OPENCODE_EXPORT_TIMEOUT_SECONDS: "120"
OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "360"
OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "18000"
OPENCODE_BACKOFF_INITIAL_SECONDS: "30"
OPENCODE_BACKOFF_MAX_SECONDS: "30"
OPENCODE_FIRST_ATTEMPT_AGENT: ci-review
Expand Down Expand Up @@ -4022,6 +4022,7 @@ jobs:
local output_file="$1"
local mode="$2"
local jq_filter
local check_runs_json

case "$mode" in
failed)
Expand Down Expand Up @@ -4054,11 +4055,19 @@ jobs:
;;
esac

gh api -X GET "repos/${GH_REPOSITORY}/commits/${HEAD_SHA}/check-runs" \
check_runs_json="$(mktemp)"
if ! gh api -X GET "repos/${GH_REPOSITORY}/commits/${HEAD_SHA}/check-runs" \
-f per_page=100 \
--paginate \
--slurp |
jq -r "$jq_filter" >"$output_file"
--slurp >"$check_runs_json"; then
rm -f "$check_runs_json"
return 1
fi
if ! jq -r "$jq_filter" "$check_runs_json" >"$output_file"; then
rm -f "$check_runs_json"
return 1
fi
rm -f "$check_runs_json"
}

current_head_manual_strix_success_status() {
Expand Down
6 changes: 6 additions & 0 deletions scripts/ci/opencode_review_approve_gate.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
#!/usr/bin/env bash
set -euo pipefail

# Interpreter fallback: if the `py` launcher is on PATH and `python3` cannot
# run a trivial script (missing or non-functional), define a shell function
# named `python3` so later `python3 ...` calls in this script are forwarded to
# `py -3` with every argument passed through unchanged.
# NOTE(review): presumably aimed at Windows hosts where only the `py` launcher
# works — confirm against the intended runner environments.
if command -v py >/dev/null 2>&1 && ! python3 -c 'import sys; sys.exit(0)' >/dev/null 2>&1; then
python3() {
py -3 "$@"
}
fi

if [ $# -ne 4 ] && [ $# -ne 5 ]; then
echo "usage: $0 <expected_head_sha> <expected_run_id> <expected_run_attempt> <comment_body_file> [normalized_json_file]" >&2
exit 64
Expand Down
29 changes: 23 additions & 6 deletions scripts/ci/pr_review_fix_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
is_opencode_review,
review_matches_current_head,
run,
unresolved_thread_count,
)
except ModuleNotFoundError:
from scripts.ci.pr_review_merge_scheduler import (
Expand All @@ -30,7 +29,6 @@
is_opencode_review,
review_matches_current_head,
run,
unresolved_thread_count,
)


Expand Down Expand Up @@ -116,9 +114,6 @@ def needs_autofix(pr: dict[str, Any]) -> tuple[bool, tuple[str, ...]]:
reasons: list[str] = []
if has_current_head_changes_requested(pr) and change_request_is_autofixable(pr):
reasons.append("current-head OpenCode requested changes")
unresolved = unresolved_thread_count(pr)
if unresolved:
reasons.append(f"{unresolved} active unresolved review thread(s)")
return bool(reasons), tuple(reasons)


Expand Down Expand Up @@ -209,7 +204,7 @@ def inspect_pr(

needs_fix, reasons = needs_autofix(pr)
if not needs_fix:
return "skip", ("no current-head change request or active unresolved review thread",)
return "skip", ("no autofixable current-head OpenCode change request",)
Comment thread
seonghobae marked this conversation as resolved.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HIGH OpenCode could not establish approval sufficiency

  • Problem: the model pool exhausted without a valid current-head review control block, so this changed line cannot be approved from deterministic check state alone.
  • Impact: PR-intent mismatches, missing files, robustness bugs, UX/DX regressions, and CodeGraph-backed flow changes could be missed.
  • Fix: rerun OpenCode after model availability recovers, or add the missing source/test/docs/generated verification evidence needed for a source-backed approval.
  • Verification: rerun the OpenCode Review workflow and confirm it emits APPROVE or source-backed REQUEST_CHANGES for this head SHA.


if comments is None:
comments = issue_comments(repo, number)
Expand Down Expand Up @@ -344,8 +339,30 @@ def self_test() -> int:
}
]
},
"reviewThreads": {
"nodes": [
{
"isResolved": False,
"isOutdated": False,
"comments": {
"nodes": [
{
"author": {"login": "copilot-pull-request-reviewer"},
"path": "tools/validate_dom.py",
}
]
},
}
]
},
}
assert needs_autofix(unresolved_thread_pr) == (False, ())
unresolved_only_pr = {
**pr,
"reviews": {"nodes": []},
"reviewThreads": unresolved_thread_pr["reviewThreads"],
}
assert needs_autofix(unresolved_only_pr) == (False, ())
print("self-test passed")
return 0

Expand Down
15 changes: 12 additions & 3 deletions scripts/ci/test_strix_quick_gate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ GATE_SCRIPT="$REPO_ROOT/scripts/ci/strix_quick_gate.sh"

FAILURES=0

# Interpreter fallback for this test script: the probe runs `python3` on a
# no-op program with all output discarded. When that probe fails but the `py`
# launcher is available, shadow `python3` with a shell function that delegates
# to `py -3`, forwarding all arguments as-is.
# NOTE(review): looks intended for hosts where `python3` is absent or a
# non-working stub (e.g. Windows) — confirm.
if command -v py >/dev/null 2>&1 && ! python3 -c 'import sys; sys.exit(0)' >/dev/null 2>&1; then
python3() {
py -3 "$@"
}
fi

record_failure() {
echo "FAIL: $1" >&2
FAILURES=$((FAILURES + 1))
Expand Down Expand Up @@ -508,8 +514,9 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() {
assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "Read and follow the complete review contract" "opencode review uses a compact launcher while keeping the full review contract on disk"
assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "tokens_limit_reached" "opencode review detects provider context-window overflow"
assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "skipping remaining attempts for this model" "opencode review skips same-model retries after context-window overflow"
assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "600"' "opencode primary review has a bounded per-model timeout before trying fallback models"
assert_file_contains "$workflow_file" 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "3600"' "opencode model pool has a one-hour total retry budget"
assert_file_contains "$workflow_file" 'timeout-minutes: 320' "opencode model pool has enough wall-clock budget for large codebase reviews"
assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "18000"' "opencode primary review has a five-hour per-model timeout for large codebase reviews"
assert_file_contains "$workflow_file" 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "18000"' "opencode model pool has a five-hour total retry budget"
assert_file_contains "$workflow_file" "needs.coverage-evidence.result == 'success'" "opencode model pool only runs after coverage evidence passed"
assert_file_contains "$workflow_file" "id: opencode_review_model_pool" "opencode DeepSeek V3 fallback still runs after a primary model timeout or step failure when coverage evidence passed"
assert_file_contains "$workflow_file" "always()" "opencode fallback chain uses always() so failed model steps cannot skip every fallback"
Expand Down Expand Up @@ -618,7 +625,7 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() {
assert_file_contains "$workflow_file" "no model produced a valid review control block" "opencode model-failure path documents why approval is withheld"
assert_file_contains "$workflow_file" 'OPENCODE_MODEL_ATTEMPTS: "1"' "opencode primary and fallback paths avoid multi-attempt stalls on one model"
assert_file_contains "$workflow_file" 'OPENCODE_MODEL_ATTEMPTS: "1"' "opencode catalog fallback tries each model once before moving on"
assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "600"' "opencode catalog fallback has a bounded model review timeout before step timeout"
assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "18000"' "opencode catalog fallback has a bounded model review timeout before step timeout"
assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "OpenCode %s attempt %s/%s failed" "opencode catalog fallback records per-model retry failures"
assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "exponential backoff" "opencode model retry paths use exponential backoff instead of fixed sleeps"
assert_file_contains "$workflow_file" "github-models/openai/o3 github-models/openai/o3-mini github-models/openai/o4-mini" "opencode review includes additional OpenAI reasoning model fallbacks"
Expand Down Expand Up @@ -691,6 +698,8 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() {
assert_file_contains "$workflow_file" 'collect_current_head_commit_check_runs()' "opencode approval falls back to current-head commit check-runs when PR rollup lags"
assert_file_contains "$workflow_file" 'commits/${HEAD_SHA}/check-runs' "opencode approval queries current-head commit check-runs before changing review state"
assert_file_contains "$workflow_file" '--slurp' "opencode approval aggregates paginated commit check-runs before classifying them"
assert_file_contains "$workflow_file" 'jq -r "$jq_filter" "$check_runs_json"' "opencode approval classifies slurped commit check-runs with jq after gh api output is written"
assert_file_not_contains "$workflow_file" '--jq "$jq_filter"' "opencode approval avoids gh api --slurp with --jq, which older gh versions reject"
assert_file_contains "$workflow_file" 'group_by(.name // "")' "opencode approval keeps only the latest same-name commit check-run"
assert_file_contains "$workflow_file" 'map(last)' "opencode approval ignores superseded same-name commit check-runs"
assert_file_contains "$workflow_file" 'collect_current_head_commit_check_runs "$commit_check_runs_file" pending' "opencode approval blocks approval on pending commit check-runs omitted from PR rollup"
Expand Down
12 changes: 7 additions & 5 deletions tests/test_opencode_agent_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,17 +211,19 @@ def test_workflow_provisions_sandbox_tool_and_reviewer_agent():
assert '"## Check outcome"' not in workflow
assert "publish REQUEST_CHANGES when coverage-evidence blocker states" in workflow
assert 'timeout-minutes: 75' in workflow
assert re.search(r"Run OpenCode PR Review model pool[\s\S]{0,240}timeout-minutes: 20", workflow)
assert re.search(r"Run OpenCode PR Review model pool[\s\S]{0,240}timeout-minutes: 320", workflow)
assert 'APPROVAL_CHECK_WAIT_ATTEMPTS: "81"' in workflow
assert 'APPROVAL_CHECK_WAIT_SLEEP_SECONDS: "30"' in workflow
assert 'OPENCODE_MODEL_CANDIDATES: "github-models/openai/gpt-5-nano"' in workflow
assert "github-models/openai/gpt-5-chat github-models/openai/gpt-5-mini github-models/openai/gpt-5-nano" in workflow
assert "github-models/openai/o3 github-models/openai/o3-mini github-models/openai/o4-mini" in workflow
assert 'OPENCODE_MODEL_ATTEMPTS: "1"' in workflow
assert 'OPENCODE_RUN_TIMEOUT_SECONDS: "240"' in workflow
assert 'OPENCODE_RUN_TIMEOUT_SECONDS: "18000"' in workflow
assert 'OPENCODE_EXPORT_TIMEOUT_SECONDS: "120"' in workflow
assert 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "360"' in workflow
assert 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "18000"' in workflow
assert 'OPENCODE_BACKOFF_MAX_SECONDS: "30"' in workflow
assert "${{ runner.temp }}/opencode-review-model-pool.md" in workflow
assert re.search(r'check-runs" \\\n\s+-f per_page=100 \\\n\s+--paginate \\\n\s+--slurp \|\n\s+jq -r "\$jq_filter"', workflow)
assert re.search(r'check-runs" \\\n\s+-f per_page=100 \\\n\s+--paginate \\\n\s+--slurp >"\$check_runs_json"', workflow)
assert 'jq -r "$jq_filter" "$check_runs_json"' in workflow
assert not re.search(r"--slurp\s*\\\n\s*--jq", workflow)
assert "falling back to current-head REST check-runs" in workflow

Expand Down
6 changes: 3 additions & 3 deletions tests/test_pr_review_fix_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_recent_fix_marker_is_head_scoped():


def test_needs_autofix_uses_current_head_evidence():
"""Autofix only starts from current-head review or thread evidence."""
"""Autofix only starts from current-head OpenCode review evidence."""
head = "a" * 40
pr = make_pr(
headRefOid=head,
Expand All @@ -58,7 +58,7 @@ def test_needs_autofix_uses_current_head_evidence():

assert fix.needs_autofix(pr) == (
True,
("current-head OpenCode requested changes", "1 active unresolved review thread(s)"),
("current-head OpenCode requested changes",),
)


Expand Down Expand Up @@ -381,7 +381,7 @@ def test_fix_inspect_skip_wait_and_error_paths(monkeypatch):
)

monkeypatch.setattr(fix, "needs_autofix", lambda pr: (False, ()))
assert fix.inspect_pr("owner/repo", make_pr(), args) == ("skip", ("no current-head change request or active unresolved review thread",))
assert fix.inspect_pr("owner/repo", make_pr(), args) == ("skip", ("no autofixable current-head OpenCode change request",))

monkeypatch.setattr(fix, "needs_autofix", lambda pr: (True, ("reason",)))
monkeypatch.setattr(fix, "issue_comments", lambda repo, number: [{"body": f"{fix.FIX_MARKER} head_sha={'a' * 40} epoch={int(time.time())} -->"}])
Expand Down
Loading