diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index 4b6a0abb..4bbebe04 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -2239,7 +2239,7 @@ jobs: id: opencode_review_model_pool if: needs.coverage-evidence.result == 'success' continue-on-error: true - timeout-minutes: 20 + timeout-minutes: 320 env: STRIX_GITHUB_MODELS_TOKEN: ${{ secrets.STRIX_GITHUB_MODELS_TOKEN || github.token }} GITHUB_TOKEN: ${{ secrets.STRIX_GITHUB_MODELS_TOKEN || github.token }} @@ -2247,11 +2247,11 @@ jobs: SHARE: "false" NPM_CONFIG_IGNORE_SCRIPTS: "true" NO_COLOR: "1" - OPENCODE_MODEL_CANDIDATES: "github-models/openai/gpt-5-nano" + OPENCODE_MODEL_CANDIDATES: "github-models/openai/gpt-5-chat github-models/openai/gpt-5-mini github-models/openai/gpt-5-nano github-models/openai/o3 github-models/openai/o3-mini github-models/openai/o4-mini github-models/mistral-ai/mistral-medium-2505 github-models/meta/llama-4-maverick-17b-128e-instruct-fp8 github-models/meta/llama-4-scout-17b-16e-instruct" OPENCODE_MODEL_ATTEMPTS: "1" - OPENCODE_RUN_TIMEOUT_SECONDS: "240" + OPENCODE_RUN_TIMEOUT_SECONDS: "18000" OPENCODE_EXPORT_TIMEOUT_SECONDS: "120" - OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "360" + OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "18000" OPENCODE_BACKOFF_INITIAL_SECONDS: "30" OPENCODE_BACKOFF_MAX_SECONDS: "30" OPENCODE_FIRST_ATTEMPT_AGENT: ci-review @@ -4022,6 +4022,7 @@ jobs: local output_file="$1" local mode="$2" local jq_filter + local check_runs_json case "$mode" in failed) @@ -4054,11 +4055,19 @@ jobs: ;; esac - gh api -X GET "repos/${GH_REPOSITORY}/commits/${HEAD_SHA}/check-runs" \ + check_runs_json="$(mktemp)" + if ! gh api -X GET "repos/${GH_REPOSITORY}/commits/${HEAD_SHA}/check-runs" \ -f per_page=100 \ --paginate \ - --slurp | - jq -r "$jq_filter" >"$output_file" + --slurp >"$check_runs_json"; then + rm -f "$check_runs_json" + return 1 + fi + if ! jq -r "$jq_filter" "$check_runs_json" >"$output_file"; then + rm -f "$check_runs_json" + return 1 + fi + rm -f "$check_runs_json" } current_head_manual_strix_success_status() { diff --git a/scripts/ci/opencode_review_approve_gate.sh b/scripts/ci/opencode_review_approve_gate.sh index ae36f1f3..9262542b 100755 --- a/scripts/ci/opencode_review_approve_gate.sh +++ b/scripts/ci/opencode_review_approve_gate.sh @@ -1,6 +1,12 @@ #!/usr/bin/env bash set -euo pipefail +if command -v py >/dev/null 2>&1 && ! python3 -c 'import sys; sys.exit(0)' >/dev/null 2>&1; then + python3() { + py -3 "$@" + } +fi + if [ $# -ne 4 ] && [ $# -ne 5 ]; then echo "usage: $0 [normalized_json_file]" >&2 exit 64 diff --git a/scripts/ci/pr_review_fix_scheduler.py b/scripts/ci/pr_review_fix_scheduler.py index 97f1fd54..54cac961 100755 --- a/scripts/ci/pr_review_fix_scheduler.py +++ b/scripts/ci/pr_review_fix_scheduler.py @@ -20,7 +20,6 @@ is_opencode_review, review_matches_current_head, run, - unresolved_thread_count, ) except ModuleNotFoundError: from scripts.ci.pr_review_merge_scheduler import ( @@ -30,7 +29,6 @@ is_opencode_review, review_matches_current_head, run, - unresolved_thread_count, ) @@ -116,9 +114,6 @@ def needs_autofix(pr: dict[str, Any]) -> tuple[bool, tuple[str, ...]]: reasons: list[str] = [] if has_current_head_changes_requested(pr) and change_request_is_autofixable(pr): reasons.append("current-head OpenCode requested changes") - unresolved = unresolved_thread_count(pr) - if unresolved: - reasons.append(f"{unresolved} active unresolved review thread(s)") return bool(reasons), tuple(reasons) @@ -209,7 +204,7 @@ def inspect_pr( needs_fix, reasons = needs_autofix(pr) if not needs_fix: - return "skip", ("no current-head change request or active unresolved review thread",) + return "skip", ("no autofixable current-head OpenCode change request",) if comments is None: comments = issue_comments(repo, number) @@ -344,8 +339,30 @@ def self_test() -> int: } ] }, + "reviewThreads": { + "nodes": [ + { + "isResolved": False, + "isOutdated": False, + "comments": { + "nodes": [ + { + "author": {"login": "copilot-pull-request-reviewer"}, + "path": "tools/validate_dom.py", + } + ] + }, + } + ] + }, } assert needs_autofix(unresolved_thread_pr) == (False, ()) + unresolved_only_pr = { + **pr, + "reviews": {"nodes": []}, + "reviewThreads": unresolved_thread_pr["reviewThreads"], + } + assert needs_autofix(unresolved_only_pr) == (False, ()) print("self-test passed") return 0 diff --git a/scripts/ci/test_strix_quick_gate.sh b/scripts/ci/test_strix_quick_gate.sh index ffea57cd..9c480347 100755 --- a/scripts/ci/test_strix_quick_gate.sh +++ b/scripts/ci/test_strix_quick_gate.sh @@ -15,6 +15,12 @@ GATE_SCRIPT="$REPO_ROOT/scripts/ci/strix_quick_gate.sh" FAILURES=0 +if command -v py >/dev/null 2>&1 && ! python3 -c 'import sys; sys.exit(0)' >/dev/null 2>&1; then + python3() { + py -3 "$@" + } +fi + record_failure() { echo "FAIL: $1" >&2 FAILURES=$((FAILURES + 1)) @@ -508,8 +514,9 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() { assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "Read and follow the complete review contract" "opencode review uses a compact launcher while keeping the full review contract on disk" assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "tokens_limit_reached" "opencode review detects provider context-window overflow" assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "skipping remaining attempts for this model" "opencode review skips same-model retries after context-window overflow" - assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "600"' "opencode primary review has a bounded per-model timeout before trying fallback models" - assert_file_contains "$workflow_file" 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "3600"' "opencode model pool has a one-hour total retry budget" + assert_file_contains "$workflow_file" 'timeout-minutes: 320' "opencode model pool has enough wall-clock budget for large codebase reviews" + assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "18000"' "opencode primary review has a five-hour per-model timeout for large codebase reviews" + assert_file_contains "$workflow_file" 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "18000"' "opencode model pool has a five-hour total retry budget" assert_file_contains "$workflow_file" "needs.coverage-evidence.result == 'success'" "opencode model pool only runs after coverage evidence passed" assert_file_contains "$workflow_file" "id: opencode_review_model_pool" "opencode DeepSeek V3 fallback still runs after a primary model timeout or step failure when coverage evidence passed" assert_file_contains "$workflow_file" "always()" "opencode fallback chain uses always() so failed model steps cannot skip every fallback" @@ -618,7 +625,7 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() { assert_file_contains "$workflow_file" "no model produced a valid review control block" "opencode model-failure path documents why approval is withheld" assert_file_contains "$workflow_file" 'OPENCODE_MODEL_ATTEMPTS: "1"' "opencode primary and fallback paths avoid multi-attempt stalls on one model" assert_file_contains "$workflow_file" 'OPENCODE_MODEL_ATTEMPTS: "1"' "opencode catalog fallback tries each model once before moving on" - assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "600"' "opencode catalog fallback has a bounded model review timeout before step timeout" + assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "18000"' "opencode catalog fallback has a bounded model review timeout before step timeout" assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "OpenCode %s attempt %s/%s failed" "opencode catalog fallback records per-model retry failures" assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "exponential backoff" "opencode model retry paths use exponential backoff instead of fixed sleeps" assert_file_contains "$workflow_file" "github-models/openai/o3 github-models/openai/o3-mini github-models/openai/o4-mini" "opencode review includes additional OpenAI reasoning model fallbacks" @@ -691,6 +698,8 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() { assert_file_contains "$workflow_file" 'collect_current_head_commit_check_runs()' "opencode approval falls back to current-head commit check-runs when PR rollup lags" assert_file_contains "$workflow_file" 'commits/${HEAD_SHA}/check-runs' "opencode approval queries current-head commit check-runs before changing review state" assert_file_contains "$workflow_file" '--slurp' "opencode approval aggregates paginated commit check-runs before classifying them" + assert_file_contains "$workflow_file" 'jq -r "$jq_filter" "$check_runs_json"' "opencode approval classifies slurped commit check-runs with jq after gh api output is written" + assert_file_not_contains "$workflow_file" '--jq "$jq_filter"' "opencode approval avoids gh api --slurp with --jq, which older gh versions reject" assert_file_contains "$workflow_file" 'group_by(.name // "")' "opencode approval keeps only the latest same-name commit check-run" assert_file_contains "$workflow_file" 'map(last)' "opencode approval ignores superseded same-name commit check-runs" assert_file_contains "$workflow_file" 'collect_current_head_commit_check_runs "$commit_check_runs_file" pending' "opencode approval blocks approval on pending commit check-runs omitted from PR rollup" diff --git a/tests/test_opencode_agent_contract.py b/tests/test_opencode_agent_contract.py index c5dd7a9b..7110b884 100644 --- a/tests/test_opencode_agent_contract.py +++ b/tests/test_opencode_agent_contract.py @@ -211,17 +211,19 @@ def test_workflow_provisions_sandbox_tool_and_reviewer_agent(): assert '"## Check outcome"' not in workflow assert "publish REQUEST_CHANGES when coverage-evidence blocker states" in workflow assert 'timeout-minutes: 75' in workflow - assert re.search(r"Run OpenCode PR Review model pool[\s\S]{0,240}timeout-minutes: 20", workflow) + assert re.search(r"Run OpenCode PR Review model pool[\s\S]{0,240}timeout-minutes: 320", workflow) assert 'APPROVAL_CHECK_WAIT_ATTEMPTS: "81"' in workflow assert 'APPROVAL_CHECK_WAIT_SLEEP_SECONDS: "30"' in workflow - assert 'OPENCODE_MODEL_CANDIDATES: "github-models/openai/gpt-5-nano"' in workflow + assert "github-models/openai/gpt-5-chat github-models/openai/gpt-5-mini github-models/openai/gpt-5-nano" in workflow + assert "github-models/openai/o3 github-models/openai/o3-mini github-models/openai/o4-mini" in workflow assert 'OPENCODE_MODEL_ATTEMPTS: "1"' in workflow - assert 'OPENCODE_RUN_TIMEOUT_SECONDS: "240"' in workflow + assert 'OPENCODE_RUN_TIMEOUT_SECONDS: "18000"' in workflow assert 'OPENCODE_EXPORT_TIMEOUT_SECONDS: "120"' in workflow - assert 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "360"' in workflow + assert 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "18000"' in workflow assert 'OPENCODE_BACKOFF_MAX_SECONDS: "30"' in workflow assert "${{ runner.temp }}/opencode-review-model-pool.md" in workflow - assert re.search(r'check-runs" \\\n\s+-f per_page=100 \\\n\s+--paginate \\\n\s+--slurp \|\n\s+jq -r "\$jq_filter"', workflow) + assert re.search(r'check-runs" \\\n\s+-f per_page=100 \\\n\s+--paginate \\\n\s+--slurp >"\$check_runs_json"', workflow) + assert 'jq -r "$jq_filter" "$check_runs_json"' in workflow assert not re.search(r"--slurp\s*\\\n\s*--jq", workflow) assert "falling back to current-head REST check-runs" in workflow diff --git a/tests/test_pr_review_fix_scheduler.py b/tests/test_pr_review_fix_scheduler.py index a838147a..91bc4856 100644 --- a/tests/test_pr_review_fix_scheduler.py +++ b/tests/test_pr_review_fix_scheduler.py @@ -38,7 +38,7 @@ def test_recent_fix_marker_is_head_scoped(): def test_needs_autofix_uses_current_head_evidence(): - """Autofix only starts from current-head review or thread evidence.""" + """Autofix only starts from current-head OpenCode review evidence.""" head = "a" * 40 pr = make_pr( headRefOid=head, @@ -58,7 +58,7 @@ def test_needs_autofix_uses_current_head_evidence(): assert fix.needs_autofix(pr) == ( True, - ("current-head OpenCode requested changes", "1 active unresolved review thread(s)"), + ("current-head OpenCode requested changes",), ) @@ -381,7 +381,7 @@ def test_fix_inspect_skip_wait_and_error_paths(monkeypatch): ) monkeypatch.setattr(fix, "needs_autofix", lambda pr: (False, ())) - assert fix.inspect_pr("owner/repo", make_pr(), args) == ("skip", ("no current-head change request or active unresolved review thread",)) + assert fix.inspect_pr("owner/repo", make_pr(), args) == ("skip", ("no autofixable current-head OpenCode change request",)) monkeypatch.setattr(fix, "needs_autofix", lambda pr: (True, ("reason",))) monkeypatch.setattr(fix, "issue_comments", lambda repo, number: [{"body": f"{fix.FIX_MARKER} head_sha={'a' * 40} epoch={int(time.time())} -->"}])