From 38ac3ebe6f2ad5c6a2b6a697a343a9314ee20a8a Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Sun, 5 Jul 2026 19:25:50 +0900 Subject: [PATCH] fix(review): order review models by GitHub Models quota, not gpt-5 first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #295 put openai/gpt-5 first in OPENCODE_MODEL_CANDIDATES. On GitHub Models, gpt-5/o3 are the "Reasoning" tier with the smallest quota (8-12 requests/day, 1-2/min) and hang or rate-limit constantly, so with gpt-5 first the pool burned the entire 350-min step on a stalled flagship and never fell back — reviews ran ~6 h and failed org-wide (observed: appguardrail review stuck >5 h in the model pool step, PRs unmergeable). Reorder candidates by quota allowance, largest first: non-reasoning "Low" tier (deepseek-v3, mistral-medium, llama-4: 150-450 req/day), then mini-reasoning (o4-mini, o3-mini, gpt-5-mini/nano/chat), then DeepSeek-R1, then the flagships (o3, gpt-5) last as a quality fallback. Only the candidate order changes; ATTEMPTS(5), RUN_TIMEOUT(20400), step timeout(350) are left as-is per request. Reconcile test_opencode_agent_contract.py with the new order and with the #295 settings already present in the workflow, for which the test still asserted stale values (ATTEMPTS 1->5, RUN_TIMEOUT 600->20400, step 285->350), so the guard test is green and accurate again. 
Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01RTAMs4bpSZS77Xe3RQjv9P --- .github/workflows/opencode-review.yml | 9 ++++++++- tests/test_opencode_agent_contract.py | 27 ++++++++++++++------------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index a32b380..c8d6f96 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -2280,7 +2280,14 @@ jobs: SHARE: "false" NPM_CONFIG_IGNORE_SCRIPTS: "true" NO_COLOR: "1" - OPENCODE_MODEL_CANDIDATES: "github-models/openai/gpt-5 github-models/openai/gpt-5-chat github-models/deepseek/deepseek-v3-0324 github-models/openai/o3 github-models/deepseek/deepseek-r1 github-models/openai/o4-mini github-models/openai/o3-mini github-models/openai/gpt-5-mini github-models/mistral-ai/mistral-medium-2505 github-models/openai/gpt-5-nano github-models/deepseek/deepseek-r1-0528 github-models/meta/llama-4-maverick-17b-128e-instruct-fp8 github-models/meta/llama-4-scout-17b-16e-instruct" + # Ordered by GitHub Models quota allowance, largest first, so the + # highest-throughput models review before the rate-starved flagships. + # gpt-5/o3 are "Reasoning" tier (8-12 req/day) — putting them first + # (as before) stalled every review until the step timed out, because + # a rate-limited/hung flagship never fell back. Non-reasoning "Low" + # tier (deepseek-v3, mistral, llama-4: 150-450 req/day) and mini + # reasoning models go first; gpt-5 stays last as a quality fallback. 
+ OPENCODE_MODEL_CANDIDATES: "github-models/deepseek/deepseek-v3-0324 github-models/mistral-ai/mistral-medium-2505 github-models/meta/llama-4-maverick-17b-128e-instruct-fp8 github-models/meta/llama-4-scout-17b-16e-instruct github-models/openai/o4-mini github-models/openai/o3-mini github-models/openai/gpt-5-mini github-models/openai/gpt-5-nano github-models/openai/gpt-5-chat github-models/deepseek/deepseek-r1-0528 github-models/deepseek/deepseek-r1 github-models/openai/o3 github-models/openai/gpt-5" OPENCODE_MODEL_ATTEMPTS: "5" OPENCODE_RUN_TIMEOUT_SECONDS: "20400" OPENCODE_EXPORT_TIMEOUT_SECONDS: "120" diff --git a/tests/test_opencode_agent_contract.py b/tests/test_opencode_agent_contract.py index 8d2e14a..8493dc7 100644 --- a/tests/test_opencode_agent_contract.py +++ b/tests/test_opencode_agent_contract.py @@ -79,9 +79,9 @@ def test_opencode_model_pool_sets_high_effort_for_capable_candidates(): assert candidate_models assert set(candidate_models).issubset(set(models)) assert candidate_models[:3] == [ - "openai/o4-mini", - "openai/o3-mini", - "openai/gpt-5-mini", + "deepseek/deepseek-v3-0324", + "mistral-ai/mistral-medium-2505", + "meta/llama-4-maverick-17b-128e-instruct-fp8", ] assert { "openai/gpt-5-chat", @@ -282,25 +282,26 @@ def test_workflow_provisions_sandbox_tool_and_reviewer_agent(): assert "publish REQUEST_CHANGES when coverage-evidence blocker states" in workflow assert re.search(r"opencode-review-target:[\s\S]{0,240}timeout-minutes: 360", workflow) assert 'timeout-minutes: 75' in workflow - assert re.search(r"Run OpenCode PR Review model pool[\s\S]{0,240}timeout-minutes: 285", workflow) + assert re.search(r"Run OpenCode PR Review model pool[\s\S]{0,240}timeout-minutes: 350", workflow) assert 'APPROVAL_CHECK_WAIT_ATTEMPTS: "81"' in workflow assert 'APPROVAL_CHECK_WAIT_SLEEP_SECONDS: "30"' in workflow assert ( - 'OPENCODE_MODEL_CANDIDATES: "github-models/openai/o4-mini ' + 'OPENCODE_MODEL_CANDIDATES: "github-models/deepseek/deepseek-v3-0324 ' + 
"github-models/mistral-ai/mistral-medium-2505 " + "github-models/meta/llama-4-maverick-17b-128e-instruct-fp8 " + "github-models/meta/llama-4-scout-17b-16e-instruct " + "github-models/openai/o4-mini " "github-models/openai/o3-mini " "github-models/openai/gpt-5-mini " - 'github-models/openai/gpt-5-chat ' - "github-models/openai/o3 " - "github-models/mistral-ai/mistral-medium-2505 " "github-models/openai/gpt-5-nano " + 'github-models/openai/gpt-5-chat ' "github-models/deepseek/deepseek-r1-0528 " "github-models/deepseek/deepseek-r1 " - "github-models/deepseek/deepseek-v3-0324 " - "github-models/meta/llama-4-maverick-17b-128e-instruct-fp8 " - 'github-models/meta/llama-4-scout-17b-16e-instruct"' + "github-models/openai/o3 " + 'github-models/openai/gpt-5"' ) in workflow - assert 'OPENCODE_MODEL_ATTEMPTS: "1"' in workflow - assert 'OPENCODE_RUN_TIMEOUT_SECONDS: "600"' in workflow + assert 'OPENCODE_MODEL_ATTEMPTS: "5"' in workflow + assert 'OPENCODE_RUN_TIMEOUT_SECONDS: "20400"' in workflow assert 'OPENCODE_EXPORT_TIMEOUT_SECONDS: "120"' in workflow assert 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "0"' in workflow assert 'OPENCODE_BACKOFF_MAX_SECONDS: "30"' in workflow