diff --git a/src/ucode/agents/__init__.py b/src/ucode/agents/__init__.py index f44b91b..0143e09 100644 --- a/src/ucode/agents/__init__.py +++ b/src/ucode/agents/__init__.py @@ -328,7 +328,7 @@ def check_gateway_endpoint(state: dict, tool: str) -> bool: if tool == "opencode": return bool(state.get("opencode_models")) if tool == "codex": - return bool(state.get("codex_models")) + return bool(state.get("codex_models")) or bool(state.get("oss_models")) if tool == "gemini": return bool(state.get("gemini_models")) if tool == "copilot": @@ -345,7 +345,7 @@ def check_gateway_endpoint(state: dict, tool: str) -> bool: _TOOL_DISCOVERY_SOURCES: dict[str, tuple[str, ...]] = { "claude": ("claude",), "opencode": ("claude", "gemini", "oss"), - "codex": ("codex",), + "codex": ("codex", "oss"), "gemini": ("gemini",), "copilot": ("claude", "codex"), "pi": ("claude", "codex", "gemini"), diff --git a/src/ucode/agents/codex.py b/src/ucode/agents/codex.py index 2a5c657..4169d51 100644 --- a/src/ucode/agents/codex.py +++ b/src/ucode/agents/codex.py @@ -107,9 +107,11 @@ def _provider_block( databricks_profile: str | None, use_pat: bool = False, provider: str | None = None, + base_url: str | None = None, ) -> dict: auth_argv = build_auth_token_argv(workspace, databricks_profile, use_pat=use_pat) - base_url = build_tool_base_url("codex", workspace) + if base_url is None: + base_url = build_tool_base_url("codex", workspace) http_headers = { "User-Agent": f"ucode/{ucode_version()} codex/{agent_version('codex')}", } @@ -139,13 +141,14 @@ def render_overlay( databricks_profile: str | None = None, use_pat: bool = False, provider: str | None = None, + base_url: str | None = None, ) -> dict: overlay: dict = {"model_provider": CODEX_MODEL_PROVIDER_NAME} if model: overlay["model"] = model overlay["model_providers"] = { CODEX_MODEL_PROVIDER_NAME: _provider_block( - workspace, databricks_profile, use_pat, provider + workspace, databricks_profile, use_pat, provider, base_url=base_url ), } return overlay @@ -157,6 +160,7 @@ def render_legacy_overlay( databricks_profile: str | None = None, use_pat: bool = False, provider: str | None = None, + base_url: str | None = None, ) -> dict: """Overlay for Codex CLI < 0.134.0, which only reads `~/.codex/config.toml`. @@ -171,7 +175,7 @@ def render_legacy_overlay( "profiles": {CODEX_PROFILE_NAME: profile_block}, "model_providers": { CODEX_MODEL_PROVIDER_NAME: _provider_block( - workspace, databricks_profile, use_pat, provider + workspace, databricks_profile, use_pat, provider, base_url=base_url ), }, } @@ -289,6 +293,25 @@ def _codex_model_id(model: str | None) -> str | None: return _openai_model_id(model) +def _is_oss_model(model: str | None, state: dict) -> bool: + """True when ``model`` should route through the MLflow OSS gateway path.""" + if not model: + return False + if model.startswith("system.ai.kimi-"): + return True + return model in (state.get("oss_models") or []) + + +def _codex_base_url(workspace: str, model: str | None, state: dict) -> str: + """Pick the right AI Gateway base URL for the resolved Codex model. + + GPT-family and OSS models (e.g. Kimi) both route through + ``/ai-gateway/codex/v1``; Codex appends ``/responses`` for the OpenAI + Responses wire API. + """ + return build_tool_base_url("codex", workspace) + + def _parse_gpt(model: str | None) -> tuple[int, int | None, int | None, str] | None: if not model: return None @@ -316,6 +339,7 @@ def write_tool_config(state: dict, model: str | None = None, provider: str | Non # Databricks endpoint id is pinned. chosen_model = None if provider else _codex_model_id(model or default_model(state)) databricks_profile = state.get("profile") + base_url = None if provider else _codex_base_url(workspace, chosen_model, state) if _use_legacy_layout(): # Codex < 0.134.0 only reads ~/.codex/config.toml. Write the shared @@ -329,6 +353,7 @@ def write_tool_config(state: dict, model: str | None = None, provider: str | Non databricks_profile, use_pat=bool(state.get("use_pat")), provider=provider, + base_url=base_url, ) doc = read_toml_safe(LEGACY_CODEX_CONFIG_PATH) deep_merge_dict(doc, overlay) @@ -351,6 +376,7 @@ def write_tool_config(state: dict, model: str | None = None, provider: str | Non databricks_profile, use_pat=bool(state.get("use_pat")), provider=provider, + base_url=base_url, ) doc = read_toml_safe(CODEX_CONFIG_PATH) deep_merge_dict(doc, overlay) @@ -371,24 +397,30 @@ def default_model(state: dict) -> str | None: "databricks-gpt-5" ahead of "databricks-gpt-5-5". Prefer the highest semantic version instead. - Only GPT-parseable ids are considered. Codex routes the chosen ``model`` - through the gateway as-is, so a non-GPT entry (e.g. ``moonshotai/kimi-k2.5``) - would be rejected with a Unity Catalog endpoint-name error. When no - candidate parses as GPT we return None rather than pinning an unroutable id. + Only GPT-parseable ids are considered for the default. If no GPT model is + available but the workspace exposes OSS models (e.g. ``system.ai.kimi-*``), + fall back to the first OSS model so users can route Codex through the + MLflow gateway path. """ codex_models = state.get("codex_models") or [] parsed: list[tuple[str, tuple[int, int | None, int | None, str]]] = [ (mid, gpt) for mid in codex_models if (gpt := _parse_gpt(mid)) is not None ] - if not parsed: - return None + if parsed: + + def _gpt_version_key(entry: tuple[str, tuple[int, int | None, int | None, str]]): + major, minor, patch, suffix = entry[1] + base_bonus = 1 if not suffix else 0 + return (major, minor or 0, patch or 0, base_bonus) - def _gpt_version_key(entry: tuple[str, tuple[int, int | None, int | None, str]]): - major, minor, patch, suffix = entry[1] - base_bonus = 1 if not suffix else 0 - return (major, minor or 0, patch or 0, base_bonus) + return max(parsed, key=_gpt_version_key)[0] - return max(parsed, key=_gpt_version_key)[0] + # If no GPT model is available but codex_models contains OSS entries + # (e.g. system.ai.kimi-*), fall back to the first one. + for model_id in codex_models: + if _is_oss_model(model_id, state): + return model_id + return None def launch(state: dict, tool_args: list[str]) -> None: diff --git a/src/ucode/cli.py b/src/ucode/cli.py index b2f23be..8eee79c 100644 --- a/src/ucode/cli.py +++ b/src/ucode/cli.py @@ -90,7 +90,7 @@ "claude": ("claude", "opencode", "copilot", "pi"), "codex": ("codex", "copilot", "pi"), "gemini": ("gemini", "opencode", "pi"), - "oss": ("opencode",), + "oss": ("opencode", "codex"), } @@ -273,7 +273,7 @@ def configure_shared_state( ) want_gemini = fetch_all or "gemini" in tools or "opencode" in tools or "pi" in tools want_codex = fetch_all or "codex" in tools or "copilot" in tools or "pi" in tools - want_oss = fetch_all or "opencode" in tools + want_oss = fetch_all or "opencode" in tools or "codex" in tools claude_reason: str | None = None gemini_reason: str | None = None @@ -822,7 +822,9 @@ def _auto_configure_tool(tool: str) -> None: raise RuntimeError(f"{spec['display']} validation failed — config reverted.") -def _launch_tool(tool_name: str, ctx: typer.Context, provider: str | None = None) -> None: +def _launch_tool( + tool_name: str, ctx: typer.Context, provider: str | None = None, model: str | None = None +) -> None: try: tool = normalize_tool(tool_name) existing = load_state() @@ -868,7 +870,7 @@ def _launch_tool(tool_name: str, ctx: typer.Context, provider: str | None = None # the workspace has no matching Databricks models. resolved_model = None else: - state, resolved_model = resolve_launch_model(tool, state, None) + state, resolved_model = resolve_launch_model(tool, state, model) state = configure_tool( tool, state, resolved_model, provider=provider, provider_models=provider_models ) @@ -904,9 +906,18 @@ def codex_cmd( "before any `--` separator.", ), ] = None, + model: Annotated[ + str | None, + typer.Option( + "--model", + help="Pin a specific Codex model id (e.g. system.ai.kimi-k2-7-code). " + "Useful for routing through the MLflow OSS gateway path instead of the " + "default GPT model; pass before any `--` separator.", + ), + ] = None, ) -> None: """Launch Codex via Databricks.""" - _launch_tool("codex", ctx, provider=provider) + _launch_tool("codex", ctx, provider=provider, model=model) @app.command("claude", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}) diff --git a/src/ucode/databricks.py b/src/ucode/databricks.py index fe12259..3a2e164 100644 --- a/src/ucode/databricks.py +++ b/src/ucode/databricks.py @@ -1200,7 +1200,8 @@ def discover_model_services( - ``claude_models`` maps ``opus``/``sonnet``/``haiku`` to the newest matching ``system.ai.claude-*`` id (mirrors ``discover_claude_models``). - - ``codex_models`` is the list of ``system.ai.*gpt-*`` ids. + - ``codex_models`` is the list of Codex-routable ``system.ai.*`` ids + (GPT models plus OSS models that speak the OpenAI Responses API). - ``gemini_models`` is the list of ``system.ai.*gemini-*`` ids, newest first. - ``oss_models`` is the list of OSS-model ``system.ai.*`` ids. @@ -1221,9 +1222,9 @@ def discover_model_services( if candidates: claude_models[family] = candidates[0] - codex_models = [m for m in ids if "gpt-" in m] - gemini_models = sorted([m for m in ids if "gemini-" in m], key=model_version_sort_key) oss_models = [m for m in ids if "kimi-" in m] + codex_models = [m for m in ids if "gpt-" in m] + list(oss_models) + gemini_models = sorted([m for m in ids if "gemini-" in m], key=model_version_sort_key) if not (claude_models or codex_models or gemini_models or oss_models): sample = ", ".join(ids[:5]) diff --git a/tests/test_agent_codex.py b/tests/test_agent_codex.py index 9a68e03..fd06890 100644 --- a/tests/test_agent_codex.py +++ b/tests/test_agent_codex.py @@ -78,6 +78,19 @@ def test_no_provider_header_without_flag(self): headers = overlay["model_providers"]["ucode-databricks"]["http_headers"] assert "Databricks-Model-Provider-Service" not in headers + def test_uses_codex_base_url_for_oss_model(self): + overlay = codex.render_overlay( + WS, model="system.ai.kimi-k2-7-code", base_url=f"{WS}/ai-gateway/codex/v1" + ) + provider = overlay["model_providers"]["ucode-databricks"] + assert provider["base_url"] == f"{WS}/ai-gateway/codex/v1" + + def test_preserves_model_id_for_oss_model(self): + overlay = codex.render_overlay( + WS, model="system.ai.kimi-k2-7-code", base_url=f"{WS}/ai-gateway/codex/v1" + ) + assert overlay["model"] == "system.ai.kimi-k2-7-code" + class TestRenderOverlayUserAgent: def test_user_agent_set_on_provider(self, monkeypatch): @@ -241,6 +254,25 @@ def test_legacy_write_preserves_other_profiles_in_shared_config(self, tmp_path, assert doc["profiles"]["other"]["model_provider"] == "keep" assert doc["profiles"]["ucode"]["model_provider"] == "ucode-databricks" + def test_uses_codex_base_url_for_oss_model(self, tmp_path, monkeypatch): + config_path = tmp_path / ".codex" / "ucode.config.toml" + backup_path = tmp_path / "codex-ucode-config.backup.toml" + monkeypatch.setattr(codex, "CODEX_CONFIG_PATH", config_path) + monkeypatch.setattr(codex, "CODEX_BACKUP_PATH", backup_path) + monkeypatch.setattr(codex, "agent_version", lambda binary: "0.134.0") + monkeypatch.setattr(codex, "save_state", lambda state: None) + + state = { + "workspace": WS, + "codex_models": ["system.ai.kimi-k2-7-code"], + } + codex.write_tool_config(state) + + doc = read_toml_safe(config_path) + assert doc["model"] == "system.ai.kimi-k2-7-code" + provider = doc["model_providers"]["ucode-databricks"] + assert provider["base_url"] == f"{WS}/ai-gateway/codex/v1" + class TestCodexLegacyLayoutDetection: def test_new_codex_uses_modern_layout(self, monkeypatch): @@ -362,6 +394,18 @@ def test_prefers_base_over_suffixed_same_version(self): assert codex.default_model({"codex_models": models}) == "gpt-5-5" + def test_falls_back_to_first_oss_model_in_codex_models(self): + state = {"codex_models": ["system.ai.kimi-k2-7-code"]} + + assert codex.default_model(state) == "system.ai.kimi-k2-7-code" + + def test_prefers_gpt_over_oss_when_both_available(self): + state = { + "codex_models": ["databricks-gpt-5", "system.ai.kimi-k2-7-code"], + } + + assert codex.default_model(state) == "databricks-gpt-5" + def test_namespaced_models_use_same_version_parser(self): models = ["served-models/databricks-gpt-5", "served-models/databricks-gpt-5-5"] diff --git a/tests/test_agents_init.py b/tests/test_agents_init.py index 576fc22..c2af445 100644 --- a/tests/test_agents_init.py +++ b/tests/test_agents_init.py @@ -95,6 +95,12 @@ def test_claude_unavailable_when_no_models(self): def test_codex_available(self): assert check_gateway_endpoint({"codex_models": ["model-a"]}, "codex") is True + def test_codex_available_with_oss_models(self): + assert check_gateway_endpoint({"oss_models": ["system.ai.kimi-k2-7-code"]}, "codex") is True + + def test_codex_unavailable_without_models(self): + assert check_gateway_endpoint({}, "codex") is False + def test_gemini_available(self): assert check_gateway_endpoint({"gemini_models": ["gemini-2"]}, "gemini") is True diff --git a/tests/test_cli.py b/tests/test_cli.py index 3c5d404..333a5d6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -486,6 +486,27 @@ def test_no_extra_args_passes_empty_list(self): assert forwarded == [] +class TestCodexModelOption: + def test_model_option_overrides_default(self): + with ( + patch("ucode.cli.ensure_bootstrap_dependencies"), + patch("ucode.cli.load_state", return_value=MINIMAL_STATE), + patch("ucode.cli.ensure_provider_state", return_value=MINIMAL_STATE), + patch("ucode.cli.configure_shared_state", return_value=MINIMAL_STATE), + patch( + "ucode.cli.resolve_launch_model", + return_value=(MINIMAL_STATE, "system.ai.kimi-k2-7-code"), + ) as mock_resolve, + patch("ucode.cli.configure_tool", return_value=MINIMAL_STATE), + patch("ucode.cli.launch_agent") as mock_launch, + ): + result = runner.invoke(app, ["codex", "--model", "system.ai.kimi-k2-7-code"]) + assert result.exit_code == 0, result.output + mock_resolve.assert_called_once_with("codex", MINIMAL_STATE, "system.ai.kimi-k2-7-code") + forwarded = mock_launch.call_args[0][2] + assert forwarded == [] + + class TestConfigureAgentFlag: def test_no_flag_calls_configure_all(self): with ( diff --git a/tests/test_databricks.py b/tests/test_databricks.py index a3693f8..c58dec5 100644 --- a/tests/test_databricks.py +++ b/tests/test_databricks.py @@ -166,7 +166,9 @@ def test_buckets_families_by_name(self, monkeypatch): "opus": "system.ai.claude-opus-4-8", "sonnet": "system.ai.claude-sonnet-4-6", } - assert codex == ["system.ai.gpt-5"] + # Codex-routable models include GPT plus OSS models that speak the + # OpenAI Responses API (Kimi here; Llama is not assumed to). + assert codex == ["system.ai.gpt-5", "system.ai.kimi-k2-7-code"] # Gemini ordered newest-first via the shared sort key. assert gemini[0] == "system.ai.gemini-3-5-flash" assert oss == ["system.ai.kimi-k2-7-code"]