Skip to content

Commit e0ff167

Browse files
authored
Merge pull request #46 from CaddyGlow/feat/codex-msaf-compatibility
feat: Microsoft Agent Framework compatibility and bypass mode
2 parents eca62b1 + f387474 commit e0ff167

20 files changed

Lines changed: 1575 additions & 62 deletions

.ccproxy.codex.msaf.toml.example

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Example ccproxy config for Microsoft Agent Framework clients over Codex.
2+
3+
enable_plugins = true
4+
enabled_plugins = ["oauth_codex", "codex"]
5+
6+
[server]
7+
bypass_mode = false
8+
9+
[llm]
10+
# Keep OpenAI-compatible responses free from <thinking>...</thinking> blocks.
11+
openai_thinking_xml = false
12+
13+
[plugins.codex]
14+
enabled = true
15+
name = "codex"
16+
base_url = "https://chatgpt.com/backend-api/codex"
17+
requires_auth = true
18+
auth_type = "oauth"
19+
supports_streaming = true
20+
preferred_upstream_mode = "streaming"
21+
buffer_non_streaming = true
22+
enable_format_registry = true
23+
24+
# Microsoft Agent Framework sends its own instructions/reasoning payloads.
25+
# Do not prepend captured Codex CLI templates to generic OpenAI-compatible calls.
26+
inject_detection_payload = false
27+
28+
supported_input_formats = [
29+
"openai.responses",
30+
"openai.chat_completions",
31+
"anthropic.messages",
32+
]
33+
34+
detection_home_mode = "temp"
35+
36+
[[plugins.codex.models_endpoint]]
37+
id = "gpt-5.4"
38+
object = "model"
39+
created = 1735689600
40+
owned_by = "openai"
41+
root = "gpt-5.4"
42+
permission = []
43+
44+
[plugins.codex.oauth]
45+
base_url = "https://auth.openai.com"
46+
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
47+
scopes = ["openid", "profile", "email", "offline_access"]
48+
49+
[plugins.oauth_codex]
50+
enabled = true
51+
base_url = "https://auth.openai.com"
52+
authorize_url = "https://auth.openai.com/oauth/authorize"
53+
token_url = "https://auth.openai.com/oauth/token"
54+
profile_url = "https://api.openai.com/oauth/profile"
55+
client_id = "app_EMoamEEZ73f0CkXaXp7hrann"
56+
redirect_uri = "http://localhost:1455/auth/callback"
57+
callback_port = 1455
58+
scopes = ["openid", "profile", "email", "offline_access"]
59+
audience = "https://api.openai.com/v1"
60+
user_agent = "Codex-Code/1.0.43"
61+
headers = { User-Agent = "Codex-Code/1.0.43" }
62+
request_timeout = 30
63+
callback_timeout = 300
64+
use_pkce = true

ccproxy/core/plugins/factories.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from ccproxy.models.provider import ProviderConfig
1515
from ccproxy.services.adapters.base import BaseAdapter
1616
from ccproxy.services.adapters.http_adapter import BaseHTTPAdapter
17+
from ccproxy.services.adapters.mock_adapter import MockAdapter
1718
from ccproxy.services.interfaces import (
1819
IMetricsCollector,
1920
IRequestTracer,
@@ -215,6 +216,23 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
215216
Returns:
216217
Adapter instance
217218
"""
219+
settings = context.get("settings")
220+
service_container = context.get("service_container")
221+
if settings and getattr(settings.server, "bypass_mode", False):
222+
if not service_container:
223+
raise RuntimeError(
224+
f"Cannot initialize plugin '{self.plugin_name}' in bypass mode: "
225+
"service container is required to create mock adapter. "
226+
"This is likely a configuration issue."
227+
)
228+
logger.warning(
229+
"plugin_bypass_mode_enabled",
230+
plugin=self.plugin_name,
231+
adapter=self.adapter_class.__name__,
232+
category="lifecycle",
233+
)
234+
return MockAdapter(service_container.get_mock_handler())
235+
218236
# Extract services from context (one-time extraction)
219237
http_pool_manager: HTTPPoolManager | None = cast(
220238
"HTTPPoolManager | None", context.get("http_pool_manager")
@@ -232,7 +250,6 @@ async def create_adapter(self, context: PluginContext) -> BaseAdapter:
232250
config = context.get("config")
233251

234252
# Get all adapter dependencies from service container
235-
service_container = context.get("service_container")
236253
if not service_container:
237254
raise RuntimeError("Service container is required for adapter services")
238255

ccproxy/llms/formatters/context.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
"formatter_instructions", default=None
1212
)
1313
_TOOLS_VAR: ContextVar[list[Any] | None] = ContextVar("formatter_tools", default=None)
14+
_OPENAI_THINKING_XML_VAR: ContextVar[bool | None] = ContextVar(
15+
"formatter_openai_thinking_xml", default=None
16+
)
1417

1518

1619
def register_request(request: Any | None, instructions: str | None = None) -> None:
@@ -114,3 +117,24 @@ def get_last_request_tools() -> list[Any] | None:
114117

115118
cached = _TOOLS_VAR.get()
116119
return list(cached) if cached else None
120+
121+
122+
def register_openai_thinking_xml(enabled: bool | None) -> None:
    """Record whether OpenAI conversions should emit thinking as XML blocks.

    Args:
        enabled: ``True`` or ``False`` to state the preference explicitly;
            ``None`` leaves the decision to the converter reading the value.

    Note:
        The preference is written to the module-level ``ContextVar``
        ``_OPENAI_THINKING_XML_VAR``. The token returned by ``set`` is
        discarded, so the previous value is not restored by this function.
    """
    _OPENAI_THINKING_XML_VAR.set(enabled)
135+
136+
137+
def get_openai_thinking_xml() -> bool | None:
    """Fetch the thinking-XML preference stored for the current context.

    Returns:
        The value last written to ``_OPENAI_THINKING_XML_VAR`` in this
        context, or ``None`` (the variable's declared default) when nothing
        has been registered.
    """
    preference = _OPENAI_THINKING_XML_VAR.get()
    return preference

ccproxy/llms/formatters/openai_to_openai/responses.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
convert_openai_responses_usage_to_completion_usage,
1616
merge_thinking_segments,
1717
)
18+
from ccproxy.llms.formatters.context import get_openai_thinking_xml
1819
from ccproxy.llms.models import openai as openai_models
1920

2021
from ._helpers import (
@@ -333,6 +334,10 @@ def convert__openai_responses_to_openai_chat__response(
333334
response: openai_models.ResponseObject,
334335
) -> openai_models.ChatCompletionResponse:
335336
"""Convert an OpenAI ResponseObject to a ChatCompletionResponse."""
337+
include_thinking = get_openai_thinking_xml()
338+
if include_thinking is None:
339+
include_thinking = True
340+
336341
text_segments: list[str] = []
337342
added_reasoning: set[tuple[str, str]] = set()
338343
tool_calls: list[openai_models.ToolCall] = []
@@ -353,7 +358,7 @@ def convert__openai_responses_to_openai_chat__response(
353358
if thinking_text and len(thinking_text) > 30
354359
else thinking_text,
355360
)
356-
if thinking_text:
361+
if include_thinking and thinking_text:
357362
key = (signature or "", thinking_text)
358363
if key not in added_reasoning:
359364
text_segments.append(_wrap_thinking(signature, thinking_text))

ccproxy/llms/formatters/openai_to_openai/streams.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,14 @@
2727
get_last_instructions,
2828
get_last_request,
2929
get_last_request_tools,
30+
get_openai_thinking_xml,
3031
register_request,
3132
register_request_tools,
3233
)
3334
from ccproxy.llms.models import openai as openai_models
3435
from ccproxy.llms.streaming.accumulators import OpenAIAccumulator
3536

36-
from ._helpers import (
37-
_convert_tools_chat_to_responses,
38-
_get_attr,
39-
)
37+
from ._helpers import _convert_tools_chat_to_responses, _get_attr
4038
from .requests import _build_responses_payload_from_chat_request
4139
from .responses import (
4240
_collect_reasoning_segments,
@@ -61,6 +59,10 @@ def run(
6159
async def generator() -> AsyncGenerator[
6260
openai_models.ChatCompletionChunk, None
6361
]:
62+
include_thinking = get_openai_thinking_xml()
63+
if include_thinking is None:
64+
include_thinking = True
65+
6466
model_id = ""
6567
role_sent = False
6668

@@ -537,7 +539,7 @@ def create_text_chunk(
537539
for entry in summary_list:
538540
text = _get_attr(entry, "text")
539541
signature = _get_attr(entry, "signature")
540-
if isinstance(text, str) and text:
542+
if include_thinking and isinstance(text, str) and text:
541543
chunk_text = _wrap_thinking(signature, text)
542544
sequence_counter += 1
543545
yield openai_models.ChatCompletionChunk(

ccproxy/plugins/codex/adapter.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -262,26 +262,41 @@ async def prepare_provider_request(
262262

263263
# Parse body (format conversion is now handled by format chain)
264264
body_data = json.loads(body.decode()) if body else {}
265-
body_data = self._apply_request_template(body_data)
265+
if self._should_apply_detection_payload():
266+
body_data = self._apply_request_template(body_data)
267+
else:
268+
body_data = self._normalize_input_messages(body_data)
266269

267-
# Fetch detected instructions from detection service
268-
instructions = self._get_instructions()
270+
detected_instructions = (
271+
self._get_instructions() if self._should_apply_detection_payload() else ""
272+
)
269273

270274
existing_instructions = body_data.get("instructions")
271275
if isinstance(existing_instructions, str) and existing_instructions:
272-
if instructions:
273-
instructions = instructions + "\n" + existing_instructions
274-
else:
275-
instructions = existing_instructions
276+
instructions = (
277+
detected_instructions + "\n" + existing_instructions
278+
if detected_instructions
279+
else existing_instructions
280+
)
281+
else:
282+
instructions = detected_instructions
276283

277-
body_data["instructions"] = instructions
284+
if instructions:
285+
body_data["instructions"] = instructions
286+
else:
287+
body_data.pop("instructions", None)
278288

279289
# Codex backend requires stream=true, always override
280290
body_data["stream"] = True
281291
body_data["store"] = False
282292

283293
# Remove unsupported keys for Codex
284-
for key in ("max_output_tokens", "max_completion_tokens", "temperature"):
294+
for key in (
295+
"max_output_tokens",
296+
"max_completion_tokens",
297+
"max_tokens",
298+
"temperature",
299+
):
285300
body_data.pop(key, None)
286301

287302
list_input = body_data.get("input", [])
@@ -640,6 +655,9 @@ def _request_body_is_encoded(self, headers: dict[str, str]) -> bool:
640655
encoding = headers.get("content-encoding", "").strip().lower()
641656
return bool(encoding and encoding != "identity")
642657

658+
def _should_apply_detection_payload(self) -> bool:
    """Tell whether the detection payload should be applied to requests.

    Reads ``inject_detection_payload`` from ``self.config``. A config object
    without that attribute is treated as enabled, and whatever value is found
    is coerced to ``bool``.
    """
    flag = getattr(self.config, "inject_detection_payload", True)
    return bool(flag)
660+
643661
def _detect_streaming_intent(self, body: bytes, headers: dict[str, str]) -> bool:
644662
if self._request_body_is_encoded(headers):
645663
accept = headers.get("accept", "").lower()

ccproxy/plugins/codex/config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ class CodexSettings(ProviderConfig):
124124
enable_format_registry: bool = Field(
125125
default=True, description="Whether to enable format adapter registry"
126126
)
127+
inject_detection_payload: bool = Field(
128+
default=True,
129+
description=(
130+
"Whether to inject the captured Codex CLI instructions/template into "
131+
"provider requests. Disable this for generic OpenAI-compatible API usage."
132+
),
133+
)
127134

128135
# Detection configuration
129136
detection_home_mode: Literal["temp", "home"] = Field(

ccproxy/services/adapters/format_adapter.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from collections.abc import AsyncIterator, Awaitable, Callable
77
from typing import Any, Protocol, runtime_checkable
88

9+
from ccproxy.llms.formatters.context import register_openai_thinking_xml
10+
911

1012
FormatDict = dict[str, Any]
1113

@@ -63,6 +65,10 @@ def __init__(
6365
self._error = error
6466
self._stream = stream
6567
self.name = name or self.__class__.__name__
68+
self._openai_thinking_xml: bool | None = None
69+
70+
def configure_streaming(self, *, openai_thinking_xml: bool | None = None) -> None:
71+
self._openai_thinking_xml = openai_thinking_xml
6672

6773
async def convert_request(self, data: FormatDict) -> FormatDict:
6874
return await self._run_stage(self._request, data, stage="request")
@@ -92,6 +98,7 @@ async def _create_stream_iterator(
9298
f"{self.name} does not implement stream conversion"
9399
)
94100

101+
register_openai_thinking_xml(self._openai_thinking_xml)
95102
handler = self._stream(stream)
96103
handler = await _maybe_await(handler)
97104

@@ -121,6 +128,7 @@ async def _run_stage(
121128
f"{self.name} does not implement {stage} conversion"
122129
)
123130

131+
register_openai_thinking_xml(self._openai_thinking_xml)
124132
result = await _maybe_await(func(data))
125133
if not isinstance(result, dict):
126134
raise TypeError(

0 commit comments

Comments
 (0)