Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions nerve/agent/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,8 +973,12 @@ def _build_options(
self.config.agent.effort,
model or self.config.agent.model,
)
# Some subscriptions reject the context-1m beta for specific models
# (e.g. claude-sonnet-4-6) — skip the beta header for those.
betas = (
["context-1m-2025-08-07"] if self.config.agent.context_1m else []
["context-1m-2025-08-07"]
if self.config.agent.context_1m_enabled_for(model)
else []
)

# Build PreToolUse (file snapshots, image validation) +
Expand Down Expand Up @@ -2042,7 +2046,11 @@ async def _finalize_turn(
)

# Persist usage for context bar on session switch
max_context = 1_048_576 if self.config.agent.context_1m else 200_000
max_context = (
1_048_576
if self.config.agent.context_1m_enabled_for(st.last_model)
else 200_000
)
num_turns = (st.result_meta or {}).get("num_turns") or 1
if st.last_usage:
usage_data = {
Expand Down
20 changes: 20 additions & 0 deletions nerve/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ class AgentConfig:
thinking: str = "max" # max, high, medium, low, disabled, adaptive, or number (budget_tokens)
effort: str = "max" # max, xhigh, high, medium, low
context_1m: bool = True # Enable 1M context window beta
# Substrings of model names for which the context-1m beta header must NOT
# be sent. Some subscriptions reject the beta for specific models — e.g.
# claude-sonnet-4-6 returns 400 "long context beta not yet available for
# this subscription". Matching is a case-insensitive substring test against
# the resolved model name. An empty list means the beta is sent for all
# models when context_1m=True.
context_1m_excluded_models: list[str] = field(default_factory=list)
# Hung-CLI detection: max idle time between SDK messages on a single
# turn before the engine treats the subprocess as dead and falls into
# the existing CLI-crash retry path. Set to 0 to disable (legacy
Expand All @@ -158,10 +164,24 @@ def from_dict(cls, d: dict) -> AgentConfig:
thinking=str(d.get("thinking", "max")),
effort=str(d.get("effort", "max")),
context_1m=d.get("context_1m", True),
context_1m_excluded_models=list(
d.get("context_1m_excluded_models", []) or []
),
cli_idle_timeout_seconds=int(d.get("cli_idle_timeout_seconds", 900)),
prompt_rewrite=PromptRewriteConfig.from_dict(d.get("prompt_rewrite") or {}),
)

def context_1m_enabled_for(self, model: str | None) -> bool:
"""Whether the context-1m beta applies to *model* (or the default
model when None). False if globally disabled or if the model name
matches any entry in ``context_1m_excluded_models``."""
if not self.context_1m:
return False
resolved = (model or self.model).lower()
return not any(
tok and tok.lower() in resolved for tok in self.context_1m_excluded_models
)


@dataclass
class TelegramConfig:
Expand Down