Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/askui/model_providers/anthropic_vlm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
ToolChoiceParam,
)
from askui.models.shared.prompts import SystemPrompt
from askui.models.shared.request_size import ANTHROPIC_MAX_REQUEST_BYTES
from askui.models.shared.tools import ToolCollection
from askui.utils.model_pricing import ModelPricing

Expand Down Expand Up @@ -104,6 +105,11 @@ def model_id(self) -> str:
def pricing(self) -> ModelPricing | None:
return self._pricing

@property
@override
def max_request_bytes(self) -> int | None:
    """Return the request-size cap, in bytes, used for this provider.

    Always ``ANTHROPIC_MAX_REQUEST_BYTES``.
    """
    return ANTHROPIC_MAX_REQUEST_BYTES

@cached_property
def _messages_api(self) -> AnthropicMessagesApi:
"""Lazily initialise the AnthropicMessagesApi on first use."""
Expand Down
6 changes: 6 additions & 0 deletions src/askui/model_providers/askui_vlm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
ToolChoiceParam,
)
from askui.models.shared.prompts import SystemPrompt
from askui.models.shared.request_size import ANTHROPIC_MAX_REQUEST_BYTES
from askui.models.shared.tools import ToolCollection

_DEFAULT_MODEL_ID = "claude-sonnet-4-6"
Expand Down Expand Up @@ -69,6 +70,11 @@ def __init__(
def model_id(self) -> str:
return self._model_id_value

@property
@override
def max_request_bytes(self) -> int | None:
    """Return the request-size cap, in bytes, used for this provider.

    Always ``ANTHROPIC_MAX_REQUEST_BYTES``.
    """
    return ANTHROPIC_MAX_REQUEST_BYTES

@cached_property
def _messages_api(self) -> AnthropicMessagesApi:
"""Lazily initialise the AnthropicMessagesApi on first use."""
Expand Down
9 changes: 9 additions & 0 deletions src/askui/model_providers/vlm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ def pricing(self) -> ModelPricing | None:
"""
return None

@property
def max_request_bytes(self) -> int | None:
    """Hard cap on the serialized request body size, in bytes.

    This base implementation always returns ``None``.

    Returns:
        ``None`` when the provider has no known/specific limit, in which
        case callers fall back to a conservative default. Override in
        subclasses backed by an endpoint with a known cap.
    """
    return None

@abstractmethod
def create_message(
self,
Expand Down
17 changes: 17 additions & 0 deletions src/askui/models/anthropic/messages_api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from typing import Any, Tuple, cast

from anthropic import (
Expand Down Expand Up @@ -39,9 +40,15 @@
)
from askui.models.shared.messages_api import MessagesApi
from askui.models.shared.prompts import SystemPrompt
from askui.models.shared.request_size import (
ANTHROPIC_MAX_REQUEST_BYTES,
estimate_messages_bytes,
)
from askui.models.shared.tools import ToolCollection
from askui.utils.image_utils import image_to_base64

logger = logging.getLogger(__name__)


def _is_retryable_error(exception: BaseException) -> bool:
"""Check if the exception is a retryable error."""
Expand Down Expand Up @@ -193,6 +200,16 @@ def create_message(
temperature: float | None = None,
provider_options: dict[str, Any] | None = None,
) -> MessageParam:
estimated_bytes = estimate_messages_bytes(messages)
if estimated_bytes > ANTHROPIC_MAX_REQUEST_BYTES:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like that we estimate the request size. I would prefer to handle the error and then apply the truncation strategy.

Otherwise we won't know if the LLM provider increases their 30 MB limit.

logger.warning(
"Estimated request size ~%d bytes exceeds the Anthropic "
"limit of %d bytes; the request may be rejected with a 400. "
"Configure a truncation strategy with byte enforcement.",
estimated_bytes,
ANTHROPIC_MAX_REQUEST_BYTES,
)

# convert each message to anthropic BetaMessageParam type
_messages = [from_message_param(message) for message in messages]

Expand Down
67 changes: 67 additions & 0 deletions src/askui/models/shared/request_size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Request size estimation shared across truncation and providers.

The Anthropic Messages API rejects requests whose serialized body exceeds
~32 MB with a 400 ``BadRequestError``. Base64-encoded screenshots dominate
that payload. These helpers estimate the serialized byte size cheaply so
truncation strategies (and a provider-side safety net) can keep requests
under the limit.

The estimate reads cached string lengths (``len`` is O(1) on Python
strings, and base64 ``data`` is ASCII so its length equals its serialized
byte count), making a full pass O(number of blocks) rather than
O(payload size). Structural JSON overhead (field names, braces, quotes) is
not counted; it is sub-percent of image-heavy payloads and absorbed by the
threshold headroom callers apply on top of the hard limit.
"""

from askui.models.shared.agent_message_param import (
Base64ImageSourceParam,
BetaThinkingBlock,
ContentBlockParam,
ImageBlockParam,
MessageParam,
TextBlockParam,
ToolResultBlockParam,
ToolUseBlockParam,
)

# Request-size cap applied to Anthropic Messages API requests: 30 MiB.
# NOTE: this is lower than the ~32 MB body size at which, per the module
# docstring, the API rejects a request.
ANTHROPIC_MAX_REQUEST_BYTES = 30 * 1024 * 1024


def estimate_block_bytes(block: ContentBlockParam) -> int:
    """Return an approximate serialized size, in bytes, of one content block.

    Only the string payload of the block is measured, using ``len``:

    - image: the base64 ``data`` (ASCII, so one character is one byte) or,
      for any other source, its ``url``
    - text: the ``text``
    - tool result: the ``content`` string, or the sum over its nested blocks
    - tool use: ``str(input)`` plus the ``name``
    - thinking: ``thinking`` plus ``signature``
    - anything else: ``data`` (expected to be a redacted-thinking block)

    ``len`` counts characters, so non-ASCII text is measured in characters
    rather than encoded bytes, and JSON structure is not included.
    """
    if isinstance(block, ImageBlockParam):
        source = block.source
        if isinstance(source, Base64ImageSourceParam):
            size = len(source.data)
        else:
            size = len(source.url)
    elif isinstance(block, TextBlockParam):
        size = len(block.text)
    elif isinstance(block, ToolResultBlockParam):
        content = block.content
        if isinstance(content, str):
            size = len(content)
        else:
            # Nested blocks are measured recursively and added up.
            size = 0
            for child in content:
                size += estimate_block_bytes(child)
    elif isinstance(block, ToolUseBlockParam):
        input_size = len(str(block.input))
        size = input_size + len(block.name)
    elif isinstance(block, BetaThinkingBlock):
        thinking_size = len(block.thinking)
        size = thinking_size + len(block.signature)
    else:
        # Remaining block type: BetaRedactedThinkingBlock.
        size = len(block.data)
    return size


def estimate_message_bytes(message: MessageParam) -> int:
    """Return an approximate serialized size, in bytes, of one message.

    A plain-string ``content`` is measured with ``len``; otherwise the
    estimates of the individual content blocks are added up.
    """
    content = message.content
    if not isinstance(content, str):
        total = 0
        for part in content:
            total += estimate_block_bytes(part)
        return total
    return len(content)


def estimate_messages_bytes(messages: list[MessageParam]) -> int:
    """Return the summed size estimate, in bytes, of every message given.

    An empty history yields ``0``.
    """
    total = 0
    for entry in messages:
        total += estimate_message_bytes(entry)
    return total
Loading
Loading