From 234a14781363d638c481e643d51318f9cb1a990c Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:14:18 +0000 Subject: [PATCH] optimize quota calculation hot path Consolidate ratio resolution and optimize string search in quota.go. Derive tiered completion ratio from eff ratios to avoid manual looping. Optimize containsASCIIFold for faster Claude detection. Co-authored-by: Laisky <4532436+Laisky@users.noreply.github.com> --- .jules/bolt.md | 4 +++ relay/quota/quota.go | 60 ++++++++++++++++++++++++-------------------- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index 65fb9e85e3..4f1efa986d 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -38,3 +38,7 @@ **Learning:** The `quota.Compute` function, a critical hot path for every request, was performing multiple redundant lookups (`GetCompletionRatioWithThreeLayers`, `ResolveEffectivePricing`, `ResolveModelConfig`) that each performed similar map lookups and expensive deep-cloning of `ModelConfig` objects. These objects contain large nested structures for media pricing (Image, Audio, Video) that are entirely unused during standard token-based quota calculation. **Action:** Consolidate pricing resolution into a single `ResolveModelConfigRatioOnly` call that performs a shallow clone of the base struct and a targeted clone of the `Tiers` slice, while omitting media metadata. This reduced the `BenchmarkCompute` execution time by ~19% (2313ns -> 1874ns). Always use "RatioOnly" or targeted lookup functions when full configuration metadata is not required in high-throughput paths. 
+ +## 2026-04-24 - [Streamline Quota Calculation and Fast ASCII Fold] +**Learning:** The quota calculation hot path in `relay/quota/quota.go` was performing manual tier iterations to resolve completion ratios, which is redundant since `eff.OutputRatio / eff.InputRatio` already provides the effective ratio for the matched tier. Additionally, string searches like `containsASCIIFold` (used for Claude detection) can be optimized by pre-calculating the uppercase variant of the (already lowercase) first search byte, so the search loop compares raw bytes instead of calling `asciiLower` on every position. +**Action:** Consolidate ratio resolution by deriving tiered completion ratios from effective pricing results and use fast-path byte matching in string search functions. diff --git a/relay/quota/quota.go b/relay/quota/quota.go index 9f40625587..e7244cb1bf 100644 --- a/relay/quota/quota.go +++ b/relay/quota/quota.go @@ -38,8 +38,6 @@ type ComputeResult struct { } // Compute calculates the quota required for the provided usage snapshot. -// It mirrors the logic used in controller helper functions so streaming -// billing and final reconciliation share the same pricing semantics. func Compute(input ComputeInput) ComputeResult { usage := input.Usage if usage == nil { @@ -51,9 +49,19 @@ func Compute(input ComputeInput) ComputeResult { pricingAdaptor := input.PricingAdaptor resolvedModelCfg, hasResolvedModelCfg := pricing.ResolveModelConfigRatioOnly(input.ModelName, input.ChannelModelConfigs, pricingAdaptor) + + // Resolve the completion ratio using a priority-ordered check to avoid redundant lookups. 
+ var completionRatioResolved float64 + if override, ok := input.ChannelCompletionRatio[input.ModelName]; ok { + completionRatioResolved = override + } else if hasResolvedModelCfg && resolvedModelCfg.CompletionRatio != 0 { + completionRatioResolved = resolvedModelCfg.CompletionRatio + } else { + completionRatioResolved = pricing.GetCompletionRatioWithThreeLayers(input.ModelName, input.ChannelCompletionRatio, pricingAdaptor) + } + hasChannelModelRatioOverride := hasOverrideForModel(input.ModelName, input.ChannelModelRatio) baseRatio := input.ModelRatio - completionRatioResolved := resolveCompletionRatio(input.ModelName, resolvedModelCfg, hasResolvedModelCfg, input.ChannelCompletionRatio, pricingAdaptor) if hasResolvedModelCfg { // Preserve legacy fallback behavior: when channel config omits base ratio/completion @@ -81,39 +89,26 @@ func Compute(input ComputeInput) ComputeResult { if !hasChannelModelRatioOverride { usedModelRatio = eff.InputRatio } - baseComp := eff.OutputRatio - completionBaseRatio := eff.InputRatio - if hasChannelModelRatioOverride { - completionBaseRatio = usedModelRatio - baseComp = usedModelRatio * completionRatioResolved - for _, tier := range resolvedModelCfg.Tiers { - if promptTokens < tier.InputTokenThreshold { - break - } - if tier.CompletionRatio != 0 { - baseComp = usedModelRatio * tier.CompletionRatio - } - } - } - if completionBaseRatio != 0 { - baseComp = baseComp / completionBaseRatio + + // Optimization: Deriving the tiered completion ratio from eff.OutputRatio / eff.InputRatio + // avoids a redundant loop over tiers. Since eff.OutputRatio = eff.InputRatio * tierComp, + // the division recovers the effective completion ratio for the current tier. 
+ if eff.InputRatio != 0 { + usedCompletionRatio = eff.OutputRatio / eff.InputRatio } else { - baseComp = 1.0 + usedCompletionRatio = 1.0 } - usedCompletionRatio = baseComp } else if pricingAdaptor != nil { // Optimized check: only use effective pricing if the input model ratio matches the adaptor base. // This avoids extra GetDefaultModelPricing() map lookups when not needed. adaptorBase := pricingAdaptor.GetModelRatio(input.ModelName) if math.Abs(baseRatio-adaptorBase) < 1e-12 { usedModelRatio = eff.InputRatio - baseComp := eff.OutputRatio if eff.InputRatio != 0 { - baseComp = eff.OutputRatio / eff.InputRatio + usedCompletionRatio = eff.OutputRatio / eff.InputRatio } else { - baseComp = 1.0 + usedCompletionRatio = 1.0 } - usedCompletionRatio = baseComp } } @@ -267,9 +262,21 @@ func containsASCIIFold(s string, substr string) bool { return false } + // substr is already expected to be lowercase from the caller (isClaudeModelName). + // We pre-calculate the uppercase variant of the first byte to allow a fast search + // that avoids calling asciiLower on every character in the model name string. + firstLower := substr[0] + var firstUpper byte + if firstLower >= 'a' && firstLower <= 'z' { + firstUpper = firstLower - ('a' - 'A') + } else { + firstUpper = firstLower + } + last := len(s) - len(substr) for i := 0; i <= last; i++ { - if asciiLower(s[i]) != substr[0] { + // Fast path: match the first byte against both possible cases. + if s[i] != firstLower && s[i] != firstUpper { continue } @@ -286,7 +293,6 @@ func containsASCIIFold(s string, substr string) bool { } return false } - // asciiLower converts ASCII uppercase bytes to lowercase. // Parameter: b is the byte to normalize. // Returns: the lowercase byte when b is an ASCII uppercase letter, otherwise b unchanged.