From afa7a422a5f506c6c1526af99b93153d7fb34a6e Mon Sep 17 00:00:00 2001
From: Martyn Davies <martynrdavies@gmail.com>
Date: Tue, 14 Apr 2026 16:11:18 +0200
Subject: [PATCH 1/6] Add top-level Rate Limiting docs section with new guides

Introduces a dedicated Rate Limiting section in the sidebar with an
overview page, getting started guide, dynamic rate limiting guide,
combining policies guide, and monitoring/troubleshooting guide. Also
improves existing docs: adds sliding window explanation to concepts,
fixes self-referencing links in per-user-rate-limits-using-db, and
expands the rate-limit-exceeded error page with response format details.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../per-user-rate-limits-using-db.mdx         |  50 +--
 docs/concepts/rate-limiting.md                |  62 +++-
 docs/errors/rate-limit-exceeded.mdx           |  33 +-
 docs/rate-limiting/combining-policies.mdx     | 293 ++++++++++++++++++
 docs/rate-limiting/dynamic-rate-limiting.mdx  | 240 ++++++++++++++
 docs/rate-limiting/getting-started.mdx        | 163 ++++++++++
 .../monitoring-and-troubleshooting.mdx        | 243 +++++++++++++++
 docs/rate-limiting/overview.mdx               |  63 ++++
 sidebar.ts                                    |  29 ++
 9 files changed, 1140 insertions(+), 36 deletions(-)
 create mode 100644 docs/rate-limiting/combining-policies.mdx
 create mode 100644 docs/rate-limiting/dynamic-rate-limiting.mdx
 create mode 100644 docs/rate-limiting/getting-started.mdx
 create mode 100644 docs/rate-limiting/monitoring-and-troubleshooting.mdx
 create mode 100644 docs/rate-limiting/overview.mdx

diff --git a/docs/articles/per-user-rate-limits-using-db.mdx b/docs/articles/per-user-rate-limits-using-db.mdx
index ace69f2e2..ae3bf7e59 100644
--- a/docs/articles/per-user-rate-limits-using-db.mdx
+++ b/docs/articles/per-user-rate-limits-using-db.mdx
@@ -10,10 +10,9 @@ tags:
 ---
 
 In this example we show a more advanced implementation of
-[dynamic rate limiting](../articles/per-user-rate-limits-using-db.mdx). It uses
-a database lookup to get the customer details and combines that with the
-ZoneCache to improve performance, reduce latency and lower the load on the
-database.
+[dynamic rate limiting](../rate-limiting/dynamic-rate-limiting.mdx). It uses a
+database lookup to get the customer details and combines that with the ZoneCache
+to improve performance, reduce latency and lower the load on the database.
 
 In this example we use [Supabase](https://supabase.com) as the database but you
 could use your own API, [Xata](https://xata.io),
@@ -22,8 +21,8 @@ all.
 
 If you haven't already, check out the
 [rate-limiting policy](../policies/rate-limit-inbound.mdx) and the
-[dynamic rate limiting quickstart](../articles/per-user-rate-limits-using-db.mdx).
-Then you should be oriented to how dynamic rate limiting works.
+[dynamic rate limiting guide](../rate-limiting/dynamic-rate-limiting.mdx). Then
+you should be oriented to how dynamic rate limiting works.
 
 Below is a full implementation of a custom rate limiting function. In our
 example this is a module called `per-user-rate-limiting.ts`.
@@ -48,9 +47,18 @@ export async function rateLimitKey(
   context: ZuploContext,
   policyName: string,
 ): Promise<CustomRateLimitDetails> {
-  // We'll get the customer ID from the user data.
-  // This might be from a JWT or API Key metadata
-  const customerId = request.user.data.customerId;
+  // Get the customer ID from the user data.
+  // This might be from a JWT or API Key metadata.
+  // Ensure an authentication policy runs before this.
+  const customerId = request.user?.data?.customerId;
+  if (!customerId) {
+    context.log.error("No customerId found on request.user.data");
+    return {
+      key: request.user?.sub ?? "unknown",
+      requestsAllowed: FALLBACK_REQUESTS_ALLOWED,
+      timeWindowMinutes: 1,
+    };
+  }
 
   // We don't want to hit the database on every request
   // So we'll use the fast zone cache to cache this data
@@ -95,17 +103,21 @@ export async function rateLimitKey(
 The above function can be applied to a rate limiter with the following
 configuration in policies
 
-```json
+```json title="config/policies.json"
 {
-  "export": "RateLimitInboundPolicy",
-  "module": "$import(@zuplo/runtime)",
-  "options": {
-    "rateLimitBy": "function",
-    "requestsAllowed": 2,
-    "timeWindowMinutes": 1,
-    "identifier": {
-      "export": "rateLimitKey",
-      "module": "$import(./modules/per-user-rate-limiting)"
+  "name": "my-per-user-rate-limit-policy",
+  "policyType": "rate-limit-inbound",
+  "handler": {
+    "export": "RateLimitInboundPolicy",
+    "module": "$import(@zuplo/runtime)",
+    "options": {
+      "rateLimitBy": "function",
+      "requestsAllowed": 100,
+      "timeWindowMinutes": 1,
+      "identifier": {
+        "export": "rateLimitKey",
+        "module": "$import(./modules/per-user-rate-limiting)"
+      }
     }
   }
 }
diff --git a/docs/concepts/rate-limiting.md b/docs/concepts/rate-limiting.md
index f8c558f91..8b078e4ba 100644
--- a/docs/concepts/rate-limiting.md
+++ b/docs/concepts/rate-limiting.md
@@ -7,9 +7,13 @@ given time window. It protects your backend from traffic spikes, enforces fair
 usage across consumers, and enables tiered access for different customer plans.
 
 Zuplo's rate limiter uses a **sliding window algorithm** enforced globally
-across all edge locations. When a client exceeds the limit, they receive a
-`429 Too Many Requests` response with a `retry-after` header indicating when
-they can retry.
+across all edge locations. Unlike a fixed window algorithm (which resets
+counters at fixed intervals and can allow bursts at window boundaries), the
+sliding window continuously tracks requests over a rolling time period. This
+produces smoother, more predictable throttling behavior.
+
+When a client exceeds the limit, they receive a `429 Too Many Requests` response
+with a `retry-after` header indicating when they can retry.
 
 ## Rate limiting policies
 
@@ -73,12 +77,12 @@ example, counting compute units or tokens instead of raw requests).
 
 ## Choosing a policy
 
-| Scenario                                               | Policy                                        |
-| ------------------------------------------------------ | --------------------------------------------- |
-| Fixed requests-per-minute limit for all callers        | Rate Limiting                                 |
-| Different limits per customer tier (free vs. paid)     | Rate Limiting with a custom function          |
-| Counting multiple resources (requests + compute units) | Complex Rate Limiting                         |
-| Usage-based billing with variable cost per request     | Complex Rate Limiting with dynamic increments |
+| Scenario                                               | Policy                                                     |
+| ------------------------------------------------------ | ---------------------------------------------------------- |
+| Fixed requests-per-minute limit for all callers        | Rate Limiting                                              |
+| Different limits per customer tier (free vs. paid)     | Rate Limiting with a custom function                       |
+| Counting multiple resources (requests + compute units) | Complex Rate Limiting (enterprise)                         |
+| Usage-based billing with variable cost per request     | Complex Rate Limiting with dynamic increments (enterprise) |
 
 ## How `rateLimitBy` works
 
@@ -91,6 +95,15 @@ Groups requests by the client's IP address. No authentication is required. This
 is the simplest option and works well for public APIs or as a first layer of
 protection.
 
+:::caution
+
+Be aware that multiple clients behind the same corporate proxy, cloud NAT, or
+shared Wi-Fi network can share a single IP address. In these cases, IP-based
+rate limiting can unfairly throttle unrelated users. For authenticated APIs,
+prefer `rateLimitBy: "user"` instead.
+
+:::
+
 ### `user`
 
 Groups requests by the authenticated user's identity (`request.user.sub`). When
@@ -101,6 +114,14 @@ using JWT authentication, it comes from the token's `sub` claim.
 This is the recommended mode for authenticated APIs because it ties limits to
 the actual consumer rather than a shared IP address.
 
+:::note
+
+The `user` mode requires an authentication policy (such as API Key
+Authentication or JWT authentication) earlier in the policy pipeline. If no
+authenticated user is present on the request, the policy returns an error.
+
+:::
+
 ### `function`
 
 Groups requests using a custom TypeScript function that you provide. The
@@ -224,17 +245,30 @@ These serve different purposes:
 You can apply multiple rate limiting policies to the same route. For example,
 you might enforce both a per-minute and a per-hour limit. When using multiple
 policies, apply the longest time window first, followed by shorter durations.
+This ordering ensures that the broadest limit is checked first — if a caller has
+exhausted their hourly limit, the request is rejected immediately without
+incrementing the shorter-duration counter.
 
 ## Additional options
 
 Both rate limiting policies support the following additional options:
 
 - `headerMode` - Set to `"retry-after"` (default) to include the `retry-after`
-  header in 429 responses, or `"none"` to omit it
-- `mode` - Set to `"strict"` (default) for synchronous enforcement, or `"async"`
-  for non-blocking checks that may allow some requests over the limit
-- `throwOnFailure` - Set to `true` to return an error if the rate limit service
-  is unreachable, or `false` (default) to allow the request through
+  header in 429 responses, or `"none"` to omit it. The `retry-after` value is
+  returned as a number of seconds (delay-seconds format).
+- `mode` - Set to `"strict"` (default) or `"async"`. In **strict** mode, the
+  request is held until the rate limit check completes — the backend is never
+  called if the limit is exceeded. This adds some latency to every request
+  because the check hits a globally distributed rate limit service. In **async**
+  mode, the request proceeds to the backend in parallel with the rate limit
+  check. This minimizes added latency but means some requests may get through
+  even after the limit is exceeded. Async mode is a good fit when low latency
+  matters more than exact enforcement.
+- `throwOnFailure` - Controls behavior when the rate limit service is
+  unreachable. When set to `false` (default), requests are allowed through
+  (fail-open). When set to `true`, the policy returns an error to the client.
+  The fail-open default prevents a rate limit service outage from blocking all
+  traffic to your API.
 
 ## Related resources
 
diff --git a/docs/errors/rate-limit-exceeded.mdx b/docs/errors/rate-limit-exceeded.mdx
index 8ad81f8b3..18d7c27b2 100644
--- a/docs/errors/rate-limit-exceeded.mdx
+++ b/docs/errors/rate-limit-exceeded.mdx
@@ -4,6 +4,29 @@ title: Rate Limit Exceeded (RATE_LIMIT_EXCEEDED)
 
 The request was rejected because the client exceeded the configured rate limit.
 
+## Response format
+
+The 429 response uses the
+[Problem Details](../programmable-api/http-problems.mdx) format:
+
+```json
+{
+  "type": "https://httpproblems.com/http-status/429",
+  "title": "Too Many Requests",
+  "status": 429,
+  "detail": "Rate limit exceeded",
+  "instance": "/your-route",
+  "trace": {
+    "requestId": "4d54e4ee-c003-4d75-aba9-e09a6d707b08",
+    "timestamp": "2026-04-14T12:00:00.000Z",
+    "buildId": "ec44e831-3a02-467e-a26c-7e401e4473bf"
+  }
+}
+```
+
+If `headerMode` is set to `"retry-after"` (the default), the response includes a
+`Retry-After` header with the number of seconds to wait before retrying.
+
 ## Common Causes
 
 - **Too many requests** — The client sent more requests than the rate limit
@@ -24,8 +47,12 @@ The request was rejected because the client exceeded the configured rate limit.
 
 ## For API Operators
 
-- Review the rate limiting policy configuration in the route settings.
+- Review the rate limiting policy configuration in the route settings. Check the
+  `requestsAllowed` and `timeWindowMinutes` values and verify that the
+  `rateLimitBy` identifier is resolving correctly.
 - Consider using
-  [dynamic rate limiting](../articles/step-5-dynamic-rate-limiting.mdx) to set
+  [dynamic rate limiting](../rate-limiting/dynamic-rate-limiting.mdx) to set
   different limits per customer tier.
-- Check the rate limit metrics to determine if limits need adjustment.
+- Use your [logging integration](../articles/logging.mdx) to filter for 429
+  responses and identify which consumers are being throttled. Break down by user
+  or IP to spot noisy neighbors.
diff --git a/docs/rate-limiting/combining-policies.mdx b/docs/rate-limiting/combining-policies.mdx
new file mode 100644
index 000000000..2d1204305
--- /dev/null
+++ b/docs/rate-limiting/combining-policies.mdx
@@ -0,0 +1,293 @@
+---
+title: Combining Rate Limit Policies
+sidebar_label: Combining Policies
+description:
+  Apply multiple rate limits to the same route, combine rate limiting with
+  quotas, and design multi-layer protection strategies.
+---
+
+Real-world APIs rarely need just one rate limiting boundary. A payment endpoint
+might need a per-minute burst limit to protect against runaway scripts _and_ a
+per-hour cap to enforce fair usage. A monetized API might pair a monthly quota
+with a per-second spike guard. Zuplo supports all of these patterns by letting
+you stack multiple policies on the same route.
+
+## Multiple rate limits on one route
+
+You can apply two or more rate limiting policies to a single route. Each policy
+maintains its own counter independently, and the request must pass every policy
+to reach the backend.
+
+A common pattern is combining a short-window burst limit with a longer-window
+sustained limit. The following example enforces both a 1,000-requests-per-hour
+ceiling and a 100-requests-per-minute burst limit on the same route.
+
+### Define the policies
+
+```json title="config/policies.json"
+{
+  "policies": [
+    {
+      "name": "rate-limit-hourly",
+      "policyType": "rate-limit-inbound",
+      "handler": {
+        "export": "RateLimitInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "rateLimitBy": "user",
+          "requestsAllowed": 1000,
+          "timeWindowMinutes": 60
+        }
+      }
+    },
+    {
+      "name": "rate-limit-per-minute",
+      "policyType": "rate-limit-inbound",
+      "handler": {
+        "export": "RateLimitInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "rateLimitBy": "user",
+          "requestsAllowed": 100,
+          "timeWindowMinutes": 1
+        }
+      }
+    }
+  ]
+}
+```
+
+### Attach them to a route
+
+List both policies in the route's inbound pipeline. Place the longest time
+window first:
+
+```json title="config/routes.oas.json (excerpt)"
+{
+  "x-zuplo-route": {
+    "policies": {
+      "inbound": ["rate-limit-hourly", "rate-limit-per-minute"]
+    }
+  }
+}
+```
+
+:::tip
+
+Apply the longest time window first. If a caller has exhausted the hourly
+limit, the request is rejected immediately without incrementing the per-minute
+counter. This avoids wasting counter writes on requests that would fail anyway.
+
+:::
+
+Each policy tracks its own sliding window counter scoped by its `name`. A
+request that passes the hourly check still gets evaluated against the per-minute
+check. If either policy rejects the request, the client receives a
+`429 Too Many Requests` response.
+
+## Rate limiting vs. quotas
+
+Rate limiting and quotas both cap usage, but they solve different problems.
+
+| Aspect            | Rate limiting                                       | Quota                                        |
+| ----------------- | --------------------------------------------------- | -------------------------------------------- |
+| **Time window**   | Short: seconds, minutes, or hours                   | Long: hourly, daily, weekly, or monthly      |
+| **Purpose**       | Protect backends from traffic spikes                | Enforce billing-period usage caps            |
+| **Counter reset** | Sliding window rolls continuously                   | Fixed period anchored to a start date        |
+| **Typical use**   | "100 requests per minute per user"                  | "10,000 requests per month per subscription" |
+| **Policy**        | [Rate Limiting](../policies/rate-limit-inbound.mdx) | [Quota](../policies/quota-inbound.mdx)       |
+
+Use rate limiting when you need to smooth traffic and prevent bursts. Use quotas
+when you need to enforce a usage allowance over a billing cycle. In many APIs,
+you use both together: a monthly quota to cap total usage and a per-minute rate
+limit to prevent any single caller from overwhelming the backend within that
+quota.
+
+### Example: quota plus rate limit
+
+```json title="config/policies.json"
+{
+  "policies": [
+    {
+      "name": "monthly-quota",
+      "policyType": "quota-inbound",
+      "handler": {
+        "export": "QuotaInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "period": "monthly",
+          "quotaBy": "user",
+          "allowances": {
+            "requests": 10000
+          }
+        }
+      }
+    },
+    {
+      "name": "burst-rate-limit",
+      "policyType": "rate-limit-inbound",
+      "handler": {
+        "export": "RateLimitInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "rateLimitBy": "user",
+          "requestsAllowed": 100,
+          "timeWindowMinutes": 1
+        }
+      }
+    }
+  ]
+}
+```
+
+On the route, place the quota policy first so that callers who already used
+their monthly allowance are rejected before the rate limit counter is
+incremented:
+
+```json title="config/routes.oas.json (excerpt)"
+{
+  "x-zuplo-route": {
+    "policies": {
+      "inbound": ["monthly-quota", "burst-rate-limit"]
+    }
+  }
+}
+```
+
+## Rate limiting with monetization
+
+The [Monetization policy](../articles/monetization/monetization-policy.md)
+handles subscription validation, quota enforcement, and metering in one step. It
+already enforces billing-period usage limits tied to the customer's plan, so you
+do not need a separate quota policy on monetized routes.
+
+Rate limiting is still valuable alongside monetization. A customer on a
+50,000-requests-per-month plan could theoretically send all 50,000 requests in a
+single minute, which would overwhelm your backend even though it falls within
+the monthly allowance. Adding a rate limiting policy prevents that spike.
+
+```json title="config/routes.oas.json (excerpt)"
+{
+  "x-zuplo-route": {
+    "policies": {
+      "inbound": ["monetization-inbound", "rate-limit-per-minute"]
+    }
+  }
+}
+```
+
+:::note
+
+The monetization policy handles API key authentication internally. You do not
+need a separate `api-key-auth` policy on monetized routes. Place the
+monetization policy first so that `request.user` is populated before the rate
+limit policy runs.
+
+:::
+
+These two layers are complementary:
+
+- **Monetization** enforces monthly or billing-period usage limits and tracks
+  metered usage for billing.
+- **Rate limiting** enforces per-minute or per-second spike protection to keep
+  your backend healthy.
+
+## Counter scoping
+
+Rate limit counters are scoped by the policy's `name` field combined with the
+caller identifier (user, IP, or custom key). Understanding this scoping is
+important when you apply the same policy type to multiple routes.
+
+### Shared counters
+
+If two routes reference the same policy name, they share a counter. If a caller
+makes 60 requests to `/orders` and 40 requests to `/products` — both using a
+policy named `rate-limit-per-minute` — all 100 requests count against that
+policy's limit.
+
+```json title="config/routes.oas.json (excerpt)"
+{
+  "paths": {
+    "/orders": {
+      "get": {
+        "x-zuplo-route": {
+          "policies": {
+            "inbound": ["rate-limit-per-minute"]
+          }
+        }
+      }
+    },
+    "/products": {
+      "get": {
+        "x-zuplo-route": {
+          "policies": {
+            "inbound": ["rate-limit-per-minute"]
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+Shared counters are useful when you want a single global limit that applies
+across all routes for a given caller.
+
+### Independent counters
+
+To give each route its own counter, create separate policy instances with
+different names:
+
+```json title="config/policies.json"
+{
+  "policies": [
+    {
+      "name": "rate-limit-orders",
+      "policyType": "rate-limit-inbound",
+      "handler": {
+        "export": "RateLimitInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "rateLimitBy": "user",
+          "requestsAllowed": 100,
+          "timeWindowMinutes": 1
+        }
+      }
+    },
+    {
+      "name": "rate-limit-products",
+      "policyType": "rate-limit-inbound",
+      "handler": {
+        "export": "RateLimitInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "rateLimitBy": "user",
+          "requestsAllowed": 200,
+          "timeWindowMinutes": 1
+        }
+      }
+    }
+  ]
+}
+```
+
+Now a caller can make 100 requests per minute to `/orders` and 200 requests per
+minute to `/products` independently. Exhausting the orders limit does not affect
+the products limit.
+
+:::warning
+
+If you duplicate a policy definition and forget to change the `name`, both
+routes share the same counter. Always verify that policy names are distinct when
+you intend independent counters.
+
+:::
+
+## Related resources
+
+- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx)
+- [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
+- [Quota policy reference](../policies/quota-inbound.mdx)
+- [Monetization policy](../articles/monetization/monetization-policy.md)
+- [How rate limiting works](../concepts/rate-limiting.md)
+- [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx)
diff --git a/docs/rate-limiting/dynamic-rate-limiting.mdx b/docs/rate-limiting/dynamic-rate-limiting.mdx
new file mode 100644
index 000000000..9612cb7d6
--- /dev/null
+++ b/docs/rate-limiting/dynamic-rate-limiting.mdx
@@ -0,0 +1,240 @@
+---
+title: Dynamic Rate Limiting
+sidebar_label: Dynamic Rate Limiting
+description:
+  Learn how to implement dynamic rate limiting with custom functions to apply
+  different limits based on customer tier, route, or any request property.
+---
+
+Static rate limits apply the same threshold to every caller. Dynamic rate
+limiting lets you determine limits at request time — so premium customers get
+higher throughput, free-tier users get a lower ceiling, and internal services
+can bypass limits entirely.
+
+Dynamic rate limiting works with both the
+[Rate Limiting policy](../policies/rate-limit-inbound.mdx) and the
+[Complex Rate Limiting policy](../policies/complex-rate-limit-inbound.mdx).
+
+## How it works
+
+When you set `rateLimitBy` to `"function"`, the policy calls a TypeScript
+function you provide on every request. That function returns a
+`CustomRateLimitDetails` object that tells the rate limiter:
+
+- **`key`** — The string used to group requests into buckets (e.g., a user ID or
+  API key consumer name).
+- **`requestsAllowed`** (optional) — Overrides the policy's default
+  `requestsAllowed` for this request.
+- **`timeWindowMinutes`** (optional) — Overrides the policy's default
+  `timeWindowMinutes` for this request.
+
+Returning `undefined` skips rate limiting for that request entirely.
+
+## Create a rate limit function
+
+Create a new module (for example `modules/rate-limit.ts`) with a function that
+inspects the request and returns the appropriate limits.
+
+The following example reads a `customerType` field from the authenticated user's
+metadata and applies different limits per tier:
+
+```ts title="modules/rate-limit.ts"
+import {
+  CustomRateLimitDetails,
+  ZuploContext,
+  ZuploRequest,
+} from "@zuplo/runtime";
+
+export function rateLimit(
+  request: ZuploRequest,
+  context: ZuploContext,
+  policyName: string,
+): CustomRateLimitDetails | undefined {
+  const user = request.user;
+
+  // Premium customers get 1000 requests per minute
+  if (user.data.customerType === "premium") {
+    return {
+      key: user.sub,
+      requestsAllowed: 1000,
+      timeWindowMinutes: 1,
+    };
+  }
+
+  // Free customers get 50 requests per minute
+  if (user.data.customerType === "free") {
+    return {
+      key: user.sub,
+      requestsAllowed: 50,
+      timeWindowMinutes: 1,
+    };
+  }
+
+  // Default for any other customer type
+  return {
+    key: user.sub,
+    requestsAllowed: 100,
+    timeWindowMinutes: 1,
+  };
+}
+```
+
+:::tip
+
+When using [API key authentication](../articles/api-key-authentication.mdx), the
+`user.data` object contains the metadata you set when creating the API key
+consumer. When using JWT authentication, it contains the decoded token claims.
+
+:::
+
+## Configure the policy
+
+Wire the function into the rate limiting policy by setting `rateLimitBy` to
+`"function"` and pointing the `identifier` option at your module:
+
+```json title="config/policies.json"
+{
+  "name": "my-dynamic-rate-limit-policy",
+  "policyType": "rate-limit-inbound",
+  "handler": {
+    "export": "RateLimitInboundPolicy",
+    "module": "$import(@zuplo/runtime)",
+    "options": {
+      "rateLimitBy": "function",
+      "requestsAllowed": 100,
+      "timeWindowMinutes": 1,
+      "identifier": {
+        "export": "rateLimit",
+        "module": "$import(./modules/rate-limit)"
+      }
+    }
+  }
+}
+```
+
+The `requestsAllowed` and `timeWindowMinutes` values in the policy configuration
+serve as defaults. Your function can override them per request, or omit them to
+use the defaults.
+
+## Common patterns
+
+### Tier-based limits from API key metadata
+
+Store a `plan` or `customerType` field in your API key consumer metadata, then
+branch on it in your rate limit function. This is the simplest approach and
+requires no external lookups.
+
+### Route-based limits
+
+Use `request.url` or `request.params` to apply different limits to different
+endpoints. For example, a search endpoint might allow 10 requests per minute
+while a read endpoint allows 100.
+
+```ts
+export function rateLimit(
+  request: ZuploRequest,
+  context: ZuploContext,
+  policyName: string,
+): CustomRateLimitDetails | undefined {
+  const isSearch = new URL(request.url).pathname.includes("/search");
+
+  return {
+    key: request.user.sub,
+    requestsAllowed: isSearch ? 10 : 100,
+    timeWindowMinutes: 1,
+  };
+}
+```
+
+### Method-based limits
+
+Apply different limits to reads (GET) vs. writes (POST, PUT, PATCH, DELETE).
+Write-heavy endpoints often need tighter limits to protect backends:
+
+```ts
+export function rateLimit(
+  request: ZuploRequest,
+  context: ZuploContext,
+  policyName: string,
+): CustomRateLimitDetails | undefined {
+  const isWrite = ["POST", "PUT", "DELETE", "PATCH"].includes(request.method);
+
+  return {
+    key: request.user.sub,
+    requestsAllowed: isWrite ? 20 : 200,
+    timeWindowMinutes: 1,
+  };
+}
+```
+
+### Database-driven limits
+
+For limits that change frequently or are managed outside your gateway
+configuration, look them up from a database at request time. Use the
+[ZoneCache](../programmable-api/zone-cache.mdx) to avoid hitting the database on
+every request.
+
+See
+[Per-user rate limiting with a database](../articles/per-user-rate-limits-using-db.mdx)
+for a complete example using Supabase and ZoneCache.
+
+### Skip rate limiting for specific requests
+
+Return `undefined` to bypass rate limiting entirely. This is useful for health
+checks, internal services, or admin users:
+
+```ts
+export function rateLimit(
+  request: ZuploRequest,
+  context: ZuploContext,
+  policyName: string,
+): CustomRateLimitDetails | undefined {
+  if (request.user.data.role === "admin") {
+    return undefined;
+  }
+
+  return {
+    key: request.user.sub,
+    requestsAllowed: 100,
+    timeWindowMinutes: 1,
+  };
+}
+```
+
+## Testing
+
+To verify that dynamic limits are applied correctly, create API key consumers
+with different metadata values (for example, one with
+`{"customerType": "premium"}` and one with `{"customerType": "free"}`).
+
+Make requests with each key until you receive a `429 Too Many Requests`
+response. For example, with a free-tier key limited to 50 requests per minute:
+
+```bash
+# Replace with your API URL and key
+for i in $(seq 1 55); do
+  curl -s -o /dev/null -w "%{http_code}\n" \
+    -H "Authorization: Bearer YOUR_API_KEY" \
+    https://your-api.zuplo.dev/your-route
+done
+```
+
+The first 50 requests return `200`. Requests 51-55 return `429` with a
+`Retry-After` header. Repeat with the premium key and confirm the higher limit
+applies.
+
+:::tip
+
+Rate limit counters are per-environment. Preview and development environments
+have their own counters separate from production, so testing does not affect
+production limits.
+
+:::
+
+## Related resources
+
+- [How rate limiting works](../concepts/rate-limiting.md) — Full explanation of
+  `rateLimitBy` modes and configuration options
+- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx)
+- [Per-user rate limiting with a database](../articles/per-user-rate-limits-using-db.mdx)
+  — Advanced example with database lookups and caching
diff --git a/docs/rate-limiting/getting-started.mdx b/docs/rate-limiting/getting-started.mdx
new file mode 100644
index 000000000..a8811b48a
--- /dev/null
+++ b/docs/rate-limiting/getting-started.mdx
@@ -0,0 +1,163 @@
+---
+title: Getting Started with Rate Limiting
+sidebar_label: Getting Started
+description:
+  Add rate limiting to an existing Zuplo API gateway project in minutes.
+---
+
+This guide walks you through adding a basic rate limiting policy to an existing
+Zuplo project, attaching it to a route, and verifying that it works. For
+background on how rate limiting works in Zuplo, see the
+[Introduction](./overview.mdx).
+
+## Prerequisites
+
+- An existing Zuplo project with at least one route configured in
+  `config/routes.oas.json`
+- The [Zuplo CLI](../cli/overview.mdx) installed, or access to the
+  [Zuplo Portal](https://portal.zuplo.com)
+
+## 1. Add the policy
+
+Open `config/policies.json` and add a rate limiting policy to the `policies`
+array. This example limits each IP address to 2 requests per minute, which makes
+it easy to test.
+
+```json title="config/policies.json"
+{
+  "policies": [
+    {
+      "name": "rate-limit-inbound",
+      "policyType": "rate-limit-inbound",
+      "handler": {
+        "export": "RateLimitInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "rateLimitBy": "ip",
+          "requestsAllowed": 2,
+          "timeWindowMinutes": 1
+        }
+      }
+    }
+  ]
+}
+```
+
+The key options are:
+
+- **`rateLimitBy`** -- How to group requests into rate limit buckets. `"ip"`
+  groups by the caller's IP address and requires no authentication.
+- **`requestsAllowed`** -- The maximum number of requests allowed in the time
+  window.
+- **`timeWindowMinutes`** -- The length of the sliding time window in minutes.
+
+:::tip
+
+If your project already has other policies in `config/policies.json`, add the
+rate limiting entry to the existing `policies` array rather than replacing it.
+
+:::
+
+## 2. Attach the policy to a route
+
+Open `config/routes.oas.json` and add the policy name to the `policies.inbound`
+array inside the `x-zuplo-route` object of the route you want to protect.
+
+```json title="config/routes.oas.json"
+{
+  "paths": {
+    "/my-route": {
+      "get": {
+        "operationId": "get-my-route",
+        "x-zuplo-route": {
+          "corsPolicy": "anything-goes",
+          "handler": {
+            "export": "urlForwardHandler",
+            "module": "$import(@zuplo/runtime)",
+            "options": {
+              "baseUrl": "https://api.example.com"
+            }
+          },
+          "policies": {
+            "inbound": ["rate-limit-inbound"]
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+The `"rate-limit-inbound"` string must match the `name` field from the policy
+you defined in `config/policies.json`. When a request hits this route, Zuplo
+runs each inbound policy in array order before forwarding to the handler.
+
+:::note
+
+You can attach the same policy to multiple routes. Add its name to the
+`policies.inbound` array on each route that needs rate limiting.
+
+:::
+
+## 3. Test the rate limit
+
+Start your local dev server (or deploy to a Zuplo environment) and send requests
+to the protected route. With the configuration above, the third request within a
+one-minute window returns a `429` response.
+
+:::note
+
+Rate limiting requires a connection to Zuplo cloud services. If you have not
+already done so, link your local project by running `npx zuplo link` and
+selecting an environment. See
+[Connecting to Zuplo Services Locally](../articles/local-development-services.mdx)
+for details.
+
+:::
+
+```bash
+# Send three requests in quick succession
+for i in 1 2 3; do
+  echo "--- Request $i ---"
+  curl -s -w "\nHTTP Status: %{http_code}\n" http://localhost:9000/my-route
+done
+```
+
+The first two requests return a `200` response from your upstream service. The
+third request returns a `429 Too Many Requests` response in
+[Problem Details](https://httpproblems.com) format:
+
+```json
+{
+  "type": "https://httpproblems.com/http-status/429",
+  "title": "Too Many Requests",
+  "status": 429,
+  "detail": "Rate limit exceeded",
+  "instance": "/my-route",
+  "trace": {
+    "requestId": "4d54e4ee-c003-4d75-aba9-e09a6d707b08",
+    "timestamp": "2026-04-14T12:00:00.000Z",
+    "buildId": "ec44e831-3a02-467e-a26c-7e401e4473bf"
+  }
+}
+```
+
+The response also includes a `Retry-After` header with the number of seconds
+until the client can send another request (for example, `Retry-After: 42`).
+
+:::tip
+
+Set `requestsAllowed` to a low number like `2` during development so you can
+trigger the limit quickly. Increase it before deploying to production.
+
+:::
+
+## Next steps
+
+- [How rate limiting works](../concepts/rate-limiting.md) -- Deep dive into
+  `rateLimitBy` modes, the sliding window algorithm, and advanced configuration
+  options
+- [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx) -- Vary rate limits per
+  caller using a custom TypeScript function
+- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) -- Full
+  list of policy options including `mode`, `headerMode`, and `throwOnFailure`
diff --git a/docs/rate-limiting/monitoring-and-troubleshooting.mdx b/docs/rate-limiting/monitoring-and-troubleshooting.mdx
new file mode 100644
index 000000000..66569fbbe
--- /dev/null
+++ b/docs/rate-limiting/monitoring-and-troubleshooting.mdx
@@ -0,0 +1,243 @@
+---
+title: Monitoring and Troubleshooting Rate Limits
+sidebar_label: Monitoring & Troubleshooting
+description:
+  Monitor rate limit events, debug unexpected 429 responses, and understand
+  failure modes.
+---
+
+Rate limiting only delivers value when you can observe it in action. Without
+visibility into which consumers hit limits, how often requests are rejected, and
+whether the rate limit service itself is healthy, you are operating blind. This
+guide covers how to monitor rate limit activity, understand failure modes,
+choose the right enforcement mode, and diagnose common issues.
+
+## Monitoring rate limit events
+
+Zuplo produces structured logs for every request, including those rejected with
+a `429 Too Many Requests` status code. Ship these logs to an external provider
+to build dashboards and alerts around rate limit activity.
+
+### Setting up log shipping
+
+Configure a [logging plugin](../articles/logging.mdx) in your `zuplo.runtime.ts`
+file to send logs to your observability platform. Zuplo supports AWS CloudWatch,
+Datadog, Dynatrace, Google Cloud Logging, Loki, New Relic, Splunk, Sumo Logic,
+and VMware Log Insight. You can also build a
+[custom logging plugin](../articles/custom-logging-example.mdx) for unsupported
+providers.
+
+### Filtering for rate-limited requests
+
+Every log entry includes default fields you can filter on:
+
+- **`requestId`** -- Correlate a specific rejected request end-to-end using the
+  `zp-rid` response header.
+- **`environment`** and **`environmentStage`** -- Distinguish between
+  `production`, `preview`, and `working-copy` environments.
+
+To break down rate-limited requests by consumer or IP, add custom log properties
+in a custom inbound policy that runs before the rate limit policy:
+
+```ts
+import { ZuploContext, ZuploRequest } from "@zuplo/runtime";
+
+export default async function policy(
+  request: ZuploRequest,
+  context: ZuploContext,
+) {
+  // Tag every log entry with the consumer identity for filtering
+  context.log.setLogProperties!({
+    rateLimitIdentity:
+      request.user?.sub ?? request.headers.get("true-client-ip") ?? "unknown",
+  });
+  return request;
+}
+```
+
+This adds a `rateLimitIdentity` field to all log entries for the request, making
+it straightforward to group 429 responses by consumer in your logging dashboard.
+
+### Setting up alerts
+
+Configure alerts in your logging provider for the following conditions:
+
+- **Spike in 429 responses** -- A sudden increase may indicate a
+  misconfiguration, an attack, or a legitimate traffic surge.
+- **429 rate exceeding a threshold** -- If more than a small percentage of
+  requests return 429, the rate limit may be set too low for normal traffic.
+- **Zero 429 responses over an extended period** -- If you expect rate limiting
+  to be active but see no rejections, the policy may not be attached to the
+  correct routes.
+
+### Metrics plugins
+
+For quantitative monitoring, Zuplo supports
+[metrics plugins](../articles/metrics-plugins.mdx) that send request latency,
+request size, and response size data to Datadog, Dynatrace, New Relic, or any
+OpenTelemetry-compatible collector. While these metrics do not track rate limit
+counters directly, the `statusCode` dimension (when enabled) allows you to chart
+429 response rates alongside overall request volume.
+
+## Understanding failure modes
+
+The rate limiting policies depend on a globally distributed rate limit service
+to track request counters. Understanding what happens when that service is
+unreachable helps you make the right availability tradeoff.
+
+### Fail-open (default)
+
+By default, `throwOnFailure` is set to `false`. If the rate limit service is
+unreachable, the policy allows the request through. This fail-open behavior
+prevents a rate limit service outage from blocking all traffic to your API.
+
+The tradeoff is that during an outage, rate limits are not enforced and clients
+can exceed their configured thresholds.
+
+### Fail-closed
+
+Set `throwOnFailure` to `true` to return an error when the rate limit service is
+unreachable. This guarantees that no request bypasses rate limiting, but it
+means a service disruption blocks all traffic on routes using that policy.
+
+```json
+{
+  "options": {
+    "rateLimitBy": "user",
+    "requestsAllowed": 100,
+    "timeWindowMinutes": 1,
+    "throwOnFailure": true
+  }
+}
+```
+
+:::warning
+
+Only use `throwOnFailure: true` when allowing unlimited traffic is more
+dangerous than rejecting all traffic. For most APIs, the fail-open default is
+the safer choice.
+
+:::
+
+### Detecting fail-open conditions
+
+Because fail-open requests succeed with a `200` (or other normal status code),
+they do not produce a 429 log entry. To detect when the rate limit service is
+unreachable, monitor for a sudden drop in 429 responses during periods when you
+expect rate limiting to be active. A complete absence of 429s alongside steady
+or increasing traffic volume is a strong signal that the service is in fail-open
+mode.
+
+## Strict vs. async mode in production
+
+The `mode` option controls whether the rate limit check blocks the request or
+runs in parallel with it.
+
+### Strict mode (default)
+
+In `strict` mode, every request waits for the rate limit service to confirm
+whether the request is within limits before proceeding to the backend. This
+provides exact enforcement -- no request exceeds the configured threshold.
+
+The tradeoff is added latency on every request due to the round-trip to the rate
+limit service.
+
+### Async mode
+
+In `async` mode, the request proceeds to the backend immediately while the rate
+limit check runs in parallel. If the check determines the limit is exceeded, the
+result applies to the _next_ request, not the current one.
+
+This means some requests may get through after the limit is reached. In
+practice, the overshoot depends on your request rate and the latency of the rate
+limit check. For an API receiving 100 requests per second with a 10ms check
+time, approximately one extra request may slip through per window.
+
+:::tip
+
+Use `async` mode when low latency matters more than exact enforcement -- for
+example, on high-throughput public endpoints where a few extra requests over the
+limit are acceptable. Use `strict` mode when precise enforcement is required,
+such as billing-sensitive endpoints or APIs with hard backend capacity limits.
+
+:::
+
+## Common troubleshooting scenarios
+
+### Unexpected 429 responses
+
+**Shared IP addresses.** When `rateLimitBy` is set to `"ip"`, multiple clients
+behind the same corporate proxy, cloud NAT, or shared Wi-Fi share a single rate
+limit bucket. One heavy user exhausts the limit for everyone on that IP. Switch
+to `rateLimitBy: "user"` for authenticated APIs to avoid this.
+
+**Missing authentication policy.** The `"user"` mode requires an authentication
+policy (such as API Key Authentication or JWT) earlier in the policy pipeline to
+populate `request.user`. If no authentication policy runs first, the rate limit
+policy returns an error instead of applying per-user limits. Verify that
+authentication appears before rate limiting in the route's inbound policy list.
+
+**Multiple rate limit policies on the same route.** If a route has both a
+per-minute and a per-hour rate limit policy, a request can be rejected by either
+one. Check all rate limit policies attached to the route, and verify the
+ordering (longest time window first, then shorter durations).
+
+**Lower limits than expected.** If you use `rateLimitBy: "function"`, verify
+that your custom function returns the expected `requestsAllowed` and
+`timeWindowMinutes` values. Log the returned values during development to
+confirm the function resolves correctly for each consumer.
+
+### Rate limits not applying
+
+**Policy not attached to the route.** Defining a rate limit policy in
+`policies.json` does not activate it. The policy name must appear in the
+`policies.inbound` array of each route in `routes.oas.json` where you want it
+enforced. Verify the route configuration.
+
+**Typo in the policy name.** The policy name in `routes.oas.json` must exactly
+match the `name` field in `policies.json`. A mismatched name silently skips the
+policy. Check for case sensitivity and extra whitespace.
+
+**Custom function returning `undefined`.** When `rateLimitBy` is set to
+`"function"` and the identifier function returns `undefined`, rate limiting is
+skipped for that request entirely. This is by design -- it allows you to
+selectively exempt certain requests -- but it can cause confusion if the
+function has an unhandled code path that returns `undefined` unintentionally.
+
+### Different behavior across environments
+
+Rate limit counters are scoped per environment. Production, preview, and
+working-copy environments each maintain their own separate counters. A request
+that is rate-limited in production does not affect the counter in a preview
+environment, and vice versa.
+
+This means:
+
+- Testing rate limits in a preview branch does not interfere with production
+  traffic.
+- Rate limiting behavior you observe in a low-traffic preview environment may
+  differ under production load.
+- After deploying a new environment, counters start fresh.
+
+:::note
+
+If you observe rate limits triggering in one environment but not another,
+confirm that both environments use the same policy configuration and that the
+traffic volume is comparable.
+
+:::
+
+## Related resources
+
+- [Rate Limit Exceeded error](../errors/rate-limit-exceeded.mdx) --
+  Understanding the 429 response format and client-side remediation
+- [How rate limiting works](../concepts/rate-limiting.md) -- Algorithm details,
+  `rateLimitBy` modes, and combining policies
+- [Logging](../articles/logging.mdx) -- Configuring log shipping to external
+  providers
+- [Metrics Plugins](../articles/metrics-plugins.mdx) -- Sending request metrics
+  to Datadog, Dynatrace, New Relic, or OpenTelemetry
+- [Proactive monitoring](../articles/monitoring-your-gateway.mdx) -- Health
+  checks and end-to-end gateway monitoring
+- [Troubleshooting](../articles/troubleshooting.md) -- General gateway
+  troubleshooting guide
diff --git a/docs/rate-limiting/overview.mdx b/docs/rate-limiting/overview.mdx
new file mode 100644
index 000000000..4219c6758
--- /dev/null
+++ b/docs/rate-limiting/overview.mdx
@@ -0,0 +1,63 @@
+---
+title: Rate Limiting
+sidebar_label: Introduction
+---
+
+Rate limiting controls how many requests a client can make to your API within a
+given time window. It protects your backend from traffic spikes, enforces fair
+usage across consumers, and enables tiered access for different customer plans.
+
+Zuplo's rate limiter uses a sliding window algorithm enforced globally across
+all edge locations. When a client exceeds the limit, they receive a
+`429 Too Many Requests` response with a `Retry-After` header indicating when
+they can retry.
+
+## Policies
+
+Zuplo provides two rate limiting policies.
+
+The [Rate Limiting policy](../policies/rate-limit-inbound.mdx) enforces a single
+request counter per time window. Use it when you need a straightforward "X
+requests per Y minutes" limit.
+
+The [Complex Rate Limiting policy](../policies/complex-rate-limit-inbound.mdx)
+supports multiple named counters in a single policy, each tracking a different
+resource or unit of work. Use it for usage-based pricing where different
+endpoints consume different amounts of a resource. This policy requires an
+enterprise plan.
+
+## Dynamic rate limiting
+
+Static rate limits apply the same threshold to every caller. With dynamic rate
+limiting, you provide a custom TypeScript function that determines limits at
+request time — so premium customers get higher throughput, free-tier users get a
+lower ceiling, and internal services can bypass limits entirely.
+
+- [Dynamic Rate Limiting guide](./dynamic-rate-limiting.mdx) — Implement custom
+  rate limit functions
+- [Per-user rate limiting with a database](../articles/per-user-rate-limits-using-db.mdx)
+  — Advanced example using database lookups and the ZoneCache
+
+## Rate limiting and monetization
+
+If you use Zuplo's
+[Monetization](../articles/monetization/monetization-policy.md) feature, the
+monetization policy handles quota enforcement based on subscription plans. You
+can still add a rate limiting policy to provide per-second or per-minute spike
+protection on top of monthly billing quotas. These serve different purposes:
+
+- **Monetization quotas** enforce monthly or billing-period usage limits tied to
+  a subscription plan
+- **Rate limiting** protects against short-duration traffic spikes that could
+  overwhelm your backend
+
+## Next steps
+
+- [How rate limiting works](../concepts/rate-limiting.md) — Deep dive into the
+  algorithm, `rateLimitBy` modes, configuration options, and combining policies
+- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) —
+  Configuration reference
+- [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
+  — Multi-counter configuration reference
+- [Rate Limit Exceeded error](../errors/rate-limit-exceeded.mdx) —
+  Troubleshooting 429 responses
diff --git a/sidebar.ts b/sidebar.ts
index e49a0ab4e..d64e0c2d7 100644
--- a/sidebar.ts
+++ b/sidebar.ts
@@ -446,6 +446,35 @@ export const documentation: Navigation = [
       "articles/api-key-buckets",
     ],
   },
+  {
+    type: "category",
+    label: "Rate Limiting",
+    icon: "gauge",
+    items: [
+      "rate-limiting/overview",
+      "rate-limiting/getting-started",
+      "concepts/rate-limiting",
+      {
+        type: "category",
+        label: "Policies",
+        items: [
+          "policies/rate-limit-inbound",
+          "policies/complex-rate-limit-inbound",
+        ],
+      },
+      {
+        type: "category",
+        label: "Guides",
+        items: [
+          "rate-limiting/dynamic-rate-limiting",
+          "rate-limiting/combining-policies",
+          "articles/per-user-rate-limits-using-db",
+          "rate-limiting/monitoring-and-troubleshooting",
+        ],
+      },
+      "errors/rate-limit-exceeded",
+    ],
+  },
   {
     type: "category",
     label: "MCP Server",

From 36bf3530290a0754d1fc3a03e2483ab42788c042 Mon Sep 17 00:00:00 2001
From: Martyn Davies <martynrdavies@gmail.com>
Date: Wed, 15 Apr 2026 16:31:11 +0200
Subject: [PATCH 2/6] Expand rate limiting docs from gap review and consolidate
 section

Tier 1 improvements from a junior-dev + staff-engineer gap review of the
rate limiting documentation:

- Add "Choosing your approach" decision table and numbered reading order
  to the overview so a first-time reader lands on the right next page.
- Add production-limit sizing guidance (with reference figures from
  Stripe, GitHub, Twilio, Shopify) and a full authenticated-user pipeline
  example with a two-key curl test to Getting Started.
- Add a "Key terms" section to the concepts deep-dive that defines
  counter/bucket, rate limit key, and the identifier-vs-key distinction.
- Promote the shared-counter-name warning from Combining Policies into
  Getting Started so the footgun shows up the first time a reader
  defines a policy name.
- Move the npx zuplo link prerequisite from a :::note in step 3 up to
  Prerequisites where it is actually read.

Consolidate rate-limiting files into a single section:

- Move docs/concepts/rate-limiting.md to
  docs/rate-limiting/how-it-works.md.
- Move docs/articles/per-user-rate-limits-using-db.mdx to
  docs/rate-limiting/per-user-rate-limits-using-db.mdx.
- Update sidebar entries and all internal cross-links across the
  rate-limiting guides, the concepts deep-dive, and the zone-cache
  reference page.
- Add five vercel.json redirects covering both the old HTML URLs and
  their .md variants, and fix the existing chained redirect from
  /docs/examples/per-user-rate-limits-using-db so it points directly to
  the new location.

Park Tier 2/3 items that need SME input (distributed-system semantics,
latency quantification, throwOnFailure framing, standards headers,
rollout/observability guides) in rate-limiting-docs-followups.md.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/programmable-api/zone-cache.mdx          |   2 +-
 docs/rate-limiting/combining-policies.mdx     |   2 +-
 docs/rate-limiting/dynamic-rate-limiting.mdx  |   6 +-
 docs/rate-limiting/getting-started.mdx        | 162 +++++++++++++++---
 .../how-it-works.md}                          |  28 ++-
 .../monitoring-and-troubleshooting.mdx        |   2 +-
 docs/rate-limiting/overview.mdx               |  56 +++++-
 .../per-user-rate-limits-using-db.mdx         |  10 +-
 rate-limiting-docs-followups.md               | 147 ++++++++++++++++
 sidebar.guides.ts                             |   2 +-
 sidebar.ts                                    |   8 +-
 vercel.json                                   |  22 ++-
 12 files changed, 396 insertions(+), 51 deletions(-)
 rename docs/{concepts/rate-limiting.md => rate-limiting/how-it-works.md} (87%)
 rename docs/{articles => rate-limiting}/per-user-rate-limits-using-db.mdx (90%)
 create mode 100644 rate-limiting-docs-followups.md

diff --git a/docs/programmable-api/zone-cache.mdx b/docs/programmable-api/zone-cache.mdx
index de4f546ab..b2e6389b9 100644
--- a/docs/programmable-api/zone-cache.mdx
+++ b/docs/programmable-api/zone-cache.mdx
@@ -13,7 +13,7 @@ time-to-live (TTL) after which it expires and is removed from the cache. Each
 cached object can be up to 512 MB in size.
 
 There's an demonstration of ZoneCache use in the
-[Per User Rate Limits Using a Database](../articles/per-user-rate-limits-using-db.mdx)
+[Per User Rate Limits Using a Database](../rate-limiting/per-user-rate-limits-using-db.mdx)
 example.
 
 ## Constructor
diff --git a/docs/rate-limiting/combining-policies.mdx b/docs/rate-limiting/combining-policies.mdx
index 2d1204305..81d1acf3d 100644
--- a/docs/rate-limiting/combining-policies.mdx
+++ b/docs/rate-limiting/combining-policies.mdx
@@ -289,5 +289,5 @@ you intend independent counters.
 - [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
 - [Quota policy reference](../policies/quota-inbound.mdx)
 - [Monetization policy](../articles/monetization/monetization-policy.md)
-- [How rate limiting works](../concepts/rate-limiting.md)
+- [How rate limiting works](./how-it-works.md)
 - [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx)
diff --git a/docs/rate-limiting/dynamic-rate-limiting.mdx b/docs/rate-limiting/dynamic-rate-limiting.mdx
index 9612cb7d6..6d9d32226 100644
--- a/docs/rate-limiting/dynamic-rate-limiting.mdx
+++ b/docs/rate-limiting/dynamic-rate-limiting.mdx
@@ -175,7 +175,7 @@ configuration, look them up from a database at request time. Use the
 every request.
 
 See
-[Per-user rate limiting with a database](../articles/per-user-rate-limits-using-db.mdx)
+[Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx)
 for a complete example using Supabase and ZoneCache.
 
 ### Skip rate limiting for specific requests
@@ -233,8 +233,8 @@ production limits.
 
 ## Related resources
 
-- [How rate limiting works](../concepts/rate-limiting.md) — Full explanation of
+- [How rate limiting works](./how-it-works.md) — Full explanation of
   `rateLimitBy` modes and configuration options
 - [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx)
-- [Per-user rate limiting with a database](../articles/per-user-rate-limits-using-db.mdx)
+- [Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx)
   — Advanced example with database lookups and caching
diff --git a/docs/rate-limiting/getting-started.mdx b/docs/rate-limiting/getting-started.mdx
index a8811b48a..dab9a3fed 100644
--- a/docs/rate-limiting/getting-started.mdx
+++ b/docs/rate-limiting/getting-started.mdx
@@ -13,9 +13,15 @@ background on how rate limiting works in Zuplo, see the
 ## Prerequisites
 
 - An existing Zuplo project with at least one route configured in
-  `config/routes.oas.json`
+  `config/routes.oas.json`.
 - The [Zuplo CLI](../cli/overview.mdx) installed, or access to the
-  [Zuplo Portal](https://portal.zuplo.com)
+  [Zuplo Portal](https://portal.zuplo.com).
+- To test rate limiting locally, the project must be linked to a Zuplo
+  environment. Run `npx zuplo link` once in the project directory and select an
+  environment. Rate limiting uses a globally distributed counter service, so an
+  unlinked local project cannot enforce limits. See
+  [Connecting to Zuplo Services Locally](../articles/local-development-services.mdx)
+  for more detail.
 
 ## 1. Add the policy
 
@@ -58,6 +64,18 @@ rate limiting entry to the existing `policies` array rather than replacing it.
 
 :::
 
+:::warning
+
+The `name` field (`rate-limit-inbound` above) is what scopes the counter. Every
+route that references this exact name shares the same counter. If you later copy
+this policy block to create a second limit, change the `name` — a forgotten
+rename silently merges two unrelated limits into one. Policy names must also
+match exactly between `config/policies.json` and `config/routes.oas.json`; a
+typo there causes the policy to be skipped without any error. See
+[Counter scoping](./combining-policies.mdx#counter-scoping) for the full rules.
+
+:::
+
 ## 2. Attach the policy to a route
 
 Open `config/routes.oas.json` and add the policy name to the `policies.inbound`
@@ -105,16 +123,6 @@ Start your local dev server (or deploy to a Zuplo environment) and send requests
 to the protected route. With the configuration above, the third request within a
 one-minute window returns a `429` response.
 
-:::note
-
-Rate limiting requires a connection to Zuplo cloud services. If you have not
-already done so, link your local project by running `npx zuplo link` and
-selecting an environment. See
-[Connecting to Zuplo Services Locally](../articles/local-development-services.mdx)
-for details.
-
-:::
-
 ```bash
 # Send three requests in quick succession
 for i in 1 2 3; do
@@ -145,19 +153,129 @@ third request returns a `429 Too Many Requests` response in
 The response also includes a `Retry-After` header with the number of seconds
 until the client can send another request (for example, `Retry-After: 42`).
 
-:::tip
+## 4. Choose production limits
+
+The `requestsAllowed: 2` value above exists so the limit triggers on your third
+curl. Production APIs need numbers that reflect real usage. There is no single
+right answer, but these reference points from widely used APIs are a useful
+starting point:
+
+| API     | Typical per-consumer limit                                 |
+| ------- | ---------------------------------------------------------- |
+| Stripe  | 100 read and 100 write requests per second per account     |
+| GitHub  | 5,000 authenticated requests per hour per user             |
+| Twilio  | 100 requests per second per account (varies by resource)   |
+| Shopify | 40 requests per app per store (bucket refills at 2/second) |
+
+When sizing your own limit, consider three inputs:
+
+- **What your backend can sustain.** Start from a conservative fraction of your
+  backend's measured capacity so that a single caller cannot exhaust it.
+- **What legitimate callers actually do.** If p99 usage for your best customers
+  is 10 requests per minute, a 100-per-minute limit leaves headroom without
+  being overly permissive.
+- **How your customers are structured.** Per-API-key limits usually give tighter
+  control than per-IP; a single corporate IP can hide dozens of real users.
+
+It is almost always easier to _raise_ a limit in response to a support ticket
+than to _lower_ one that customers have started relying on. When in doubt, start
+low, measure, and increase.
+
+## 5. Rate limit authenticated users
+
+IP-based limits are a good first layer, but they penalize every user behind a
+shared NAT or corporate proxy. For an authenticated API, limit per consumer
+instead. This requires an authentication policy earlier in the pipeline so that
+`request.user` is populated before the rate limit policy runs.
+
+The full policies configuration looks like this:
+
+```json title="config/policies.json"
+{
+  "policies": [
+    {
+      "name": "api-key-auth",
+      "policyType": "api-key-inbound",
+      "handler": {
+        "export": "ApiKeyInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "allowUnauthenticatedRequests": false
+        }
+      }
+    },
+    {
+      "name": "rate-limit-per-user",
+      "policyType": "rate-limit-inbound",
+      "handler": {
+        "export": "RateLimitInboundPolicy",
+        "module": "$import(@zuplo/runtime)",
+        "options": {
+          "rateLimitBy": "user",
+          "requestsAllowed": 60,
+          "timeWindowMinutes": 1
+        }
+      }
+    }
+  ]
+}
+```
+
+Attach both policies to the route, with authentication first so the rate limit
+policy has a user to group by:
+
+```json title="config/routes.oas.json (excerpt)"
+{
+  "x-zuplo-route": {
+    "policies": {
+      "inbound": ["api-key-auth", "rate-limit-per-user"]
+    }
+  }
+}
+```
+
+Create two API keys in the Zuplo Portal (or with the CLI) so you can verify that
+each consumer has its own counter. Then send requests with each key:
+
+```bash
+# Replace with the tokens from your two API keys.
+KEY_A="zpka_xxxxxxxxxxxxxxxxxxxxxx"
+KEY_B="zpka_yyyyyyyyyyyyyyyyyyyyyy"
+
+# Burn through the limit on key A; key B should still succeed.
+for i in $(seq 1 61); do
+  curl -s -o /dev/null -w "A #$i: %{http_code}\n" \
+    -H "Authorization: Bearer $KEY_A" \
+    http://localhost:9000/my-route
+done
+
+curl -s -w "\nB #1: %{http_code}\n" \
+  -H "Authorization: Bearer $KEY_B" \
+  http://localhost:9000/my-route
+```
+
+Requests 1–60 for key A return `200`, request 61 returns `429`, and the first
+request for key B still returns `200`. That confirms the counter is scoped to
+each consumer, not shared across the API key pool.
+
+:::note
 
-Set `requestsAllowed` to a low number like `2` during development so you can
-trigger the limit quickly. Increase it before deploying to production.
+See [API Key Authentication](../articles/api-key-authentication.mdx) for the
+full walkthrough of creating and managing API keys. If you use JWT
+authentication instead, replace the `api-key-auth` policy with your JWT policy —
+the rate limit policy works the same way as long as `request.user.sub` is
+populated.
 
 :::
 
 ## Next steps
 
-- [How rate limiting works](../concepts/rate-limiting.md) -- Deep dive into
-  `rateLimitBy` modes, the sliding window algorithm, and advanced configuration
-  options
-- [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx) -- Vary rate limits per
-  caller using a custom TypeScript function
-- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) -- Full
-  list of policy options including `mode`, `headerMode`, and `throwOnFailure`
+- [How rate limiting works](./how-it-works.md) — Deep dive into `rateLimitBy`
+  modes, the sliding window algorithm, and advanced configuration options.
+- [Combining Policies](./combining-policies.mdx) — Stack per-minute and per-hour
+  limits, and combine rate limiting with quotas or monetization.
+- [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx) — Vary rate limits per
+  caller using a custom TypeScript function (for example, higher limits for paid
+  plans).
+- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) — Full
+  list of policy options including `mode`, `headerMode`, and `throwOnFailure`.
diff --git a/docs/concepts/rate-limiting.md b/docs/rate-limiting/how-it-works.md
similarity index 87%
rename from docs/concepts/rate-limiting.md
rename to docs/rate-limiting/how-it-works.md
index 8b078e4ba..dca01ae11 100644
--- a/docs/concepts/rate-limiting.md
+++ b/docs/rate-limiting/how-it-works.md
@@ -13,7 +13,29 @@ sliding window continuously tracks requests over a rolling time period. This
 produces smoother, more predictable throttling behavior.
 
 When a client exceeds the limit, they receive a `429 Too Many Requests` response
-with a `retry-after` header indicating when they can retry.
+with a `Retry-After` header indicating when they can retry.
+
+## Key terms
+
+A few terms show up repeatedly in the rate limiting docs. They are related but
+not interchangeable.
+
+- **Counter (or bucket)** — The running tally Zuplo keeps for a single caller
+  and a single policy. Each unique combination of policy `name` and caller
+  identifier gets its own counter. Two different policies tracking the same
+  caller do _not_ share a counter; two different callers under the same policy
+  do not share a counter either.
+- **Rate limit key** — The string value that identifies a caller for bucketing.
+  For `rateLimitBy: "ip"` the key is the client's IP address; for `"user"` it is
+  `request.user.sub`; for `"function"` it is whatever your custom function
+  returns as `CustomRateLimitDetails.key`; for `"all"` there is a single
+  implicit key shared by every request to the route.
+- **`identifier` option** — A field in the policy's configuration that points
+  Zuplo at your custom TypeScript function when `rateLimitBy` is `"function"`.
+  Zuplo calls that function on each request, and the function returns a
+  `CustomRateLimitDetails` object whose `key` property becomes the rate limit
+  key. In short: `identifier` is _where the function lives_; `key` is _what the
+  function returns_.
 
 ## Rate limiting policies
 
@@ -184,7 +206,7 @@ Returning `undefined` skips rate limiting for that request entirely.
 
 The function can also be `async` if you need to look up limits from a database
 or external service. See
-[Per-user rate limiting using a database](../articles/per-user-rate-limits-using-db.mdx)
+[Per-user rate limiting using a database](./per-user-rate-limits-using-db.mdx)
 for a complete example using the ZoneCache for performance.
 
 Wire the function into the policy configuration using the `identifier` option:
@@ -275,6 +297,6 @@ Both rate limiting policies support the following additional options:
 - [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx)
 - [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
 - [Dynamic Rate Limiting tutorial](../articles/step-5-dynamic-rate-limiting.mdx)
-- [Per-user rate limiting with a database](../articles/per-user-rate-limits-using-db.mdx)
+- [Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx)
 - [Quota policy](../policies/quota-inbound.mdx)
 - [Monetization policy](../articles/monetization/monetization-policy.md)
diff --git a/docs/rate-limiting/monitoring-and-troubleshooting.mdx b/docs/rate-limiting/monitoring-and-troubleshooting.mdx
index 66569fbbe..5bafd9233 100644
--- a/docs/rate-limiting/monitoring-and-troubleshooting.mdx
+++ b/docs/rate-limiting/monitoring-and-troubleshooting.mdx
@@ -231,7 +231,7 @@ traffic volume is comparable.
 
 - [Rate Limit Exceeded error](../errors/rate-limit-exceeded.mdx) --
   Understanding the 429 response format and client-side remediation
-- [How rate limiting works](../concepts/rate-limiting.md) -- Algorithm details,
+- [How rate limiting works](./how-it-works.md) -- Algorithm details,
   `rateLimitBy` modes, and combining policies
 - [Logging](../articles/logging.mdx) -- Configuring log shipping to external
   providers
diff --git a/docs/rate-limiting/overview.mdx b/docs/rate-limiting/overview.mdx
index 4219c6758..d93770a5a 100644
--- a/docs/rate-limiting/overview.mdx
+++ b/docs/rate-limiting/overview.mdx
@@ -12,6 +12,29 @@ all edge locations. When a client exceeds the limit, they receive a
 `429 Too Many Requests` response with a `Retry-After` header indicating when
 they can retry.
 
+## Choosing your approach
+
+Pick a `rateLimitBy` mode and a policy based on what your API looks like today.
+If you are not sure, start from the first row that matches and read the linked
+guide.
+
+| Use case                                                    | `rateLimitBy` | Policy                                                                           | Learn more                                                                |
+| ----------------------------------------------------------- | ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
+| Public API with no authentication                           | `ip`          | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | [Getting Started](./getting-started.mdx)                                  |
+| Authenticated API, same limit for every consumer            | `user`        | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | [Combining Policies](./combining-policies.mdx)                            |
+| Tiered limits (free, pro, enterprise) from API key metadata | `function`    | [Rate Limiting](../policies/rate-limit-inbound.mdx) with a custom function       | [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx)                      |
+| Tiered limits sourced from a database                       | `function`    | [Rate Limiting](../policies/rate-limit-inbound.mdx) with a custom function       | [Per-user limits with a database](./per-user-rate-limits-using-db.mdx)    |
+| Single global cap on an expensive endpoint                  | `all`         | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | [How rate limiting works](./how-it-works.md#all)                          |
+| Usage-based pricing counting multiple resources per request | `user`        | [Complex Rate Limiting](../policies/complex-rate-limit-inbound.mdx) (enterprise) | [How rate limiting works](./how-it-works.md#complex-rate-limiting-policy) |
+
+:::note
+
+`rateLimitBy: "user"` requires an authentication policy (such as API key or JWT
+authentication) earlier in the route's policy pipeline. Without it, the rate
+limit policy has no user to group requests by and returns an error.
+
+:::
+
 ## Policies
 
 Zuplo provides two rate limiting policies.
@@ -35,7 +58,7 @@ lower ceiling, and internal services can bypass limits entirely.
 
 - [Dynamic Rate Limiting guide](./dynamic-rate-limiting.mdx) — Implement custom
   rate limit functions
-- [Per-user rate limiting with a database](../articles/per-user-rate-limits-using-db.mdx)
+- [Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx)
   — Advanced example using database lookups and the ZoneCache
 
 ## Rate limiting and monetization
@@ -51,13 +74,28 @@ protection on top of monthly billing quotas. These serve different purposes:
 - **Rate limiting** protects against short-duration traffic spikes that could
   overwhelm your backend
 
-## Next steps
+## Recommended reading order
+
+If you are new to rate limiting in Zuplo, read these pages in order. Each one
+builds on the previous.
+
+1. [Getting Started](./getting-started.mdx) — Add a basic IP-based rate limit to
+   an existing project and confirm it works end to end.
+2. [How rate limiting works](./how-it-works.md) — Learn what the sliding window
+   algorithm does, what each `rateLimitBy` mode means, and which configuration
+   options are available.
+3. [Combining Policies](./combining-policies.mdx) — Stack multiple limits on the
+   same route and combine rate limiting with quotas or monetization.
+4. [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx) — Vary limits per caller
+   by writing a small TypeScript function.
+5. [Monitoring and Troubleshooting](./monitoring-and-troubleshooting.mdx) —
+   Observe limits in production and diagnose unexpected behaviour.
+
+Reference material to bookmark:
 
-- [How rate limiting works](../concepts/rate-limiting.md) — Deep dive into the
-  algorithm, `rateLimitBy` modes, configuration options, and combining policies
-- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) —
-  Configuration reference
+- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) — Every
+  configuration option for the standard policy.
 - [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
-  — Multi-counter configuration reference
-- [Rate Limit Exceeded error](../errors/rate-limit-exceeded.mdx) —
-  Troubleshooting 429 responses
+  — Multi-counter configuration (enterprise).
+- [Rate Limit Exceeded error](../errors/rate-limit-exceeded.mdx) — What a 429
+  response looks like and how to debug unexpected ones.
diff --git a/docs/articles/per-user-rate-limits-using-db.mdx b/docs/rate-limiting/per-user-rate-limits-using-db.mdx
similarity index 90%
rename from docs/articles/per-user-rate-limits-using-db.mdx
rename to docs/rate-limiting/per-user-rate-limits-using-db.mdx
index ae3bf7e59..b620a3308 100644
--- a/docs/articles/per-user-rate-limits-using-db.mdx
+++ b/docs/rate-limiting/per-user-rate-limits-using-db.mdx
@@ -10,9 +10,9 @@ tags:
 ---
 
 In this example we show a more advanced implementation of
-[dynamic rate limiting](../rate-limiting/dynamic-rate-limiting.mdx). It uses a
-database lookup to get the customer details and combines that with the ZoneCache
-to improve performance, reduce latency and lower the load on the database.
+[dynamic rate limiting](./dynamic-rate-limiting.mdx). It uses a database lookup
+to get the customer details and combines that with the ZoneCache to improve
+performance, reduce latency and lower the load on the database.
 
 In this example we use [Supabase](https://supabase.com) as the database but you
 could use your own API, [Xata](https://xata.io),
@@ -21,8 +21,8 @@ all.
 
 If you haven't already, check out the
 [rate-limiting policy](../policies/rate-limit-inbound.mdx) and the
-[dynamic rate limiting guide](../rate-limiting/dynamic-rate-limiting.mdx). Then
-you should be oriented to how dynamic rate limiting works.
+[dynamic rate limiting guide](./dynamic-rate-limiting.mdx). Then you should be
+oriented to how dynamic rate limiting works.
 
 Below is a full implementation of a custom rate limiting function. In our
 example this is a module called `per-user-rate-limiting.ts`.
diff --git a/rate-limiting-docs-followups.md b/rate-limiting-docs-followups.md
new file mode 100644
index 000000000..b006a4f8e
--- /dev/null
+++ b/rate-limiting-docs-followups.md
@@ -0,0 +1,147 @@
+# Rate Limiting Docs — Followups
+
+Generated from a dual-perspective (junior dev + staff engineer) gap review of
+the rate limiting documentation on `feat/add-rate-limiting-section`.
+
+**Status of this document**
+
+- Tier 1 items have been implemented on this branch. See the commit alongside
+  this file for the changes.
+- Tier 2 and Tier 3 below are _not_ implemented and are parked here for future
+  work. Many of them depend on information from runtime engineers (consistency
+  model, cardinality limits, latency numbers, etc.) rather than on writing
+  alone.
+
+Files in scope for the review (policy reference pages were explicitly excluded):
+
+- `docs/rate-limiting/overview.mdx`
+- `docs/rate-limiting/getting-started.mdx`
+- `docs/rate-limiting/dynamic-rate-limiting.mdx`
+- `docs/rate-limiting/combining-policies.mdx`
+- `docs/rate-limiting/monitoring-and-troubleshooting.mdx`
+- `docs/rate-limiting/how-it-works.md` (moved from
+  `docs/concepts/rate-limiting.md`)
+- `docs/rate-limiting/per-user-rate-limits-using-db.mdx` (moved from
+  `docs/articles/per-user-rate-limits-using-db.mdx`)
+- `docs/errors/rate-limit-exceeded.mdx`
+
+---
+
+## Tier 2 — New technical content (needs SME input)
+
+These are claims the docs currently make without substantiating them. Each item
+likely requires a conversation with the runtime team before it can be written
+honestly.
+
+### 1. Distributed-system semantics in `docs/rate-limiting/how-it-works.md`
+
+The docs assert that rate limits are "enforced globally across all edge
+locations" but never explain the mechanism. A new section should cover:
+
+- Consistency model of the global counter (eventual vs. strong).
+- How counters are aggregated across regions / edge POPs.
+- Behaviour at window boundaries under replication (the double-burst problem).
+- Clock handling — server time vs. client time, tolerance for clock skew.
+- What happens if a region / POP is unreachable (counter divergence, failover).
+- Whether a caller can "escape" a limit by switching regions.
+- Maximum distinct keys (cardinality) before performance degrades. Is there a
+  soft limit, a hard cap, or eviction behaviour?
+
+### 2. Quantify strict-mode latency and async-mode overshoot
+
+- `rate-limiting/how-it-works.md` says strict mode "adds some latency to every
+  request because the check hits a globally distributed rate limit service"
+  without numbers. Needs a typical p50/p99 figure, and whether it varies by
+  region.
+- `monitoring-and-troubleshooting.mdx` gives "approximately one extra request
+  may slip through per window" for async mode at 100 req/s and 10 ms of latency.
+  That needs to be expressed as a worst-case formula (e.g.
+  `overshoot ≈ rate × check_latency`) so readers can reason about their own
+  numbers, not just the example.
+
+### 3. ZoneCache vs. rate-limit-window race in the DB guide
+
+In `docs/rate-limiting/per-user-rate-limits-using-db.mdx`, the ZoneCache TTL is
+60 seconds and the limit window is 1 minute. Add:
+
+- A sequence diagram showing a tier change mid-window and which limit applies.
+- Concurrency notes: what happens if two Zuplo instances both miss the cache and
+  query the database?
+- Guidance on acceptable staleness — if a customer's tier changes, how long
+  until the new tier is enforced?
+- Connection pooling / query timeout / circuit breaker recommendations for the
+  database call.
+- Cardinality guidance — how many distinct keys can this pattern support?
+
+### 4. Reframe `throwOnFailure` as an architectural decision
+
+Currently `throwOnFailure` is documented as a boolean with the guidance that
+"`false` is safer". In practice it is an availability-vs.-correctness choice:
+
+- Fail-open means DoS traffic bypasses rate limiting during an outage.
+- Fail-closed means a rate-limit-service outage takes down every rate-limited
+  route.
+
+Rewrite the option description in `monitoring-and-troubleshooting.mdx` (and link
+from the policy reference) to present it as a decision with explicit
+consequences, and recommend how to monitor for a silent fail-open condition.
+
+### 5. Standards-compliant response headers
+
+Add to `docs/errors/rate-limit-exceeded.mdx`:
+
+- Whether Zuplo emits IETF draft
+  [`RateLimit-*`](https://datatracker.ietf.org/doc/draft-ietf-httpapi-ratelimit-headers/)
+  headers (`RateLimit-Limit`, `RateLimit-Remaining`, `RateLimit-Reset`), only
+  `Retry-After`, or both.
+- Explicit RFC citations: RFC 6585 for `429`, RFC 9110 for `Retry-After` format
+  (delay-seconds vs. HTTP-date), RFC 9457 for the Problem Details body.
+
+---
+
+## Tier 3 — New pages
+
+### 6. "Rate Limit Deployment & Rollout" guide
+
+Should cover:
+
+- Observe-only / dry-run mode — how to log rejections without enforcing.
+- Canary rollout — applying new limits to a subset of traffic first.
+- Rollback procedures when a limit change causes incidents.
+- Per-environment testing (preview → production) and per-region verification
+  when the limit or dynamic function varies by environment.
+
+### 7. "Observability Checklist" in `monitoring-and-troubleshooting.mdx`
+
+A concrete, copy-paste-ready section that lists:
+
+- The exact metrics to export to an external logging / metrics platform
+  (rejected count, overage count, per-key histogram, window-usage distribution).
+- Sample queries for "top 10 keys approaching their limit" and "region with
+  highest rejection rate".
+- Alert recipes for a silent fail-open condition and for async-mode overshoot
+  exceeding an acceptable threshold.
+
+### 8. "Common Mistakes" checklist at the end of `combining-policies.mdx`
+
+A short checklist covering:
+
+- Forgetting to attach a policy to a route.
+- Policy name typos between `policies.json` and `routes.oas.json` (silent
+  failure).
+- Ordering: short window before long window (wastes counter writes).
+- `rateLimitBy: "user"` without an auth policy earlier in the pipeline.
+- Accidental shared counters across unrelated routes (already covered but could
+  be expanded).
+
+---
+
+## Out of scope for now (captured so we don't lose them)
+
+- **Security considerations** — enumeration attacks via 429 timing, rate-limit
+  bypass via header injection, amplification attacks against the rate-limit
+  service itself, shared-counter exhaustion as a multi-tenant concern. These
+  belong in a dedicated security section and need a security review.
+- **Monetization + rate-limit period-boundary behaviour** — what happens when a
+  monthly quota rolls over while a rate limit window is still active? Needs a
+  call with the monetization team to answer accurately.
diff --git a/sidebar.guides.ts b/sidebar.guides.ts
index ae53ef182..ea79823f8 100644
--- a/sidebar.guides.ts
+++ b/sidebar.guides.ts
@@ -66,7 +66,7 @@ export const guides: Navigation = [
     type: "category",
     label: "Performance & Data",
     items: [
-      "articles/per-user-rate-limits-using-db",
+      "rate-limiting/per-user-rate-limits-using-db",
       "articles/lazy-load-configuration-into-cache",
       "articles/archiving-requests-to-storage",
       "articles/check-ip-address",
diff --git a/sidebar.ts b/sidebar.ts
index d64e0c2d7..37cd48df0 100644
--- a/sidebar.ts
+++ b/sidebar.ts
@@ -161,7 +161,7 @@ const policies: Navigation = [
     items: [
       "articles/multiple-auth-policies",
       "articles/graphql-security",
-      "articles/per-user-rate-limits-using-db",
+      "rate-limiting/per-user-rate-limits-using-db",
       "articles/composite-policy-reference",
     ],
   },
@@ -341,7 +341,7 @@ export const documentation: Navigation = [
       "concepts/project-structure",
       "concepts/authentication",
       "concepts/api-keys",
-      "concepts/rate-limiting",
+      "rate-limiting/how-it-works",
       "articles/routing",
       "articles/policies",
       "articles/openapi",
@@ -453,7 +453,7 @@ export const documentation: Navigation = [
     items: [
       "rate-limiting/overview",
       "rate-limiting/getting-started",
-      "concepts/rate-limiting",
+      "rate-limiting/how-it-works",
       {
         type: "category",
         label: "Policies",
@@ -468,7 +468,7 @@ export const documentation: Navigation = [
         items: [
           "rate-limiting/dynamic-rate-limiting",
           "rate-limiting/combining-policies",
-          "articles/per-user-rate-limits-using-db",
+          "rate-limiting/per-user-rate-limits-using-db",
           "rate-limiting/monitoring-and-troubleshooting",
         ],
       },
diff --git a/vercel.json b/vercel.json
index edaf247ef..85d3473d7 100644
--- a/vercel.json
+++ b/vercel.json
@@ -465,7 +465,27 @@
     },
     {
       "source": "/docs/examples/per-user-rate-limits-using-db{/}?",
-      "destination": "/docs/articles/per-user-rate-limits-using-db",
+      "destination": "/docs/rate-limiting/per-user-rate-limits-using-db",
+      "permanent": true
+    },
+    {
+      "source": "/docs/articles/per-user-rate-limits-using-db{/}?",
+      "destination": "/docs/rate-limiting/per-user-rate-limits-using-db",
+      "permanent": true
+    },
+    {
+      "source": "/docs/articles/per-user-rate-limits-using-db.md",
+      "destination": "/docs/rate-limiting/per-user-rate-limits-using-db.md",
+      "permanent": true
+    },
+    {
+      "source": "/docs/concepts/rate-limiting{/}?",
+      "destination": "/docs/rate-limiting/how-it-works",
+      "permanent": true
+    },
+    {
+      "source": "/docs/concepts/rate-limiting.md",
+      "destination": "/docs/rate-limiting/how-it-works.md",
       "permanent": true
     },
     {

From ffe53f693fc76066df21cff20536bd8ce5a87fed Mon Sep 17 00:00:00 2001
From: Martyn Davies <martynrdavies@gmail.com>
Date: Wed, 15 Apr 2026 17:27:32 +0200
Subject: [PATCH 3/6] Merge rate limiting overview into Getting Started
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Following an IA + technical-writing review of the section's first three
pages, the standalone overview page is absorbed into Getting Started so
the section has two distinct entry points rather than three overlapping
ones.

- Move the "Choose an approach" decision table onto Getting Started,
  placed before Prerequisites so readers pick a rateLimitBy mode before
  the walkthrough. The Learn-more column now links to in-page anchors
  for the scenarios the guide covers directly (IP-based and
  authenticated user-mode).
- Rewrite the Getting Started intro so it defines rate limiting once and
  then states the guide's scope, with a direct pointer to How It Works
  for the algorithm and configuration depth.
- Regroup the Next steps list by intent: Understand, Customize, Combine,
  Operate, Reference.
- Rewrite the How It Works opening so it no longer duplicates the
  Getting Started intro word-for-word; position it explicitly as the
  deep dive for readers who want the mechanics. Retitle the page to
  "How Rate Limiting Works" and add the sidebar label "How It Works".
- Regroup the Related resources at the bottom of How It Works into
  "Go deeper on configuration", "Learn by example", and "Combine with
  other policies", replacing the unordered link dump.
- Drop the "Rate limiting and monetization" subsection from the section
  entry points — that material now lives exclusively in
  combining-policies.mdx.

Delete docs/rate-limiting/overview.mdx and add vercel.json redirects for
both the HTML URL and the .md variant pointing to the merged Getting
Started page. Remove the overview slug from the sidebar.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/rate-limiting/getting-started.mdx |  84 ++++++++++++++++----
 docs/rate-limiting/how-it-works.md     |  34 ++++++---
 docs/rate-limiting/overview.mdx        | 101 -------------------------
 rate-limiting-docs-followups.md        |   5 +-
 sidebar.ts                             |   1 -
 vercel.json                            |  10 +++
 6 files changed, 108 insertions(+), 127 deletions(-)
 delete mode 100644 docs/rate-limiting/overview.mdx

diff --git a/docs/rate-limiting/getting-started.mdx b/docs/rate-limiting/getting-started.mdx
index dab9a3fed..21f240016 100644
--- a/docs/rate-limiting/getting-started.mdx
+++ b/docs/rate-limiting/getting-started.mdx
@@ -2,13 +2,48 @@
 title: Getting Started with Rate Limiting
 sidebar_label: Getting Started
 description:
-  Add rate limiting to an existing Zuplo API gateway project in minutes.
+  Pick a rate limiting strategy and add it to an existing Zuplo project, with
+  hands-on examples for IP-based and authenticated per-user limits.
 ---
 
-This guide walks you through adding a basic rate limiting policy to an existing
-Zuplo project, attaching it to a route, and verifying that it works. For
-background on how rate limiting works in Zuplo, see the
-[Introduction](./overview.mdx).
+Rate limiting caps how many requests a client can make to your API within a time
+window. It protects your backend from traffic spikes, enforces fair usage across
+consumers, and supports tiered access for different customer plans. When a
+client exceeds the configured limit, they receive a `429 Too Many Requests`
+response with a `Retry-After` header indicating when they can retry.
+
+This guide walks you through picking a `rateLimitBy` strategy, adding the policy
+to a route, and testing it end to end. For details on the sliding window
+algorithm, every `rateLimitBy` mode, and the full set of configuration levers,
+read [How Rate Limiting Works](./how-it-works.md) alongside or after this guide.
+
+## Choose an approach
+
+Pick a `rateLimitBy` mode based on what your API looks like today. If you are
+not sure, start from the first row that matches and follow the linked guide or
+section below.
+
+| Use case                                                    | `rateLimitBy` | Policy                                                                           | Learn more                                                                |
+| ----------------------------------------------------------- | ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
+| Public API with no authentication                           | `ip`          | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | Follow the steps below                                                    |
+| Authenticated API, same limit for every consumer            | `user`        | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | [§5 Rate limit authenticated users](#5-rate-limit-authenticated-users)    |
+| Tiered limits (free, pro, enterprise) from API key metadata | `function`    | [Rate Limiting](../policies/rate-limit-inbound.mdx) with a custom function       | [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx)                      |
+| Tiered limits sourced from a database                       | `function`    | [Rate Limiting](../policies/rate-limit-inbound.mdx) with a custom function       | [Per-user limits with a database](./per-user-rate-limits-using-db.mdx)    |
+| Single global cap on an expensive endpoint                  | `all`         | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | [How rate limiting works](./how-it-works.md#all)                          |
+| Usage-based pricing counting multiple resources per request | `user`        | [Complex Rate Limiting](../policies/complex-rate-limit-inbound.mdx) (enterprise) | [How rate limiting works](./how-it-works.md#complex-rate-limiting-policy) |
+
+:::note
+
+`rateLimitBy: "user"` requires an authentication policy (such as API key or JWT
+authentication) earlier in the route's policy pipeline. Without it, the rate
+limit policy has no user to group requests by and returns an error. Section 5
+below walks through the full authenticated setup.
+
+:::
+
+For a definition of `rateLimitBy`, the sliding window algorithm, and the full
+list of configuration options (`mode`, `headerMode`, `throwOnFailure`, and
+more), see [How Rate Limiting Works](./how-it-works.md).
 
 ## Prerequisites
 
@@ -270,12 +305,35 @@ populated.
 
 ## Next steps
 
-- [How rate limiting works](./how-it-works.md) — Deep dive into `rateLimitBy`
-  modes, the sliding window algorithm, and advanced configuration options.
-- [Combining Policies](./combining-policies.mdx) — Stack per-minute and per-hour
-  limits, and combine rate limiting with quotas or monetization.
-- [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx) — Vary rate limits per
-  caller using a custom TypeScript function (for example, higher limits for paid
+**Understand the mechanics:**
+
+- [How Rate Limiting Works](./how-it-works.md) — The sliding window algorithm,
+  every `rateLimitBy` mode in detail, and advanced options like `mode`,
+  `headerMode`, and `throwOnFailure`.
+
+**Customize the behavior:**
+
+- [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx) — Vary limits per caller
+  using a custom TypeScript function (for example, higher limits for paid
   plans).
-- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) — Full
-  list of policy options including `mode`, `headerMode`, and `throwOnFailure`.
+- [Per-user limits with a database](./per-user-rate-limits-using-db.mdx) — An
+  advanced example using ZoneCache and a database lookup to drive limits per
+  customer.
+
+**Combine with other policies:**
+
+- [Combining Policies](./combining-policies.mdx) — Stack per-minute and per-hour
+  limits, pair rate limiting with quotas, and layer in monetization.
+
+**Operate in production:**
+
+- [Monitoring and Troubleshooting](./monitoring-and-troubleshooting.mdx) —
+  Observe limits in production, alert on silent failures, and diagnose
+  unexpected 429s.
+
+**Reference:**
+
+- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) — Every
+  configuration option for the standard policy.
+- [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
+  — Multi-counter configuration for usage-based pricing (enterprise).
diff --git a/docs/rate-limiting/how-it-works.md b/docs/rate-limiting/how-it-works.md
index dca01ae11..b5ef99c07 100644
--- a/docs/rate-limiting/how-it-works.md
+++ b/docs/rate-limiting/how-it-works.md
@@ -1,10 +1,13 @@
 ---
-title: Rate Limiting
+title: How Rate Limiting Works
+sidebar_label: How It Works
 ---
 
-Rate limiting controls how many requests a client can make to your API within a
-given time window. It protects your backend from traffic spikes, enforces fair
-usage across consumers, and enables tiered access for different customer plans.
+This page covers the mechanics behind Zuplo's rate limiter: how requests are
+counted, what each `rateLimitBy` mode does in detail, and every configuration
+option available. If you just want to add a rate limit to your API, start with
+the [Getting Started guide](./getting-started.mdx) instead — this page is the
+deep dive you can read alongside or after it.
 
 Zuplo's rate limiter uses a **sliding window algorithm** enforced globally
 across all edge locations. Unlike a fixed window algorithm (which resets
@@ -12,9 +15,6 @@ counters at fixed intervals and can allow bursts at window boundaries), the
 sliding window continuously tracks requests over a rolling time period. This
 produces smoother, more predictable throttling behavior.
 
-When a client exceeds the limit, they receive a `429 Too Many Requests` response
-with a `Retry-After` header indicating when they can retry.
-
 ## Key terms
 
 A few terms show up repeatedly in the rate limiting docs. They are related but
@@ -294,9 +294,23 @@ Both rate limiting policies support the following additional options:
 
 ## Related resources
 
-- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx)
+**Go deeper on configuration:**
+
+- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) — Every
+  option for the standard policy.
 - [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
+  — Multi-counter limits for usage-based pricing (enterprise).
+
+**Learn by example:**
+
 - [Dynamic Rate Limiting tutorial](../articles/step-5-dynamic-rate-limiting.mdx)
+  — Tiered limits by customer type.
 - [Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx)
-- [Quota policy](../policies/quota-inbound.mdx)
-- [Monetization policy](../articles/monetization/monetization-policy.md)
+  — Look up limits at request time using ZoneCache and a database.
+
+**Combine with other policies:**
+
+- [Quota policy](../policies/quota-inbound.mdx) — Monthly or billing-period
+  usage caps.
+- [Monetization policy](../articles/monetization/monetization-policy.md) —
+  Subscription-based access control and metering.
diff --git a/docs/rate-limiting/overview.mdx b/docs/rate-limiting/overview.mdx
deleted file mode 100644
index d93770a5a..000000000
--- a/docs/rate-limiting/overview.mdx
+++ /dev/null
@@ -1,101 +0,0 @@
----
-title: Rate Limiting
-sidebar_label: Introduction
----
-
-Rate limiting controls how many requests a client can make to your API within a
-given time window. It protects your backend from traffic spikes, enforces fair
-usage across consumers, and enables tiered access for different customer plans.
-
-Zuplo's rate limiter uses a sliding window algorithm enforced globally across
-all edge locations. When a client exceeds the limit, they receive a
-`429 Too Many Requests` response with a `Retry-After` header indicating when
-they can retry.
-
-## Choosing your approach
-
-Pick a `rateLimitBy` mode and a policy based on what your API looks like today.
-If you are not sure, start from the first row that matches and read the linked
-guide.
-
-| Use case                                                    | `rateLimitBy` | Policy                                                                           | Learn more                                                                |
-| ----------------------------------------------------------- | ------------- | -------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
-| Public API with no authentication                           | `ip`          | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | [Getting Started](./getting-started.mdx)                                  |
-| Authenticated API, same limit for every consumer            | `user`        | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | [Combining Policies](./combining-policies.mdx)                            |
-| Tiered limits (free, pro, enterprise) from API key metadata | `function`    | [Rate Limiting](../policies/rate-limit-inbound.mdx) with a custom function       | [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx)                      |
-| Tiered limits sourced from a database                       | `function`    | [Rate Limiting](../policies/rate-limit-inbound.mdx) with a custom function       | [Per-user limits with a database](./per-user-rate-limits-using-db.mdx)    |
-| Single global cap on an expensive endpoint                  | `all`         | [Rate Limiting](../policies/rate-limit-inbound.mdx)                              | [How rate limiting works](./how-it-works.md#all)                          |
-| Usage-based pricing counting multiple resources per request | `user`        | [Complex Rate Limiting](../policies/complex-rate-limit-inbound.mdx) (enterprise) | [How rate limiting works](./how-it-works.md#complex-rate-limiting-policy) |
-
-:::note
-
-`rateLimitBy: "user"` requires an authentication policy (such as API key or JWT
-authentication) earlier in the route's policy pipeline. Without it, the rate
-limit policy has no user to group requests by and returns an error.
-
-:::
-
-## Policies
-
-Zuplo provides two rate limiting policies.
-
-The [Rate Limiting policy](../policies/rate-limit-inbound.mdx) enforces a single
-request counter per time window. Use it when you need a straightforward "X
-requests per Y minutes" limit.
-
-The [Complex Rate Limiting policy](../policies/complex-rate-limit-inbound.mdx)
-supports multiple named counters in a single policy, each tracking a different
-resource or unit of work. Use it for usage-based pricing where different
-endpoints consume different amounts of a resource. This policy requires an
-enterprise plan.
-
-## Dynamic rate limiting
-
-Static rate limits apply the same threshold to every caller. With dynamic rate
-limiting, you provide a custom TypeScript function that determines limits at
-request time — so premium customers get higher throughput, free-tier users get a
-lower ceiling, and internal services can bypass limits entirely.
-
-- [Dynamic Rate Limiting guide](./dynamic-rate-limiting.mdx) — Implement custom
-  rate limit functions
-- [Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx)
-  — Advanced example using database lookups and the ZoneCache
-
-## Rate limiting and monetization
-
-If you use Zuplo's
-[Monetization](../articles/monetization/monetization-policy.md) feature, the
-monetization policy handles quota enforcement based on subscription plans. You
-can still add a rate limiting policy to provide per-second or per-minute spike
-protection on top of monthly billing quotas. These serve different purposes:
-
-- **Monetization quotas** enforce monthly or billing-period usage limits tied to
-  a subscription plan
-- **Rate limiting** protects against short-duration traffic spikes that could
-  overwhelm your backend
-
-## Recommended reading order
-
-If you are new to rate limiting in Zuplo, read these pages in order. Each one
-builds on the previous.
-
-1. [Getting Started](./getting-started.mdx) — Add a basic IP-based rate limit to
-   an existing project and confirm it works end to end.
-2. [How rate limiting works](./how-it-works.md) — Learn what the sliding window
-   algorithm does, what each `rateLimitBy` mode means, and which configuration
-   options are available.
-3. [Combining Policies](./combining-policies.mdx) — Stack multiple limits on the
-   same route and combine rate limiting with quotas or monetization.
-4. [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx) — Vary limits per caller
-   by writing a small TypeScript function.
-5. [Monitoring and Troubleshooting](./monitoring-and-troubleshooting.mdx) —
-   Observe limits in production and diagnose unexpected behaviour.
-
-Reference material to bookmark:
-
-- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) — Every
-  configuration option for the standard policy.
-- [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
-  — Multi-counter configuration (enterprise).
-- [Rate Limit Exceeded error](../errors/rate-limit-exceeded.mdx) — What a 429
-  response looks like and how to debug unexpected ones.
diff --git a/rate-limiting-docs-followups.md b/rate-limiting-docs-followups.md
index b006a4f8e..e428ec1eb 100644
--- a/rate-limiting-docs-followups.md
+++ b/rate-limiting-docs-followups.md
@@ -14,8 +14,9 @@ the rate limiting documentation on `feat/add-rate-limiting-section`.
 
 Files in scope for the review (policy reference pages were explicitly excluded):
 
-- `docs/rate-limiting/overview.mdx`
-- `docs/rate-limiting/getting-started.mdx`
+- `docs/rate-limiting/getting-started.mdx` (absorbed the former
+  `docs/rate-limiting/overview.mdx` during the IA/tech-writer restructure — see
+  below)
 - `docs/rate-limiting/dynamic-rate-limiting.mdx`
 - `docs/rate-limiting/combining-policies.mdx`
 - `docs/rate-limiting/monitoring-and-troubleshooting.mdx`
diff --git a/sidebar.ts b/sidebar.ts
index 37cd48df0..2729575c3 100644
--- a/sidebar.ts
+++ b/sidebar.ts
@@ -451,7 +451,6 @@ export const documentation: Navigation = [
     label: "Rate Limiting",
     icon: "gauge",
     items: [
-      "rate-limiting/overview",
       "rate-limiting/getting-started",
       "rate-limiting/how-it-works",
       {
diff --git a/vercel.json b/vercel.json
index 85d3473d7..245d8e68b 100644
--- a/vercel.json
+++ b/vercel.json
@@ -488,6 +488,16 @@
       "destination": "/docs/rate-limiting/how-it-works.md",
       "permanent": true
     },
+    {
+      "source": "/docs/rate-limiting/overview{/}?",
+      "destination": "/docs/rate-limiting/getting-started",
+      "permanent": true
+    },
+    {
+      "source": "/docs/rate-limiting/overview.md",
+      "destination": "/docs/rate-limiting/getting-started.md",
+      "permanent": true
+    },
     {
       "source": "/docs/examples/rick-and-morty-api-developer-portal-example{/}?",
       "destination": "/docs/articles/rick-and-morty-api-developer-portal-example",

From c4c7839f891731082437c878964360bdabe8f171 Mon Sep 17 00:00:00 2001
From: Martyn Davies <martynrdavies@gmail.com>
Date: Wed, 15 Apr 2026 18:05:00 +0200
Subject: [PATCH 4/6] Trim How Rate Limiting Works to unique reference content
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The page still had significant overlap with Getting Started, the policy
reference pages, and other rate-limiting guides. Cut five sections whose
content is already covered elsewhere, and trim a sixth to pure reference.

Removed (duplicated elsewhere):

- Rate limiting policies (Rate Limiting + Complex Rate Limiting
  subsections with JSON examples) — Getting Started §1 and the policy
  reference pages already cover this.
- Choosing a policy (table) — Getting Started's "Choose an approach"
  table is the canonical decision surface.
- Combining rate limiting with authentication — Getting Started §5 is
  the concrete pipeline + curl test.
- Rate limiting and monetization — lives in combining-policies.mdx.
- Combining multiple rate limit policies — lives in combining-policies.mdx.

Trimmed:

- Custom rate limit functions (was "Dynamic rate limiting with custom
  functions") — keep the function signature and CustomRateLimitDetails
  field reference; drop the tier-branching example and wire-up JSON,
  which are in dynamic-rate-limiting.mdx. Add one sentence describing
  the identifier option and pointers to the concrete walkthroughs.

Strengthened:

- The `user` mode note now cross-links to Getting Started §5 instead of
  just asserting the auth requirement.
- The `function` mode subsection cross-links to the Custom rate limit
  functions section below.
- Fixed a leftover `retry-after` → `Retry-After` casing inconsistency.
- Added Combining Policies to the Related resources group since the
  inline section was removed.

Net: page is now 191 lines (was 316), with 5 H2 sections (was 10).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/rate-limiting/how-it-works.md | 217 ++++++-----------------------
 1 file changed, 46 insertions(+), 171 deletions(-)

diff --git a/docs/rate-limiting/how-it-works.md b/docs/rate-limiting/how-it-works.md
index b5ef99c07..cd1f0d93f 100644
--- a/docs/rate-limiting/how-it-works.md
+++ b/docs/rate-limiting/how-it-works.md
@@ -37,79 +37,13 @@ not interchangeable.
   key. In short: `identifier` is _where the function lives_; `key` is _what the
   function returns_.
 
-## Rate limiting policies
-
-Zuplo provides two rate limiting policies, each suited to different levels of
-complexity.
-
-### Rate Limiting policy
-
-The [Rate Limiting policy](../policies/rate-limit-inbound.mdx) enforces a single
-request counter per time window. Configure a maximum number of requests, a time
-window, and how to identify callers.
-
-```json
-{
-  "name": "my-rate-limit-policy",
-  "policyType": "rate-limit-inbound",
-  "handler": {
-    "export": "RateLimitInboundPolicy",
-    "module": "$import(@zuplo/runtime)",
-    "options": {
-      "rateLimitBy": "user",
-      "requestsAllowed": 100,
-      "timeWindowMinutes": 1
-    }
-  }
-}
-```
-
-Use this policy when you need a straightforward "X requests per Y minutes"
-limit.
-
-### Complex Rate Limiting policy
-
-The [Complex Rate Limiting policy](../policies/complex-rate-limit-inbound.mdx)
-supports **multiple named counters** in a single policy. Each counter tracks a
-different resource or unit of work.
-
-```json
-{
-  "name": "my-complex-rate-limit-policy",
-  "policyType": "complex-rate-limit-inbound",
-  "handler": {
-    "export": "ComplexRateLimitInboundPolicy",
-    "module": "$import(@zuplo/runtime)",
-    "options": {
-      "rateLimitBy": "user",
-      "timeWindowMinutes": 1,
-      "limits": {
-        "requests": 100,
-        "compute": 500
-      }
-    }
-  }
-}
-```
-
-You can override counter increments programmatically per request using
-`ComplexRateLimitInboundPolicy.setIncrements()`. This is useful for usage-based
-pricing where different endpoints consume different amounts of a resource (for
-example, counting compute units or tokens instead of raw requests).
-
-## Choosing a policy
-
-| Scenario                                               | Policy                                                     |
-| ------------------------------------------------------ | ---------------------------------------------------------- |
-| Fixed requests-per-minute limit for all callers        | Rate Limiting                                              |
-| Different limits per customer tier (free vs. paid)     | Rate Limiting with a custom function                       |
-| Counting multiple resources (requests + compute units) | Complex Rate Limiting (enterprise)                         |
-| Usage-based billing with variable cost per request     | Complex Rate Limiting with dynamic increments (enterprise) |
-
 ## How `rateLimitBy` works
 
 The `rateLimitBy` option determines how the rate limiter groups requests into
-buckets. Both policies support the same four modes.
+buckets. Both the standard
+[Rate Limiting policy](../policies/rate-limit-inbound.mdx) and the
+[Complex Rate Limiting policy](../policies/complex-rate-limit-inbound.mdx)
+support the same four modes.
 
 ### `ip`
 
@@ -119,10 +53,10 @@ protection.
 
 :::caution
 
-Be aware that multiple clients behind the same corporate proxy, cloud NAT, or
-shared Wi-Fi network can share a single IP address. In these cases, IP-based
-rate limiting can unfairly throttle unrelated users. For authenticated APIs,
-prefer `rateLimitBy: "user"` instead.
+Multiple clients behind the same corporate proxy, cloud NAT, or shared Wi-Fi
+network can share a single IP address. In these cases, IP-based rate limiting
+can unfairly throttle unrelated users. For authenticated APIs, prefer
+`rateLimitBy: "user"` instead.
 
 :::
 
@@ -138,9 +72,11 @@ the actual consumer rather than a shared IP address.
 
 :::note
 
-The `user` mode requires an authentication policy (such as API Key
-Authentication or JWT authentication) earlier in the policy pipeline. If no
-authenticated user is present on the request, the policy returns an error.
+The `user` mode requires an authentication policy (such as API key or JWT
+authentication) earlier in the policy pipeline. If no authenticated user is
+present on the request, the policy returns an error. See
+[Getting Started §5](./getting-started.mdx#5-rate-limit-authenticated-users) for
+a full authenticated pipeline example.
 
 :::
 
@@ -149,10 +85,9 @@ authenticated user is present on the request, the policy returns an error.
 Groups requests using a custom TypeScript function that you provide. The
 function returns a `CustomRateLimitDetails` object containing a grouping key
 and, optionally, overridden values for `requestsAllowed` and
-`timeWindowMinutes`.
-
-This mode enables dynamic rate limiting where limits vary based on customer
-tier, route parameters, or any other request property.
+`timeWindowMinutes`. See
+[Custom rate limit functions](#custom-rate-limit-functions) below for the
+function signature and field reference.
 
 ### `all`
 
@@ -160,10 +95,12 @@ Applies a single shared counter across all requests to the route, regardless of
 who makes them. Use this for global rate limits on endpoints that call
 resource-constrained backends.
 
-## Dynamic rate limiting with custom functions
+## Custom rate limit functions
 
-When `rateLimitBy` is set to `"function"`, you provide a TypeScript module that
-determines the rate limit at request time. The function signature is:
+When `rateLimitBy` is set to `"function"`, Zuplo calls a TypeScript function you
+provide on every request. The function receives the request, context, and policy
+name, and returns a `CustomRateLimitDetails` object describing how to count that
+request, or `undefined` to skip rate limiting for it.
 
 ```ts
 import {
@@ -177,108 +114,44 @@ export function rateLimit(
   context: ZuploContext,
   policyName: string,
 ): CustomRateLimitDetails | undefined {
-  const user = request.user;
-
-  if (user.data.customerType === "premium") {
-    return {
-      key: user.sub,
-      requestsAllowed: 1000,
-      timeWindowMinutes: 1,
-    };
-  }
-
   return {
-    key: user.sub,
-    requestsAllowed: 50,
+    key: request.user.sub,
+    requestsAllowed: 100,
     timeWindowMinutes: 1,
   };
 }
 ```
 
-The `CustomRateLimitDetails` object has the following properties:
-
-- `key` - The string used to group requests into rate limit buckets
-- `requestsAllowed` (optional) - Overrides the policy's `requestsAllowed` value
-- `timeWindowMinutes` (optional) - Overrides the policy's `timeWindowMinutes`
-  value
-
-Returning `undefined` skips rate limiting for that request entirely.
-
-The function can also be `async` if you need to look up limits from a database
-or external service. See
-[Per-user rate limiting using a database](./per-user-rate-limits-using-db.mdx)
-for a complete example using the ZoneCache for performance.
-
-Wire the function into the policy configuration using the `identifier` option:
-
-```json
-{
-  "export": "RateLimitInboundPolicy",
-  "module": "$import(@zuplo/runtime)",
-  "options": {
-    "rateLimitBy": "function",
-    "requestsAllowed": 50,
-    "timeWindowMinutes": 1,
-    "identifier": {
-      "export": "rateLimit",
-      "module": "$import(./modules/rate-limit)"
-    }
-  }
-}
-```
-
-:::note
-
-The `requestsAllowed` and `timeWindowMinutes` values in the policy configuration
-serve as defaults. The custom function can override them per request.
-
-:::
-
-## Combining rate limiting with authentication
-
-Rate limiting works best when combined with authentication so that limits apply
-per consumer rather than per IP. A typical policy pipeline is:
-
-1. **Authentication** (e.g., API Key Authentication) -- validates credentials
-   and populates `request.user`
-2. **Rate Limiting** with `rateLimitBy: "user"` -- enforces per-consumer limits
-   using `request.user.sub`
-
-With API key authentication, the consumer's metadata (stored when creating the
-key) is available at `request.user.data`. A custom rate limit function can read
-fields like `customerType` or `plan` from the metadata to apply tiered limits.
-
-## Rate limiting and monetization
+### `CustomRateLimitDetails`
 
-If you use Zuplo's
-[Monetization](../articles/monetization/monetization-policy.md) feature, the
-monetization policy handles quota enforcement based on subscription plans. You
-can still add a rate limiting policy after the monetization policy to provide
-per-second or per-minute spike protection on top of monthly billing quotas.
-These serve different purposes:
+- `key` (required) — The string used to group requests into rate limit buckets.
+- `requestsAllowed` (optional) — Overrides the policy's `requestsAllowed` value
+  for this request.
+- `timeWindowMinutes` (optional) — Overrides the policy's `timeWindowMinutes`
+  value for this request.
 
-- **Monetization quotas** enforce monthly or billing-period usage limits tied to
-  a subscription plan
-- **Rate limiting** protects against short-duration traffic spikes that could
-  overwhelm your backend
+Returning `undefined` skips rate limiting for the request entirely — useful for
+health checks or privileged callers. The function can also be `async` if you
+need to await a database lookup or external service call.
 
-## Combining multiple rate limit policies
+Wire the function into the policy using the `identifier` option. The policy's
+configured `requestsAllowed` and `timeWindowMinutes` serve as defaults; the
+function can override them per request.
 
-You can apply multiple rate limiting policies to the same route. For example,
-you might enforce both a per-minute and a per-hour limit. When using multiple
-policies, apply the longest time window first, followed by shorter durations.
-This ordering ensures that the broadest limit is checked first — if a caller has
-exhausted their hourly quota, the request is rejected immediately without
-incrementing the shorter-duration counter.
+For concrete walkthroughs (tier-based, route-based, method-based,
+database-backed, selective bypass), see
+[Dynamic Rate Limiting](./dynamic-rate-limiting.mdx). For an advanced
+database-backed example with caching, see
+[Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx).
 
 ## Additional options
 
 Both rate limiting policies support the following additional options:
 
-- `headerMode` - Set to `"retry-after"` (default) to include the `retry-after`
-  header in 429 responses, or `"none"` to omit it. The `retry-after` value is
+- `headerMode` — Set to `"retry-after"` (default) to include the `Retry-After`
+  header in 429 responses, or `"none"` to omit it. The `Retry-After` value is
   returned as a number of seconds (delay-seconds format).
-- `mode` - Set to `"strict"` (default) or `"async"`. In **strict** mode, the
+- `mode` — Set to `"strict"` (default) or `"async"`. In **strict** mode, the
   request is held until the rate limit check completes — the backend is never
   called if the limit is exceeded. This adds some latency to every request
   because the check hits a globally distributed rate limit service. In **async**
@@ -286,7 +159,7 @@ Both rate limiting policies support the following additional options:
   check. This minimizes added latency but means some requests may get through
   even after the limit is exceeded. Async mode is a good fit when low latency
   matters more than exact enforcement.
-- `throwOnFailure` - Controls behavior when the rate limit service is
+- `throwOnFailure` — Controls behavior when the rate limit service is
   unreachable. When set to `false` (default), requests are allowed through
   (fail-open). When set to `true`, the policy returns an error to the client.
   The fail-open default prevents a rate limit service outage from blocking all
@@ -310,6 +183,8 @@ Both rate limiting policies support the following additional options:
 
 **Combine with other policies:**
 
+- [Combining Policies](./combining-policies.mdx) — Stack multiple rate limits,
+  and pair rate limiting with quotas or monetization.
 - [Quota policy](../policies/quota-inbound.mdx) — Monthly or billing-period
   usage caps.
 - [Monetization policy](../articles/monetization/monetization-policy.md) —

From f517f6851872d67f7af93e7625b42fd31f4fb220 Mon Sep 17 00:00:00 2001
From: Martyn Davies <martynrdavies@gmail.com>
Date: Wed, 15 Apr 2026 18:15:18 +0200
Subject: [PATCH 5/6] Remove Rate Limit Exceeded error from Rate Limiting
 sidebar

The errors/rate-limit-exceeded page stays on disk and remains
reachable via its cross-link from monitoring-and-troubleshooting, but
no longer appears as a nav entry under Rate Limiting.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 sidebar.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sidebar.ts b/sidebar.ts
index 2729575c3..b4a3c073c 100644
--- a/sidebar.ts
+++ b/sidebar.ts
@@ -471,7 +471,6 @@ export const documentation: Navigation = [
           "rate-limiting/monitoring-and-troubleshooting",
         ],
       },
-      "errors/rate-limit-exceeded",
     ],
   },
   {

From 639d2cc8d5bebd8aa5ff0e81b024aa22a085d2be Mon Sep 17 00:00:00 2001
From: Martyn Davies <martynrdavies@gmail.com>
Date: Wed, 15 Apr 2026 18:16:36 +0200
Subject: [PATCH 6/6] Remove Rate Limiting from the Concepts sidebar

rate-limiting/how-it-works is already listed under the Rate Limiting
category, which is now its canonical home. Drop the duplicate entry
from Concepts so there is a single path through the nav.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 sidebar.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sidebar.ts b/sidebar.ts
index b4a3c073c..c715409f8 100644
--- a/sidebar.ts
+++ b/sidebar.ts
@@ -341,7 +341,6 @@ export const documentation: Navigation = [
       "concepts/project-structure",
       "concepts/authentication",
       "concepts/api-keys",
-      "rate-limiting/how-it-works",
       "articles/routing",
       "articles/policies",
       "articles/openapi",