From b069b70bdef5593670c6146d74d695d1eac6e989 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 14:41:53 +0100 Subject: [PATCH 01/20] add howtos --- app/_data/schemas/frontmatter/tags.json | 1 + .../set-up-ai-proxy-advanced-with-kimi.md | 95 +++++++++++++++++++ .../ai-gateway/set-up-ai-proxy-with-kimi.md | 94 ++++++++++++++++++ app/ai-gateway/ai-providers/kimi.md | 87 +++++++++++++++++ 4 files changed, 277 insertions(+) create mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md create mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md create mode 100644 app/ai-gateway/ai-providers/kimi.md diff --git a/app/_data/schemas/frontmatter/tags.json b/app/_data/schemas/frontmatter/tags.json index 571c3211d8..2a2f02c525 100644 --- a/app/_data/schemas/frontmatter/tags.json +++ b/app/_data/schemas/frontmatter/tags.json @@ -117,6 +117,7 @@ "kafka", "kds", "key-auth", + "kimi", "kong-manager", "kongair", "kong-identity", diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md new file mode 100644 index 0000000000..b46f91e54b --- /dev/null +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md @@ -0,0 +1,95 @@ +--- +title: Set up AI Proxy Advanced with kimi in {{site.base_gateway}} +permalink: /how-to/set-up-ai-proxy-advanced-with-kimi/ +content_type: how_to +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: AI Proxy Advanced + url: /plugins/ai-proxy-advanced/ + +description: Configure the AI Proxy Advanced plugin to create a chat route using Kimi. + +products: + - gateway + - ai-gateway + +works_on: + - on-prem + - konnect + +min_version: + gateway: '3.14' + +plugins: + - ai-proxy-advanced + +entities: + - service + - route + - plugin + +tags: + - ai + - openai + - kimi + +tldr: + q: How do I use the AI Proxy Advanced plugin with Kimi? 
+ a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the OpenAI provider, a Kimi model, and your Kimi API key. + +tools: + - deck + +prereqs: + inline: + - title: Kimi + include_content: prereqs/deepseek + icon_url: /assets/icons/deepseek.svg + entities: + services: + - example-service + routes: + - example-route + +cleanup: + inline: + - title: Clean up Konnect environment + include_content: cleanup/platform/konnect + icon_url: /assets/icons/gateway.svg + - title: Destroy the {{site.base_gateway}} container + include_content: cleanup/products/gateway + icon_url: /assets/icons/gateway.svg +--- + +## Configure the plugin + +To set up AI Proxy Advanced with Kimi, use the `openai` provider, specify the [model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. + +In this example, we'll use the `deepseek-chat` model: + +{% entity_examples %} +entities: + plugins: + - name: ai-proxy-advanced + config: + targets: + - route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${api_key} + model: + provider: openai + name: deepseek-chat + options: + upstream_url: https://api.deepseek.com/chat/completions + max_tokens: 512 + temperature: 1.0 +variables: + api_key: + value: $DEEPSEEK_API_KEY +{% endentity_examples %} + +## Validate + +{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md new file mode 100644 index 0000000000..56a8687bac --- /dev/null +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md @@ -0,0 +1,94 @@ +--- +title: Set up AI Proxy with Kimi in {{site.base_gateway}} +permalink: /how-to/set-up-ai-proxy-with-kimi/ +content_type: how_to +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: AI Proxy + url: /plugins/ai-proxy/ + +description: Configure the AI Proxy plugin to 
create a chat route using Kimi. + +products: + - gateway + - ai-gateway + +works_on: + - on-prem + - konnect + +min_version: + gateway: '3.14' + +plugins: + - ai-proxy + +entities: + - service + - route + - plugin + +tags: + - ai + - openai + - kimi + +tldr: + q: How do I use the AI Proxy plugin with Kimi? + a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the OpenAI provider, a Kimi model, and your Kimi API key. + +tools: + - deck + +prereqs: + inline: + - title: Kimi + include_content: prereqs/deepseek + icon_url: /assets/icons/deepseek.svg + entities: + services: + - example-service + routes: + - example-route + +cleanup: + inline: + - title: Clean up Konnect environment + include_content: cleanup/platform/konnect + icon_url: /assets/icons/gateway.svg + - title: Destroy the {{site.base_gateway}} container + include_content: cleanup/products/gateway + icon_url: /assets/icons/gateway.svg +--- + +## Configure the plugin + +To set up AI Proxy with Kimi, use the `openai` provider, specify the [model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. 
+ +In this example, we'll use the `deepseek-chat` model: + +{% entity_examples %} +entities: + plugins: + - name: ai-proxy + config: + route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${api_key} + model: + provider: openai + name: deepseek-chat + options: + upstream_url: https://api.deepseek.com/chat/completions + max_tokens: 512 + temperature: 1.0 +variables: + api_key: + value: $DEEPSEEK_API_KEY +{% endentity_examples %} + +## Validate + +{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md new file mode 100644 index 0000000000..d5ecc74b9e --- /dev/null +++ b/app/ai-gateway/ai-providers/kimi.md @@ -0,0 +1,87 @@ +--- +title: "Kimi provider" +layout: reference +content_type: reference +description: Reference for supported capabilities for Kimi provider +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/ai-providers/ + +permalink: /ai-gateway/ai-providers/kimi/ + +works_on: + - on-prem + - konnect + +products: + - gateway + - ai-gateway + +tools: + - admin-api + - konnect-api + - deck + - kic + - terraform + +tags: + - ai + +plugins: + - ai-proxy-advanced + - ai-proxy + +min_version: + gateway: '3.14' + +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} plugins" + url: /plugins/?category=ai + - text: AI Providers + url: /ai-gateway/ai-providers/ + +how_to_list: + config: + products: + - ai-gateway + tags: + - kimi + description: true + view_more: false +--- + + +{% include plugins/ai-proxy/providers/providers.md providers=site.data.plugins.ai-proxy provider_name="Kimi" %} + +## Configure {{ provider.name }} with AI Proxy + +To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. 
+ +Here's a minimal configuration for chat completions: + +{% entity_example %} +type: plugin +data: + name: ai-proxy + config: + route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${key} + model: + provider: deepseek + name: deepseek-chat + +variables: + key: + value: "$DEEPSEEK_API_KEY" +{% endentity_example %} + +{:.success} +> For more configuration options and examples, see: +> - [AI Proxy examples](/plugins/ai-proxy/examples/) +> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) + +{% include plugins/ai-proxy/providers/how-tos.md %} From e8c9e39358b2daf95e120fef8790ce137dad84b5 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 14:52:03 +0100 Subject: [PATCH 02/20] correct API key and base urls --- .../set-up-ai-proxy-advanced-with-kimi.md | 16 ++++++++-------- .../ai-gateway/set-up-ai-proxy-with-kimi.md | 14 +++++++------- app/_includes/prereqs/kimi.md | 12 ++++++++++++ 3 files changed, 27 insertions(+), 15 deletions(-) create mode 100644 app/_includes/prereqs/kimi.md diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md index b46f91e54b..a58e5d4491 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md @@ -1,5 +1,5 @@ --- -title: Set up AI Proxy Advanced with kimi in {{site.base_gateway}} +title: Set up AI Proxy Advanced with Kimi in {{site.base_gateway}} permalink: /how-to/set-up-ai-proxy-advanced-with-kimi/ content_type: how_to related_resources: @@ -44,8 +44,8 @@ tools: prereqs: inline: - title: Kimi - include_content: prereqs/deepseek - icon_url: /assets/icons/deepseek.svg + include_content: prereqs/kimi + icon_url: /assets/icons/kimi.svg entities: services: - example-service @@ -64,9 +64,9 @@ cleanup: ## Configure the plugin -To set up AI Proxy Advanced with Kimi, use the `openai` provider, specify the 
[model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. +To set up AI Proxy Advanced with Kimi, use the `openai` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. -In this example, we'll use the `deepseek-chat` model: +In this example, we'll use the `kimi-k2.6` model: {% entity_examples %} entities: @@ -80,14 +80,14 @@ entities: header_value: Bearer ${api_key} model: provider: openai - name: deepseek-chat + name: kimi-k2.6 options: - upstream_url: https://api.deepseek.com/chat/completions + upstream_url: https://api.moonshot.ai/v1/chat/completions max_tokens: 512 temperature: 1.0 variables: api_key: - value: $DEEPSEEK_API_KEY + value: $MOONSHOT_API_KEY {% endentity_examples %} ## Validate diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md index 56a8687bac..c139e7eb29 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md @@ -44,8 +44,8 @@ tools: prereqs: inline: - title: Kimi - include_content: prereqs/deepseek - icon_url: /assets/icons/deepseek.svg + include_content: prereqs/kimi + icon_url: /assets/icons/kimi.svg entities: services: - example-service @@ -64,9 +64,9 @@ cleanup: ## Configure the plugin -To set up AI Proxy with Kimi, use the `openai` provider, specify the [model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. +To set up AI Proxy with Kimi, use the `openai` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. 
-In this example, we'll use the `deepseek-chat` model: +In this example, we'll use the `kimi-k2.6` model: {% entity_examples %} entities: @@ -79,14 +79,14 @@ entities: header_value: Bearer ${api_key} model: provider: openai - name: deepseek-chat + name: kimi-k2.6 options: - upstream_url: https://api.deepseek.com/chat/completions + upstream_url: https://api.moonshot.ai/v1/chat/completions max_tokens: 512 temperature: 1.0 variables: api_key: - value: $DEEPSEEK_API_KEY + value: $MOONSHOT_API_KEY {% endentity_examples %} ## Validate diff --git a/app/_includes/prereqs/kimi.md b/app/_includes/prereqs/kimi.md new file mode 100644 index 0000000000..61625fb065 --- /dev/null +++ b/app/_includes/prereqs/kimi.md @@ -0,0 +1,12 @@ +This tutorial requires a Kimi API key. + +1. Create a [Kimi Platform](https://platform.kimi.ai/) account. +1. Click **API keys**. +1. Click **Create new API key**. +1. In the **Name** field, enter `Kong`. +1. Click **Create API key**. +1. Click **Copy**. +1. Export the key to your environment: + ```sh + export DECK_MOONSHOT_API_KEY='YOUR MOONSHOT API KEY' + ``` \ No newline at end of file From a87f15da0bbf278207b353f59ffecdb406e99d84 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 14:59:52 +0100 Subject: [PATCH 03/20] add Kimi to vale dict --- .github/styles/base/Dictionary.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/styles/base/Dictionary.txt b/.github/styles/base/Dictionary.txt index 8d1266def5..5a570da9d5 100644 --- a/.github/styles/base/Dictionary.txt +++ b/.github/styles/base/Dictionary.txt @@ -465,6 +465,7 @@ kiali Kibana kibibytes kic +Kimi Knative Knative knative From e484f7170dd5ff40ec8925aa705701ee712d9ce4 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 15:32:30 +0100 Subject: [PATCH 04/20] updated some provider references --- .github/styles/base/Dictionary.txt | 1 + app/ai-gateway/ai-providers/kimi.md | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git
a/.github/styles/base/Dictionary.txt b/.github/styles/base/Dictionary.txt index 5a570da9d5..7dc61a22da 100644 --- a/.github/styles/base/Dictionary.txt +++ b/.github/styles/base/Dictionary.txt @@ -465,6 +465,7 @@ kiali Kibana kibibytes kic +kimi Kimi Knative Knative diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index d5ecc74b9e..336d8f6c20 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -71,12 +71,12 @@ data: header_name: Authorization header_value: Bearer ${key} model: - provider: deepseek - name: deepseek-chat + provider: kimi + name: kimi-k2.6 variables: key: - value: "$DEEPSEEK_API_KEY" + value: "$MOONSHOT_API_KEY" {% endentity_example %} {:.success} From 7648eebc93f746ed1cd72d197034fb3dffe57fdf Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 15:58:50 +0100 Subject: [PATCH 05/20] kimi provider not just kimi via openai provider --- app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md | 2 +- app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md index a58e5d4491..828e673364 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md @@ -79,7 +79,7 @@ entities: header_name: Authorization header_value: Bearer ${api_key} model: - provider: openai + provider: kimi name: kimi-k2.6 options: upstream_url: https://api.moonshot.ai/v1/chat/completions diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md index c139e7eb29..959a4a6b22 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md @@ -78,8 +78,7 @@ entities: header_name: Authorization header_value: 
Bearer ${api_key} model: - provider: openai - name: kimi-k2.6 + provider: kimi options: upstream_url: https://api.moonshot.ai/v1/chat/completions max_tokens: 512 From 8dc0e0dfa749c44e70c46f54aedbac0d880b4c65 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 27 Apr 2026 10:28:24 +0100 Subject: [PATCH 06/20] Update app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md Co-authored-by: tomek-labuk --- app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md index 959a4a6b22..78035270c1 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md @@ -64,7 +64,7 @@ cleanup: ## Configure the plugin -To set up AI Proxy with Kimi, use the `openai` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. +To set up AI Proxy with Kimi, use the `kimi` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. 
In this example, we'll use the `kimi-k2.6` model: From ab88b10903b6addc1d6effeab881fdeb74a7f2bd Mon Sep 17 00:00:00 2001 From: jbaross Date: Tue, 28 Apr 2026 13:52:48 +0100 Subject: [PATCH 07/20] add kimi to landing pages list --- app/_landing_pages/ai-gateway/ai-providers.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/_landing_pages/ai-gateway/ai-providers.yaml b/app/_landing_pages/ai-gateway/ai-providers.yaml index 37efd703a4..87b3514db2 100644 --- a/app/_landing_pages/ai-gateway/ai-providers.yaml +++ b/app/_landing_pages/ai-gateway/ai-providers.yaml @@ -146,6 +146,13 @@ rows: icon: /assets/icons/deepseek.svg cta: url: /ai-gateway/ai-providers/deepseek/ + - blocks: + - type: icon_card + config: + title: Kimi + icon: /assets/icons/kimi.svg + cta: + url: /ai-gateway/ai-providers/kimi/ - blocks: - type: icon_card config: From cde8c25caff7755734d25c856ba1657e13118715 Mon Sep 17 00:00:00 2001 From: jbaross Date: Tue, 28 Apr 2026 14:43:25 +0100 Subject: [PATCH 08/20] kimi icon --- app/_assets/icons/products/kimi.svg | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 app/_assets/icons/products/kimi.svg diff --git a/app/_assets/icons/products/kimi.svg b/app/_assets/icons/products/kimi.svg new file mode 100644 index 0000000000..949cf16d79 --- /dev/null +++ b/app/_assets/icons/products/kimi.svg @@ -0,0 +1,4 @@ + + + + From dcac1d69b3b7d45e22d902c85f86c85ac71507c2 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 18 May 2026 14:55:27 +0100 Subject: [PATCH 09/20] add missing yaml entry and consistent version reqs --- app/_data/plugins/ai-proxy.yaml | 22 +++++++++++++++++++ .../set-up-ai-proxy-advanced-with-kimi.md | 2 +- app/ai-gateway/ai-providers/kimi.md | 2 +- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/app/_data/plugins/ai-proxy.yaml b/app/_data/plugins/ai-proxy.yaml index 54082456a4..a5c9a3a88c 100644 --- a/app/_data/plugins/ai-proxy.yaml +++ b/app/_data/plugins/ai-proxy.yaml @@ -919,6 +919,28 @@ providers: 
provider_specific: [] statistics_logging: [] + - name: 'Kimi' + url_patterns: + - 'https://api.moonshot.ai' + min_version: '2.0.0' + chat: + supported: true + streaming: true + upstream_path: '`/v1/chat/completions`' + route_type: 'llm/v1/chat' + model_example: 'kimi-k2.6' + min_version: '2.0.0' + embeddings: + supported: false + image: + generations: + supported: false + edits: + supported: false + limitations: + provider_specific: [] + statistics_logging: [] + parameters: provider: 'config.model.provider' route_type: 'config.route_type' diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md index 828e673364..5b36fb4e4d 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md @@ -19,7 +19,7 @@ works_on: - konnect min_version: - gateway: '3.14' + gateway: '2.0.0' plugins: - ai-proxy-advanced diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index 336d8f6c20..f6146e70c3 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -32,7 +32,7 @@ plugins: - ai-proxy min_version: - gateway: '3.14' + gateway: '2.0.0' related_resources: - text: "{{site.ai_gateway}}" From d6c763119db9a113d0efafedaa9513ab72310287 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Mon, 1 Jun 2026 11:39:16 +0200 Subject: [PATCH 10/20] feat(ai-gateway): AI Gateway 2.0 entities (#5263) --- .github/styles/base/Dictionary.txt | 1 + api-specs/konnect/ai-gateway/v2/openapi.yaml | 19 + app/_ai_gateway_entities/ai-agent.md | 309 ++++++++++ .../ai-consumer-credential.md | 130 +++++ app/_ai_gateway_entities/ai-consumer-group.md | 135 +++++ app/_ai_gateway_entities/ai-consumer.md | 140 +++++ .../ai-data-plane-certificate.md | 124 ++++ .../ai-data-plane-node.md | 95 +++ app/_ai_gateway_entities/ai-gateway.md | 127 ++++ app/_ai_gateway_entities/ai-mcp-server.md | 544 
++++++++++++++++++ app/_ai_gateway_entities/ai-model.md | 419 ++++++++++++++ app/_ai_gateway_entities/ai-policy.md | 139 +++++ app/_ai_gateway_entities/ai-provider.md | 153 +++++ app/_ai_gateway_entities/ai-vault.md | 106 ++++ app/_api/konnect/ai-gateway/_index.md | 3 + app/_assets/javascripts/apps/EntitySchema.vue | 9 +- app/_data/entity_examples/config.yml | 44 +- app/_data/konnect_oas_data.json | 21 + app/_data/products/ai-gateway.yml | 8 +- .../entity_example/format/admin-api.md | 12 +- .../components/entity_example/format/deck.md | 4 +- .../entity_example/format/konnect-api.md | 12 +- .../components/entity_example/format/ui_ai.md | 83 +++ app/_landing_pages/ai-gateway/entities.yaml | 109 ++++ .../entity_example/presenters/admin-api.rb | 32 +- .../entity_example/presenters/konnect-api.rb | 31 +- .../drops/entity_example/presenters/ui.rb | 6 +- app/_plugins/drops/entity_schema.rb | 10 +- jekyll.yml | 12 + vite.config.ts | 6 +- 30 files changed, 2808 insertions(+), 35 deletions(-) create mode 100644 api-specs/konnect/ai-gateway/v2/openapi.yaml create mode 100644 app/_ai_gateway_entities/ai-agent.md create mode 100644 app/_ai_gateway_entities/ai-consumer-credential.md create mode 100644 app/_ai_gateway_entities/ai-consumer-group.md create mode 100644 app/_ai_gateway_entities/ai-consumer.md create mode 100644 app/_ai_gateway_entities/ai-data-plane-certificate.md create mode 100644 app/_ai_gateway_entities/ai-data-plane-node.md create mode 100644 app/_ai_gateway_entities/ai-gateway.md create mode 100644 app/_ai_gateway_entities/ai-mcp-server.md create mode 100644 app/_ai_gateway_entities/ai-model.md create mode 100644 app/_ai_gateway_entities/ai-policy.md create mode 100644 app/_ai_gateway_entities/ai-provider.md create mode 100644 app/_ai_gateway_entities/ai-vault.md create mode 100644 app/_api/konnect/ai-gateway/_index.md create mode 100644 app/_includes/components/entity_example/format/ui_ai.md create mode 100644 app/_landing_pages/ai-gateway/entities.yaml diff 
--git a/.github/styles/base/Dictionary.txt b/.github/styles/base/Dictionary.txt index 4656efd372..9061bb1108 100644 --- a/.github/styles/base/Dictionary.txt +++ b/.github/styles/base/Dictionary.txt @@ -13,6 +13,7 @@ ai_rate_limiting_policy agentic Agno Agno's +AIGateway Alertmanager Alibaba allow_terminated diff --git a/api-specs/konnect/ai-gateway/v2/openapi.yaml b/api-specs/konnect/ai-gateway/v2/openapi.yaml new file mode 100644 index 0000000000..170981dc02 --- /dev/null +++ b/api-specs/konnect/ai-gateway/v2/openapi.yaml @@ -0,0 +1,19 @@ +openapi: 3.0.0 +info: + title: Konnect AI Gateway + version: 0.0.0 + description: Internal API for managing Kong AI Gateway policies. + contact: + name: Kong + url: 'https://cloud.konghq.com' +servers: + - url: 'https://us.api.konghq.com/v1' + description: US Region Base URL + - url: 'https://eu.api.konghq.com/v1' + description: EU Region Base URL + - url: 'https://au.api.konghq.com/v1' + description: AU Region Base URL + - url: 'https://me.api.konghq.com/v1' + description: Middle-East Production region + - url: 'https://in.api.konghq.com/v1' + description: India Production region diff --git a/app/_ai_gateway_entities/ai-agent.md b/app/_ai_gateway_entities/ai-agent.md new file mode 100644 index 0000000000..9ffd7b9cb8 --- /dev/null +++ b/app/_ai_gateway_entities/ai-agent.md @@ -0,0 +1,309 @@ +--- +title: AI Agents +content_type: reference +entities: + - ai-agent +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-agent/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Agent entity used by {{site.ai_gateway}} for A2A and HTTP agent configurations. 
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayAgent +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + - text: A2A protocol specification + url: https://a2aproject.github.io/A2A/ +faqs: + - q: What's the difference between an `a2a` Agent and an `http` Agent? + a: | + An `a2a` Agent applies Agent-to-Agent protocol awareness (JSON-RPC and REST binding detection, + agent-card URL rewriting, structured A2A telemetry) to traffic flowing to an upstream agent. + An `http` Agent is a generic HTTP route to an upstream agent without A2A-specific processing. + Use `a2a` when the upstream speaks the A2A protocol and you want observability tied to A2A + task and message semantics. + + - q: Does the Agent entity modify request routing or aggregate responses? + a: | + No. The runtime behind an Agent operates as a transparent proxy. It detects A2A requests, + records telemetry, and rewrites agent-card URLs to the gateway address. It does not change + routing decisions, merge responses, or hold task state on behalf of clients. + + - q: Why is the agent-card URL rewritten? + a: | + A2A clients use agent-card responses (at `/.well-known/agent-card.json`) to discover where to + send subsequent requests. Rewriting the `url` field, and any `additionalInterfaces[].url` + fields, to the {{site.ai_gateway}} address means clients route follow-up traffic through the + gateway instead of bypassing it. The rewrite honors `X-Forwarded-*` headers when the gateway + sits behind a load balancer. + + - q: How does streaming work? + a: | + Server-sent events (`Content-Type: text/event-stream`) pass through chunk-by-chunk without + buffering. 
The runtime counts SSE events, captures time-to-first-byte, and extracts task state + from the final event for analytics. Latency is preserved. + + - q: How do I limit which consumers can reach an Agent? + a: | + Set the `acls` field on the Agent with allow or deny lists. Each entry is a string that + references a Consumer, Consumer Group, or Authenticated Group by name. + + - q: Can the same plugin run on an Agent that I'd attach to a route or service? + a: | + Plugin configuration that applies to the Agent goes through the [Policy entity](/ai-gateway/entities/ai-policy/). + Attach Policies to the Agent through its `policies` field. + + - q: How do I configure agents in on-prem deployments? + a: | + {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + For on-prem deployments, configure agent proxying using {{site.base_gateway}} plugins directly (for example, the AI A2A Proxy plugin). + See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. +--- + +## What is an Agent? + +An Agent is a first-class {{site.ai_gateway}} entity that represents an upstream agent endpoint exposed through {{site.ai_gateway}}. An Agent has a type, either `a2a` for [Agent-to-Agent protocol](https://a2aproject.github.io/A2A/) traffic or `http` for generic HTTP agent routing, and a configuration that points {{site.ai_gateway}} at the upstream and shapes how requests flow. + +For `http` type Agents, requests are proxied without A2A-specific processing. For `a2a` type Agents, {{site.ai_gateway}} adds protocol-aware behavior on top of plain proxying: it detects A2A requests across both JSON-RPC and REST bindings, rewrites agent-card URLs so clients discover the gateway as the canonical endpoint, and emits structured A2A telemetry to {{site.konnect_short_name}} analytics and OpenTelemetry. 
+ +Agents can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/agents +{% endtable %} + +## How A2A traffic flows + +When an Agent has type `a2a`, proxied traffic is processed in four phases: + +1. **Access**. Detects whether the request is an A2A operation (JSON-RPC or REST binding). When statistics logging is enabled, this starts an OpenTelemetry span and records the request body for payload logging if that's also enabled. +1. **Header filter**. Detects streaming responses (`Content-Type: text/event-stream`) and records time to first byte. Buffers agent-card responses for URL rewriting. +1. **Body filter**. Streams SSE chunks through to the client without buffering. Buffers non-streaming responses to extract task metadata. Rewrites agent-card URLs to the gateway address. Emits analytics at end of response. +1. **Log**. Finalizes the OpenTelemetry span with task state, task ID, and any error information. + +Non-A2A traffic, and traffic to `http` Agents, is proxied without these steps. + + +{% mermaid %} +sequenceDiagram + autonumber + participant Client as A2A Client + participant Gateway as {{site.ai_gateway}}<br/>(Agent) + participant Agent as Upstream A2A Agent + + Client->>Gateway: A2A request (JSON-RPC or REST) + Note over Gateway: Detect A2A binding and method<br/>Start OTel span (if logging enabled) + + Gateway->>Agent: Proxied request<br/>(Accept-Encoding removed if logging enabled) + + alt Streaming response (SSE) + Agent-->>Gateway: text/event-stream chunks + Note over Gateway: Pass through each chunk<br/>Count SSE events, track TTFB + Gateway-->>Client: SSE chunks (unchanged) + Note over Gateway: On final chunk:<br/>Extract task state, set analytics + else Non-streaming response + Agent->>Gateway: JSON response + Note over Gateway: Buffer response<br/>Extract task metadata + Gateway->>Client: Response (unchanged) + end + + Note over Gateway: Finish OTel span<br/>Emit ai.a2a metrics to log plugins +{% endmermaid %} + + +## Core A2A protocol elements + +A2A defines the communication elements between agents. The runtime surfaces data tied to these elements in log output and OpenTelemetry spans for `a2a` Agents. + +{% table %} +columns: + - title: Element + key: element + - title: Description + key: description + - title: Purpose + key: purpose +rows: + - element: Agent Card + description: A JSON metadata document describing an agent's identity, capabilities, endpoint, skills, and authentication requirements. + purpose: Enables clients to discover agents and understand how to interact with them. + - element: Task + description: A stateful unit of work initiated by an agent, with a unique ID and defined lifecycle. + purpose: Tracks long-running operations and supports multi-turn interactions. + - element: Message + description: A single turn of communication between a client and an agent, containing content and a role (`user` or `agent`). + purpose: Conveys instructions, context, questions, answers, or status updates that are not formal artifacts. + - element: Part + description: The fundamental content container (for example, `TextPart`, `FilePart`, `DataPart`) used within messages and artifacts. + purpose: Provides flexibility for agents to exchange different content types within messages and artifacts. + - element: Artifact + description: A tangible output generated by an agent during a task (for example, a document, image, or structured data). + purpose: Carries the concrete output of a task in a structured, retrievable form. +{% endtable %} + +### Protocol detection + +A2A traffic is auto-detected per request and non-A2A traffic passes through without overhead. + +#### REST binding + +Detection anchors to the end of the request path, so any prefix added by the route is ignored. 
For example, both `/v1/message:send` and `/api/agents/v1/message:send` match `SendMessage`: + + +{% table %} +columns: + - title: HTTP method + key: method + - title: Path suffix + key: path + - title: A2A operation + key: operation + - title: Canonical method + key: canonical +rows: + - method: "`POST`" + path: "`/v1/message:send`" + operation: SendMessage + canonical: "`message/send`" + - method: "`POST`" + path: "`/v1/message:stream`" + operation: SendStreamingMessage + canonical: "`message/stream`" + - method: "`GET`" + path: "`/.well-known/agent-card.json`" + operation: GetAgentCard + canonical: "`agent/getCard`" + - method: "`GET`" + path: "`/v1/extendedAgentCard`" + operation: GetExtendedAgentCard + canonical: "`agent/getExtendedAgentCard`" + - method: "`GET`" + path: "`/v1/tasks/{id}`" + operation: GetTask + canonical: "`tasks/get`" + - method: "`GET`" + path: "`/v1/tasks`" + operation: ListTasks + canonical: "`tasks/list`" + - method: "`POST`" + path: "`/v1/tasks/{id}:cancel`" + operation: CancelTask + canonical: "`tasks/cancel`" + - method: "`POST`" + path: "`/v1/tasks/{id}:subscribe`" + operation: SubscribeToTask + canonical: "`tasks/resubscribe`" + - method: "`POST`" + path: "`/v1/tasks`" + operation: ListTasks + canonical: "`tasks/list`" +{% endtable %} + + +The canonical method name is what appears in OpenTelemetry span attributes and log output. + +#### JSON-RPC binding + +Detected by the `"jsonrpc"` field in the request body, combined with a recognized A2A method name or an `A2A-Version` request header. Recognized methods include `message/send`, `message/stream`, `tasks/get`, `tasks/list`, `tasks/cancel`, `tasks/resubscribe`, the `tasks/pushNotificationConfig/*` family, and `agent/getExtendedAgentCard`. + +A request carrying an `A2A-Version` header is treated as JSON-RPC even if the method isn't in the recognized list. 
When an unknown method is accepted this way, the `method` field in log output is recorded as `"unknown"` to bound metric cardinality. The OpenTelemetry span's `kong.a2a.operation` attribute still receives the actual method name. + +### Agent-card URL rewriting + +When an upstream agent returns an agent card, the runtime rewrites the `url` field, and any `additionalInterfaces[].url` fields, to the {{site.ai_gateway}} address. A2A clients then discover the gateway as the canonical endpoint instead of contacting the upstream directly. The rewrite uses `X-Forwarded-*` headers to construct the correct scheme, host, and port when the gateway is deployed behind a load balancer or reverse proxy. + +## Logging and observability + +When Statistics logging is enabled, {{site.ai_gateway}} records structured A2A telemetry per request and exposes it in {{site.konnect_short_name}} analytics, attached log plugins, and OpenTelemetry when [{{site.base_gateway}} tracing](/gateway/tracing/) is configured. For the canonical metric and attribute list, see [A2A metrics](/ai-gateway/ai-otel-metrics/#a2a-metrics). + +The runtime emits this data into the `ai.a2a` namespace consumed by {{site.konnect_short_name}} analytics and any attached logging plugins, and creates a `kong.a2a` child span when [{{site.base_gateway}} tracing](/gateway/tracing/) is configured. + +{:.info} +> When statistics logging is enabled, the runtime removes the `Accept-Encoding` request header +> before forwarding to the upstream. This prevents compressed responses that the runtime can't +> parse for metadata extraction. + +Payload logging additionally captures request and response bodies. Payloads are truncated at the configured payload size limit. + +{:.warning} +> Payload logging may expose sensitive data. Only enable it when you're prepared to handle +> request and response bodies in your logging pipeline. 
+ +You can view A2A analytics in {{site.konnect_short_name}} Explorer and Dashboards through the [Agentic usage analytics](/observability/explorer/?tab=agentic-usage#metrics) view. + +### Log output fields + +{% include /plugins/ai-a2a-proxy/log-output-fields.md %} + +### OpenTelemetry span attributes + +When statistics logging is enabled and {{site.base_gateway}} tracing is configured, the runtime creates a `kong.a2a` child span with the following attributes: + +{% include /plugins/ai-a2a-proxy/otel-span-attributes.md %} + +### Task states + +Task state values surfaced in logs and spans are normalized to lowercase A2A spec format, regardless of the upstream SDK version: `submitted`, `working`, `input-required`, `completed`, `canceled`, `failed`, `rejected`, `auth-required`, `unknown`. + +## Access control + +The `acls` field controls which identities are allowed to reach the Agent. The field accepts `allow` and `deny` lists. Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. Access is enforced before traffic reaches the upstream agent. + +For per-request authentication and identity, attach an authentication Policy to the Agent. + +## Attach Policies + +Policies are how plugin configurations apply to an Agent. Attach them through the Agent's `policies` field. Each entry is a string that references a Policy by name or ID. Multiple Policies can attach to one Agent; each runs as an independent plugin instance. + +For details, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +## Set up an Agent + +The following example creates an `a2a` Agent that proxies traffic to an upstream A2A agent at `https://booking-agent.internal.kongair.com`, with statistics logging enabled and access restricted to the `internal-teams` Consumer Group. 
+ +{% entity_example %} +type: agent +data: + display_name: KongAir Flight Booking Agent + name: kongair-flight-booking-agent + type: a2a + acls: + allow: + - internal-teams + deny: [] + policies: [] + config: + url: https://booking-agent.internal.kongair.com + logging: + statistics: true + payloads: false + max_payload_size: 524288 +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-consumer-credential.md b/app/_ai_gateway_entities/ai-consumer-credential.md new file mode 100644 index 0000000000..a151e8f38a --- /dev/null +++ b/app/_ai_gateway_entities/ai-consumer-credential.md @@ -0,0 +1,130 @@ +--- +title: AI Consumer Credentials +content_type: reference +entities: + - ai-consumer-credential +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-consumer-credential/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Credentials issued to AI Consumers for authenticating to {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumerCredential +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Consumer entity + url: /ai-gateway/entities/ai-consumer/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ +faqs: + - q: Why are credentials a separate entity instead of a field on the Consumer? + a: | + Each credential has its own lifecycle, identifier, and (for API keys) TTL. Modeling them as + a sub-entity of the Consumer lets you list, rotate, and revoke individual credentials + independently of the Consumer record. + + - q: What credential types are supported? + a: | + Two types: `api-key` and `oauth`. The `type` of the Credential must match the Consumer's + `type`. An `api-key` credential carries the `api_key` value (and an optional `ttl`). 
An + `oauth` credential carries a `custom_id` that maps to the OAuth provider's identifier. + + - q: Can a Consumer have multiple credentials? + a: | + Yes. Issue one Credential per environment, client, or rotation cycle, and revoke individual + Credentials without affecting the others. + + - q: Is the API key value visible after creation? + a: | + No. The `api_key` field is write-only; subsequent reads return the Credential's metadata + (`name`, `display_name`, `ttl`, timestamps) but not the secret. Distribute the key value at + creation time, and rotate by issuing a new Credential and revoking the old one. + + - q: What's the relationship between `ttl` and the Consumer's lifecycle? + a: | + `ttl` controls how long the API key value remains valid in seconds. When it elapses, the + Credential stops authenticating but the Credential record (and the parent Consumer) remain. + Issue a new Credential to keep the Consumer authenticating. +--- + +## What is a Consumer Credential? + +A Consumer Credential is the {{site.ai_gateway}} entity that represents the secret material a [Consumer](/ai-gateway/entities/ai-consumer/) presents to authenticate to {{site.ai_gateway}}. + +Credentials are nested under their owning Consumer: each Credential belongs to exactly one Consumer, and removing the Consumer removes its Credentials. + +Consumer Credentials are managed through the {{site.ai_gateway}} entity API: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumers/{consumerId}/credentials +{% endtable %} + +## Credential types + +The `type` field on a Credential must match the parent Consumer's `type`: + +* **`api-key`**: the Credential carries an `api_key` value the client presents on each request. An optional `ttl` (seconds) bounds the validity period; once it elapses, the value no longer authenticates. 
+* **`oauth`**: the Credential carries a `custom_id` that maps a Consumer to an OAuth identity issued by an external provider. {{site.ai_gateway}} works with any standards-compliant OAuth 2.0 / OpenID Connect provider configured through the [OpenID Connect plugin](/plugins/openid-connect/), or, for MCP traffic, the [AI MCP OAuth2 plugin](/plugins/ai-mcp-oauth2/). The `custom_id` is typically the OIDC `sub` claim or the Client ID issued by the OAuth provider. The actual access token is issued and validated by the OAuth provider, not stored on the Credential. + +The `api_key` field is write-only and cannot be retrieved after creation. Treat creation responses as the only opportunity to capture the key value. + +## Lifecycle + +Each Credential has its own UUID and supports independent list, get, and delete operations through the nested endpoints under its parent Consumer. There is no `PUT` operation: rotation is an explicit "create new, delete old" flow, which avoids long-lived stale references. + +Deleting a Credential immediately stops it from authenticating. Deleting the parent Consumer removes all of its Credentials. + +## Set up an API key Credential + +The following example issues a 24-hour API key credential to an existing Consumer named `mobile-app-production`. + +{% entity_example %} +type: consumer-credential +data: + display_name: Mobile App - Dev Key + name: mobile-app-dev-key + type: api-key + api_key: + ttl: 86400 +{% endentity_example %} + +{:.warning} +> Don't commit `api_key` values to source control. Inject them at creation time from a +> secret-management system, and treat any value checked into a configuration file as compromised. + +## Set up an OAuth Credential + +The following example issues an OAuth credential that maps an external OIDC client ID to a Consumer. 
+ +{% entity_example %} +type: consumer-credential +data: + display_name: Mobile App - OIDC Mapping + name: mobile-app-oidc-mapping + type: oauth + custom_id: 0oatibf4t2PlDxqgR1d7 +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-consumer-group.md b/app/_ai_gateway_entities/ai-consumer-group.md new file mode 100644 index 0000000000..38ecc83a3b --- /dev/null +++ b/app/_ai_gateway_entities/ai-consumer-group.md @@ -0,0 +1,135 @@ +--- +title: AI Consumer Groups +content_type: reference +entities: + - ai-consumer-group +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-consumer-group/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Consumer Groups for {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumerGroup +works_on: + - konnect +tools: + - deck + - admin-api + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Consumer entity + url: /ai-gateway/entities/ai-consumer/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: "{{site.base_gateway}} Consumer Group entity" + url: /gateway/entities/consumer-group/ +faqs: + - q: How is an {{site.ai_gateway}} Consumer Group different from a {{site.base_gateway}} Consumer Group? + a: | + The runtime entity is a regular Kong Consumer Group. The {{site.ai_gateway}} surface adds + the entity convention (`display_name`, `name`, `labels`) and a required `policies` array + for attaching plugin instances at the group scope. + + - q: Can I edit the underlying Kong Consumer Group that {{site.ai_gateway}} generates? + a: | + No. The generated Kong Consumer Group is protected from direct modification through the + standard `/consumer-groups` Admin API. Update the AI Consumer Group instead. + + - q: How do I assign a Consumer to a Consumer Group? 
+ a: | + Set the `consumer_groups` array on the Consumer entity to reference this group by + `name` or `id`. Membership is managed from the Consumer side. + See the [Consumer entity](/ai-gateway/entities/ai-consumer/) reference. + + - q: Can a Consumer belong to multiple Consumer Groups? + a: | + Yes. The Consumer's `consumer_groups` array accepts one or more references. + + - q: How do I attach Policies to a Consumer Group? + a: | + Add the Policy's `name` or `id` to the Consumer Group's `policies` array. + The plugin runs when a member of the group is identified during a request. + See the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + + - q: How do I gate access to a Model, Agent, or MCP Server with a Consumer Group? + a: | + Add the Consumer Group's name to the parent entity's `acls.allow` or `acls.deny` list. + ACLs accept Consumer, Consumer Group, and Authenticated Group names. + See the [Model entity](/ai-gateway/entities/ai-model/) reference. +--- + +## What is a Consumer Group? + +A Consumer Group is the {{site.ai_gateway}} entity that represents a collection of Consumers grouped for the purpose of applying shared Policies and access controls. + +Use Consumer Groups to scope group-wide behavior, such as rate limits, prompt guards, or content moderation, without configuring each Consumer individually. Consumer Groups can appear in the `acls` field of Model, Agent, and MCP Server entities, where they gate access to those parent entities. + +Consumer Groups can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumer-groups +{% endtable %} + +## Configure a Consumer Group + +When you create a Consumer Group, the configuration steps generally follow this order: + +1. 
Create the group with a display name, name, and optional description. +1. Optionally attach Policies for group-wide plugin execution (such as rate limits or content moderation). +1. Assign Consumers to the group through each Consumer's `consumer_groups` array. +1. Optionally use the Consumer Group in `acls` on Model, Agent, or MCP Server entities to control access. + +For a concrete example, see [Set up a Consumer Group](#set-up-a-consumer-group). + +## Membership + +A Consumer Group doesn't list its members directly. Membership is set on the Consumer entity through the Consumer's `consumer_groups` array. Each entry references a Consumer Group by `name` or `id`. A single Consumer can belong to multiple Consumer Groups. + +For the Consumer-side configuration, see the [Consumer entity](/ai-gateway/entities/ai-consumer/) reference. + +## Attach Policies + +Policies attached to a Consumer Group run when a member of that group is identified during a request. To attach a Policy, add its `name` or `id` to the Consumer Group's `policies` array. + +You can attach multiple Policies to a single Consumer Group. Each Policy is an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. + +For the supported plugin types and how Policies attach to other entities, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +## Use in parent entity ACLs + +The `acls` field on Model, Agent, and MCP Server entities accepts Consumer Group names alongside Consumer and Authenticated Group names. Add a Consumer Group to a parent entity's `acls.allow` list to permit its members access, or to `acls.deny` to block them. + +ACLs are evaluated at the Service level of the parent entity's derived primitives. Consumer Group membership is resolved after the request is authenticated and the Consumer is identified. 
+ +## Set up a Consumer Group + +The following example creates an AI Consumer Group with one attached Policy that applies a shared rate limit to its members. + +{% entity_example %} +type: consumer_group +data: + display_name: Internal Teams + name: internal-teams + policies: + - rate-limit-internal-teams +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-consumer.md b/app/_ai_gateway_entities/ai-consumer.md new file mode 100644 index 0000000000..69a805b6be --- /dev/null +++ b/app/_ai_gateway_entities/ai-consumer.md @@ -0,0 +1,140 @@ +--- +title: AI Consumers +content_type: reference +entities: + - ai-consumer +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-consumer/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: "Consumers for {{site.ai_gateway}}." +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumer +works_on: + - konnect +tools: + - deck + - admin-api + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Consumer Credential entity + url: /ai-gateway/entities/ai-consumer-credential/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: "{{site.base_gateway}} Consumer entity" + url: /gateway/entities/consumer/ +faqs: + - q: How is an {{site.ai_gateway}} Consumer different from a {{site.base_gateway}} Consumer? + a: | + The runtime entity is a regular Kong Consumer. The {{site.ai_gateway}} surface uses the + {{site.ai_gateway}} entity convention (`display_name`, `name`, `labels`), requires an + authentication `type` field, accepts inline Consumer Group assignment, and lets you + reference Policies. Credentials are managed as a separate sub-entity rather than embedded + on the Consumer. 
+ + - q: How do I add credentials to an AI Consumer? + a: | + Credentials are a separate sub-entity, not a field on the Consumer. Create them under the + Consumer's nested credentials endpoint. See the + [Consumer Credential entity](/ai-gateway/entities/ai-consumer-credential/) reference. + + - q: "What's the difference between `type: api-key` and `type: oauth`?" + a: | + The `type` declares which credential family the Consumer authenticates with. An `api-key` + Consumer holds one or more `api-key` Credentials. An `oauth` Consumer holds one or more + `oauth` Credentials whose `custom_id` maps to the OAuth provider's identifier. The + Credential's `type` must match the Consumer's `type`. + + - q: Can a Consumer belong to multiple Consumer Groups? + a: | + Yes. The `consumer_groups` array accepts one or more references to Consumer Groups by + `name` or `id`. + + - q: How do I attach Policies to a Consumer? + a: | + Add the Policy's `name` or `id` to the Consumer's `policies` array. + See the [Policy entity](/ai-gateway/entities/ai-policy/) reference. +--- + +## What is a Consumer? + +A Consumer is the {{site.ai_gateway}} entity that represents a downstream client of the AI APIs you publish through {{site.ai_gateway}}. + +You can use Consumers and Consumer Groups to authenticate clients, attach Policies, and gate access to Models, Agents, and MCP Servers through those parent entities' `acls` field. + +Consumers can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumers +{% endtable %} + +## Configure a Consumer + +When you create a Consumer, the configuration steps generally follow this order: + +1. 
Choose an authentication `type`: `api-key` for API key credentials, or `oauth` for OAuth 2.0 / OpenID Connect credentials. +1. Optionally assign the Consumer to one or more Consumer Groups through the `consumer_groups` array. +1. Optionally attach Policies to the Consumer for request-level plugin execution. +1. Create credentials separately through the [Consumer Credential entity](/ai-gateway/entities/ai-consumer-credential/). + +For a concrete example, see [Set up a Consumer](#set-up-a-consumer). + +## Authentication type + +The `type` field declares which credential family the Consumer authenticates with. Supported values are: + +* `api-key`: the Consumer authenticates with one or more API key Credentials. +* `oauth`: the Consumer authenticates through an OAuth identity issued by an external OIDC provider. {{site.ai_gateway}} accepts any standards-compliant OAuth 2.0 / OpenID Connect provider configured through the [OpenID Connect plugin](/plugins/openid-connect/), or, for MCP traffic, through the [AI MCP OAuth2 plugin](/plugins/ai-mcp-oauth2/). The Consumer Credential carries a `custom_id` that maps to the OAuth provider's user identifier (for example, an OIDC Client ID or `sub` claim). + +The `type` of every Credential issued to the Consumer must match the Consumer's `type`. See the [Consumer Credential entity](/ai-gateway/entities/ai-consumer-credential/) reference for credential management. + +## Consumer Group membership + +You can assign a Consumer to one or more Consumer Groups through the `consumer_groups` array. Each entry references a Consumer Group by `name` or `id`. + +Consumer Groups are managed through their own entity surface. See the [Consumer Group entity](/ai-gateway/entities/ai-consumer-group/) reference. + +## Attach Policies + +Policies are how plugin configurations apply to a Consumer. Attach a Policy by adding its `name` or `id` to the Consumer's `policies` array. 
The underlying plugin runs in the request lifecycle when the Consumer is identified. + +You can attach multiple Policies to a single Consumer. Each Policy is an independent plugin instance. + +For the supported plugin types and how Policies attach to other entities, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +## Set up a Consumer + +The following example creates an AI Consumer assigned to a single Consumer Group. Credentials are issued separately through the [Consumer Credential entity](/ai-gateway/entities/ai-consumer-credential/). + +{% entity_example %} +type: consumer +data: + display_name: Mobile App - Production + name: mobile-app-production + type: api-key + consumer_groups: + - internal-teams + policies: [] +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-data-plane-certificate.md b/app/_ai_gateway_entities/ai-data-plane-certificate.md new file mode 100644 index 0000000000..d650cc7350 --- /dev/null +++ b/app/_ai_gateway_entities/ai-data-plane-certificate.md @@ -0,0 +1,124 @@ +--- +title: AI Data Plane Certificates +content_type: reference +entities: + - ai-data-plane-certificate +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-data-plane-certificate/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Client certificates that authorize data planes to connect to an {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayDataPlaneClientCertificate +works_on: + - konnect +tools: + - konnect-api + - terraform +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Provider entity + url: /ai-gateway/entities/ai-provider/ + - text: Vault entity + url: /ai-gateway/entities/ai-vault/ +faqs: + - q: Why is there no update operation? + a: | + The certificate body is immutable once registered. 
To rotate, register a new Data Plane + Certificate alongside the existing one, roll the data planes onto the new certificate, then + delete the old entry. This pattern avoids a window where no certificate is installed. + + - q: What happens to connected data planes when a certificate is deleted? + a: | + Any data plane currently connecting with the deleted certificate loses its trust anchor and + can no longer establish a connection to the {{site.ai_gateway}}. Roll data planes onto a + replacement certificate before deleting the old one. + + - q: Is the private key stored alongside the certificate? + a: | + No. Only the public certificate is registered with the {{site.ai_gateway}}. The corresponding + private key stays on the data plane and is never sent to {{site.konnect_short_name}}. + + - q: Can the same certificate be used by multiple data planes? + a: | + Yes. Any data plane provisioned with the registered certificate and its private key can + establish a connection. Use multiple certificates when you need to revoke trust for a subset + of data planes independently. + + - q: How does this relate to the {{site.base_gateway}} data plane client certificate? + a: | + It plays the same role, establishing mutual TLS between the control plane and a data plane, + but it is scoped to a single {{site.ai_gateway}} instance and managed through the + {{site.ai_gateway}} entity surface, not the {{site.konnect_short_name}} Gateway control plane API. +--- + +## What is a Data Plane Certificate? + +A Data Plane Certificate is an {{site.ai_gateway}} entity that registers a public X.509 certificate as a trusted client identity for an {{site.ai_gateway}}. Data planes that present a registered certificate and prove possession of its private key during the mTLS handshake are allowed to connect; data planes without a matching registered certificate are rejected. + +Each Data Plane Certificate belongs to exactly one {{site.ai_gateway}}. 
An {{site.ai_gateway}} can have multiple registered certificates so that you can issue one per data plane fleet, rotate keys without downtime, or revoke trust for a subset of data planes independently. + +Data Plane Certificates are managed through the {{site.konnect_short_name}} {{site.ai_gateway}} API, the {{site.konnect_short_name}} UI, or Terraform: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/data-plane-certificates +{% endtable %} + +There is no on-prem equivalent for this entity. Self-managed {{site.base_gateway}} deployments use the existing [`/certificates`](/gateway/entities/certificate/) entity and [hybrid mode node configuration](/gateway/hybrid-mode/) instead. + +## Trust model + +The {{site.ai_gateway}} acts as the control plane in a CP/DP topology. Each data plane presents a client certificate during the TLS handshake, and the {{site.ai_gateway}} accepts the connection only if the presented certificate matches one that has been registered as a Data Plane Certificate on that {{site.ai_gateway}}. + +Only the public certificate is registered with the {{site.ai_gateway}}. The private key is generated and held on the data plane side; it never leaves the data plane host. + + +{% mermaid %} +sequenceDiagram + participant DP as Data Plane + participant CP as {{site.ai_gateway}} (Control Plane) + + Note over DP: Holds private key locally
(never sent over the network) + DP->>CP: TLS handshake with client certificate + Note over CP: Compare presented certificate against
registered Data Plane Certificates + alt Certificate matches a registered entry + CP-->>DP: TLS handshake completes + DP->>CP: Receive configuration and stream telemetry + else No matching registered certificate + CP-->>DP: Connection rejected + end +{% endmermaid %} + + +## Lifecycle + +Data Plane Certificates support create, list, get, and delete operations. There is no update endpoint because the certificate body is immutable. + +To rotate a certificate without downtime: + +1. Register the new certificate as an additional Data Plane Certificate on the {{site.ai_gateway}}. +1. Reconfigure the data planes to present the new certificate and key. +1. Verify that data planes have reconnected with the new identity. +1. Delete the old Data Plane Certificate. + +Deleting a Data Plane Certificate immediately invalidates the trust for any data plane still using it. Existing connections are dropped and reconnect attempts using the deleted certificate are rejected. + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-data-plane-node.md b/app/_ai_gateway_entities/ai-data-plane-node.md new file mode 100644 index 0000000000..0a22531ad7 --- /dev/null +++ b/app/_ai_gateway_entities/ai-data-plane-node.md @@ -0,0 +1,95 @@ +--- +title: AI Data Plane Nodes +content_type: reference +entities: + - ai-data-plane-node +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-data-plane-node/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Data Plane nodes that run {{site.ai_gateway}} workloads and connect to the control plane. 
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayDataPlaneNode +works_on: + - konnect +tools: + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entity" + url: /ai-gateway/entities/ai-gateway/ + - text: Data Plane Certificate entity + url: /ai-gateway/entities/ai-data-plane-certificate/ +faqs: + - q: How do I register a new Data Plane node? + a: | + Data Plane nodes register themselves when they start and establish a connection to the + {{site.ai_gateway}} using a client certificate. Once registered, the node appears in + the {{site.konnect_short_name}} {{site.ai_gateway}} UI and is accessible via the API. + + - q: What does `config_hash` tell me? + a: | + `config_hash` is a hash of the configuration currently applied by the node. Compare + this to the {{site.ai_gateway}}'s `config_hash`. If they match, the node is in sync + with the latest control plane configuration. If they differ, the node is running stale + configuration. + + - q: What is `last_ping`? + a: | + `last_ping` is a Unix timestamp indicating the most recent heartbeat from the node. + It helps operators identify nodes that are no longer communicating with the control plane. + + - q: What do compatibility issues mean? + a: | + Compatibility issues indicate that the node's version or configuration is incompatible + with the {{site.ai_gateway}}. The issue detail includes a resolution explaining what + must be changed to bring the node into a compatible state. +--- + +## What is a Data Plane Node? + +A Data Plane Node is a runtime instance that executes {{site.ai_gateway}} traffic and maintains a connection to the {{site.konnect_short_name}} {{site.ai_gateway}} control plane. Each node runs the {{site.ai_gateway}} data plane binary, loads configuration from the control plane, and processes requests according to that configuration. + +Nodes are read-only entities in the {{site.ai_gateway}} API. 
You cannot create or delete nodes through the control plane; instead, nodes self-register when they start with a valid [Data Plane Certificate](/ai-gateway/entities/ai-data-plane-certificate/). Operators monitor and troubleshoot nodes through the {{site.konnect_short_name}} UI and API. + +Data Plane Nodes can be viewed through the {{site.konnect_short_name}} {{site.ai_gateway}} API: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/nodes +{% endtable %} + +## Understanding Node Status + +When you list or inspect a node, the key fields to monitor are: + +* **`last_ping`**: The most recent heartbeat timestamp. A stale value indicates the node has lost connectivity or crashed. +* **`config_hash`**: Compare this to the {{site.ai_gateway}}'s `config_hash`. If they differ, the node is running stale configuration and should be restarted or rolled forward. +* **`compatibility_status`**: Reports any version or configuration incompatibilities. If issues are present, review the resolution steps provided before routing traffic through the node. + +## Monitoring Nodes + +Regularly check the list of registered nodes to ensure they are healthy and in sync: + +1. **Verify connectivity**: Check `last_ping` to confirm the node is actively reporting to the control plane. +1. **Verify configuration sync**: Compare each node's `config_hash` to the {{site.ai_gateway}}'s `config_hash`. If they differ, the node is running stale configuration and should be restarted or rolled forward. +1. **Resolve compatibility issues**: If a node reports compatibility issues, the `compatibility_status` field includes resolution steps. Address them before the node begins serving traffic. 
+ +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md new file mode 100644 index 0000000000..ae0e57d47d --- /dev/null +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -0,0 +1,127 @@ +--- +title: "{{site.ai_gateway}}" +content_type: reference +entities: + - ai-gateway +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-gateway/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: | + The top-level {{site.ai_gateway}} entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. +schema: + api: konnect/ai-gateway + path: /schemas/AIGateway +works_on: + - konnect +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Provider entity + url: /ai-gateway/entities/ai-provider/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: Data Plane Certificate entity + url: /ai-gateway/entities/ai-data-plane-certificate/ +faqs: + - q: How is an {{site.ai_gateway}} different from a {{site.konnect_short_name}} Gateway control plane? + a: | + An {{site.ai_gateway}} is a dedicated control plane purpose-built for AI traffic. It exposes its own + entity surface (Models, Providers, Policies, Agents, MCP Servers, and so on) and its own + data plane runtime. It doesn't share entities or data planes with a regular + {{site.konnect_short_name}} Gateway control plane. + + - q: Can I run more than one {{site.ai_gateway}} in an organization? + a: | + Yes. An organization can hold multiple {{site.ai_gateway}} entities. Each one has its own + configuration and telemetry endpoints, its own set of child entities, and its own + data planes. + + - q: What does `config_hash` represent? 
+ a: | + `config_hash` is a hash of the {{site.ai_gateway}}'s latest configuration, including all of its + child entities. It changes any time something under the {{site.ai_gateway}} is created, updated, + or deleted. Compare it to the `config_hash` reported by a data plane node to check whether + the node has the current configuration. + + - q: What happens to child entities when I delete an {{site.ai_gateway}}? + a: | + Deleting an {{site.ai_gateway}} removes the entity. Its child entities (Models, Providers, Policies, + Agents, MCP Servers, Vaults, Consumers, Consumer Groups, and Data Plane Certificates) are + tied to the {{site.ai_gateway}} and are not addressable without it. + + - q: Is the {{site.ai_gateway}} entity available on-prem? + a: | + No. The {{site.ai_gateway}} entity is a {{site.konnect_short_name}} concept. On-prem deployments + manage the same child entities (Models, Providers, Policies, and so on) directly through + the Admin API, without a parent `ai-gateways/{id}` container. +--- + +## What is an {{site.ai_gateway}}? + +An {{site.ai_gateway}} is the top-level {{site.ai_gateway}} entity. It's a dedicated control plane for AI traffic, separate from a regular {{site.konnect_short_name}} Gateway control plane, that owns the entities {{site.ai_gateway}} uses to serve LLM and agent workloads: + +1. [Models](/ai-gateway/entities/ai-model/): AI model endpoints, capabilities, and load balancing. +1. [Providers](/ai-gateway/entities/ai-provider/): upstream LLM service connections and credentials. +1. [Policies](/ai-gateway/entities/ai-policy/): security, rate limiting, and guardrail behavior attached to other entities. +1. [Agents](/ai-gateway/entities/ai-agent/): A2A and HTTP agent routing. +1. [MCP Servers](/ai-gateway/entities/ai-mcp-server/): MCP tool exposure and session handling. +1. [Vaults](/ai-gateway/entities/ai-vault/): secret storage referenced from other entities. +1. 
[Consumers](/ai-gateway/entities/ai-consumer/), [Consumer Groups](/ai-gateway/entities/ai-consumer-group/), and [Consumer Credentials](/ai-gateway/entities/ai-consumer-credential/): identities used in access control. +1. [Data Plane Certificates](/ai-gateway/entities/ai-data-plane-certificate/): certificates that authorize data plane nodes to connect. + +Every other {{site.ai_gateway}} entity is created under an {{site.ai_gateway}} and addressed through its ID: + +{% table %} +columns: + - title: Surface + key: surface + - title: Endpoint + key: endpoint +rows: + - surface: "{{site.ai_gateway}}" + endpoint: /v1/ai-gateways + - surface: Child entities + endpoint: /v1/ai-gateways/{aiGatewayId}/{entity} +{% endtable %} + +## Endpoints + +When an {{site.ai_gateway}} is created, {{site.ai_gateway}} provisions two endpoints that data planes connect to: + +1. **Configuration endpoint** (`endpoints.configuration`): the URL data plane nodes use to receive their configuration from the control plane. +1. **Telemetry endpoint** (`endpoints.telemetry`): the URL data plane nodes use to ship analytics and runtime telemetry back to {{site.konnect_short_name}}. + +Both endpoints are read-only, assigned at creation time, and stable for the lifetime of the {{site.ai_gateway}}. Data plane nodes need both URLs, along with a [Data Plane Certificate](/ai-gateway/entities/ai-data-plane-certificate/), to register with the {{site.ai_gateway}}. + +## Configuration hash + +`config_hash` is a read-only field that {{site.ai_gateway}} updates every time anything under the {{site.ai_gateway}} changes, such as a new Model, an updated Policy, or a deleted Provider. Each data plane node reports back the `config_hash` of the configuration it's running. The two values match when the node is in sync with the control plane.
+ +Use `config_hash` to verify rollout: after a configuration change, watch the node `config_hash` (through [List Nodes](/ai-gateway/entities/ai-data-plane-certificate/) or the {{site.konnect_short_name}} UI) until every node reports the {{site.ai_gateway}}'s current value. + +## Labels + +`labels` are a free-form `key: value` map for organization. Use them to tag {{site.ai_gateway}}s by environment (`env: production`), team ownership, cost center, or any other dimension you filter on. Labels don't affect runtime behavior. + +## Lifecycle + +{{site.ai_gateway}}s can be created and managed through the {{site.konnect_short_name}} UI or the {{site.ai_gateway}} API. Once an {{site.ai_gateway}} exists, its child entities (Models, Providers, Policies, and so on) are managed through the {{site.ai_gateway}} API or decK as documented on each entity page. + +Creating an {{site.ai_gateway}} provisions the configuration and telemetry endpoints and gives you the parent ID needed to create child entities. The {{site.ai_gateway}} has no runtime traffic of its own. Traffic flows once at least one Model, Agent, or MCP Server is configured under it and a data plane node is connected. + +Updating an {{site.ai_gateway}} changes its `name`, `description`, or `labels`. Endpoints and `config_hash` are managed by {{site.ai_gateway}} and can't be set directly. + +Deleting an {{site.ai_gateway}} removes the entity. Its child entities are scoped to the {{site.ai_gateway}} and can't be addressed without it. 
+ +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-mcp-server.md b/app/_ai_gateway_entities/ai-mcp-server.md new file mode 100644 index 0000000000..6257e9156c --- /dev/null +++ b/app/_ai_gateway_entities/ai-mcp-server.md @@ -0,0 +1,544 @@ +--- +title: AI MCP Servers +content_type: reference +entities: + - ai-mcp-server +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-mcp-server/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: MCP Server entity used by {{site.ai_gateway}} to expose tools and proxy MCP traffic. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayMCPServer +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + - text: Kong MCP traffic gateway + url: /mcp/ + - text: Model Context Protocol specification + url: https://modelcontextprotocol.io/ +faqs: + - q: Which MCP protocol version does the runtime use? + a: | + The MCP runtime behind an MCP Server entity speaks MCP protocol version `2025-06-18`. Upstream + MCP servers may run `2025-06-18` or `2025-11-25`. Versions from 2024 are not supported. + + - q: What's the difference between the four server types? + a: | + `passthrough-listener` proxies MCP traffic to an upstream MCP server without converting tools. + `conversion-listener` converts a RESTful API into MCP tools and accepts MCP requests on the + same Route. `conversion-only` defines a tool library that other MCP Servers reference by tag + but doesn't accept incoming MCP traffic itself. `listener` aggregates tools from one or more + `conversion-only` MCP Servers into a single MCP endpoint. 
+ + - q: Can a Consumer's identity gate access to specific tools? + a: | + Yes. Set `default_tool_acls` on the MCP Server with `allow` and `deny` lists, and override per + tool through `tools[].acls`. A per-tool ACL replaces the default for that tool; it doesn't + merge. + + - q: How do OAuth-based ACLs differ from Consumer-based ACLs? + a: | + Set `acl_attribute_type` to `oauth_access_token` and provide `access_token_claim_field` (a jq + filter, for example `.user.email`). ACLs then evaluate against the claim value extracted from + the OAuth access token instead of the resolved Consumer identity. The OAuth flow is supplied + by the [AI MCP OAuth2 Policy](/plugins/ai-mcp-oauth2/). + + - q: What error code do denied requests return? + a: | + `HTTP 403 Forbidden`. Earlier {{site.ai_gateway}} versions returned the JSON-RPC error code + `INVALID_PARAMS -32602`; from {{site.ai_gateway}} 3.14 onward, denials follow the + [MCP 2025-11-25 authorization specification](https://modelcontextprotocol.io/specification/2025-11-25/basic/authorization#error-handling). + + - q: Can I attach the same authentication or rate-limiting plugin that I'd attach to a Route? + a: | + Plugin configuration that applies to the MCP Server goes through the + [Policy entity](/ai-gateway/entities/ai-policy/). Attach Policies to the MCP Server through its + `policies` field. +--- + +## What is an MCP Server? + +An MCP Server is a first-class {{site.ai_gateway}} entity that exposes tools to MCP-compatible clients (such as [Insomnia](https://konghq.com/products/kong-insomnia), [Claude](https://claude.ai/), [Cursor](https://cursor.com/), or [LM Studio](https://lmstudio.ai/)) over the [Model Context Protocol](https://modelcontextprotocol.io/). The runtime acts as a protocol bridge, translating between MCP and HTTP so MCP clients can either call existing APIs through {{site.ai_gateway}} or interact with upstream MCP servers.
+ +Because the runtime executes inside {{site.ai_gateway}}, MCP endpoints are provisioned dynamically on demand. You don't host or scale them separately, and the same authentication, traffic control, and observability features available to traditional API traffic apply to MCP traffic at the same scale. + +MCP Servers can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/mcp-servers +{% endtable %} + +## Configure an MCP Server + +When you create an MCP Server, the configuration steps generally follow this order: + +1. Choose a server type: `passthrough-listener` to proxy an upstream MCP server, `conversion-listener` to convert a REST API into MCP tools, `conversion-only` to define a shared tool library, or `listener` to aggregate tools from `conversion-only` servers. +1. Point the MCP Server at an upstream: supply the Service URL for conversion types, or the upstream MCP server address for `passthrough-listener`. +1. For conversion types, define tools that map MCP tool names to upstream HTTP endpoints. +1. Optionally, configure sessions for stateful interactions. +1. Optionally, attach Policies for authentication, rate limiting, and observability. +1. Optionally, configure ACLs to restrict which consumers can discover and invoke specific tools. + +For a concrete example, see [Set up an MCP Server](#set-up-an-mcp-server). 
+ +## Common Policies + +Attach plugins as [Policies](/ai-gateway/entities/ai-policy/) on the MCP Server to handle authentication, rate limiting, observability, and traffic control: + + +{% table %} +columns: + - title: Use case + key: use_case + - title: Example + key: example +rows: + - use_case: Authentication + example: | + Apply [AI MCP OAuth2](/plugins/ai-mcp-oauth2/) for MCP-spec OAuth 2.0 flows, or [OpenID Connect](/plugins/openid-connect/) / [Key Auth](/plugins/key-auth/) for non-OAuth identity. + - use_case: Rate limiting + example: | + Use [Rate Limiting](/plugins/rate-limiting/) or [Rate Limiting Advanced](/plugins/rate-limiting-advanced/) to control MCP request volume. + - use_case: Observability + example: | + Add [logging and tracing plugins](/plugins/?category=logging) for full request and response visibility. MCP metrics surface in [{{site.konnect_short_name}} analytics](/ai-gateway/monitor-ai-llm-metrics/#mcp-traffic-metrics). + - use_case: Traffic control + example: | + Apply [request and response transformation plugins](/plugins/?category=transformations) or [ACL policies](/plugins/acl/). +{% endtable %} + + +## Server modes + +The `type` field selects one of four modes. Each mode determines how the runtime handles MCP requests and whether it converts RESTful APIs into MCP tools. + + +{% table %} +columns: + - title: Mode + key: mode + - title: Description + key: description + - title: Use cases + key: usecase +rows: + - mode: "`passthrough-listener`" + description: | + Listens for incoming MCP requests and proxies them to an upstream MCP server without + converting tools. Generates MCP observability metrics. + usecase: | + You already operate an MCP server and want {{site.ai_gateway}} to act as an authenticated, + observable entrypoint. Common for third-party or internally hosted MCP services exposed + through {{site.ai_gateway}}. 
+ - mode: "`conversion-listener`" + description: | + Converts RESTful API paths into MCP tools and accepts incoming MCP requests on the Route + path. Tools are defined directly on the MCP Server and an optional server block applies. + {% new_in 3.13 %} Supports session identifiers set by authentication services for cookie-based + authentication. + usecase: | + Make an existing REST API available to MCP clients directly through {{site.ai_gateway}}. + Common for services that both define and handle their own tools. + - mode: "`conversion-only`" + description: | + Converts RESTful API paths into MCP tools but does not accept incoming MCP requests. + Tools are tagged at the MCP Server level so a `listener` MCP Server can reference them. + Used together with one or more `listener` MCP Servers. + usecase: | + Define reusable tool specifications without serving them. Suitable for teams that maintain + a shared library of tool definitions. + - mode: "`listener`" + description: | + Similar to `conversion-listener`, but instead of defining its own tools, it binds tools + from one or more `conversion-only` MCP Servers through `config.server.tag`. + usecase: | + A single MCP endpoint that aggregates tools from multiple `conversion-only` MCP Servers. + Typical in multi-service or multi-team environments that expose a unified MCP interface. +{% endtable %} + + +## How MCP traffic flows + +For `conversion-listener`, `conversion-only`, and `listener` modes, the runtime converts MCP requests into HTTP calls and wraps the responses back in MCP format: + +1. Accepts an MCP protocol request from a client. +1. Parses the MCP tool call and matches it to a tool definition. +1. Converts the call into a standard HTTP request. +1. Sends the request to the upstream Service. +1. Wraps the HTTP response in MCP format and returns it to the client. + +For `passthrough-listener` mode, the runtime proxies MCP traffic directly to the upstream MCP server without conversion. 
+ + +{% mermaid %} +sequenceDiagram + participant Client as MCP Client + participant Gateway as {{site.ai_gateway}}
(MCP Server) + participant Upstream as Upstream Service + + Client->>Gateway: MCP request (tool invocation) + activate Gateway + Gateway->>Gateway: Parse MCP payload + Gateway->>Gateway: Map to HTTP endpoint + Gateway->>Upstream: HTTP request + deactivate Gateway + activate Upstream + Upstream-->>Gateway: HTTP response + deactivate Upstream + activate Gateway + Gateway->>Gateway: Convert to MCP format + Gateway-->>Client: MCP response + deactivate Gateway +{% endmermaid %} + + +{:.info} +> Pings from MCP clients are included in the total request count for an {{site.ai_gateway}} +> instance, in addition to requests made to the MCP server itself. + +## Tools + +A [tool](#schema-aigateway-mcpserver-tools) maps an MCP tool name to an upstream HTTP endpoint. Each tool needs at minimum a description and an HTTP method. The runtime extracts the host, path, headers, and query from the route configuration, so most tool entries don't need to specify them. Override these on the tool entry only when the route doesn't match the upstream endpoint exactly. + +For richer mapping, supply [`request_body`](#schema-aigateway-mcpserver-tools-request-body), [`responses`](#schema-aigateway-mcpserver-tools-responses), and [`parameters`](#schema-aigateway-mcpserver-tools-parameters) specifications in OpenAPI JSON format. The runtime uses them to validate calls and shape upstream HTTP requests. + +Tools can also carry MCP-spec [annotations](#schema-aigateway-mcpserver-tools-annotations) that hint at tool behavior to clients (for example, whether a tool is read-only, idempotent, or destructive). Annotations don't change runtime behavior; they help clients decide whether to surface a tool, confirm before invocation, or treat it as safe to retry. + +[Per-tool ACLs](#schema-aigateway-mcpserver-tools-acls) override the MCP Server's [default tool ACLs](#schema-aigateway-mcpserver-default-tool-acls). See [ACL tool control](#acl-tool-control). 
+ +## Sessions + +`listener` and `conversion-listener` MCP Servers support managed sessions for stateful interactions. Configure session storage through `config.server.session`. The `passthrough-listener` mode doesn't use managed sessions because session state lives on the upstream MCP server. + +Two session strategies are available: + +1. **Client.** Session state is encrypted into the MCP session ID assigned to the client. Requires `secrets`, a list of encryption keys. The first entry is used for encryption, and all entries are used for decryption to support key rotation. +1. **Redis.** Session state is stored in Redis. Configure connection details and authentication in `config.server.session.redis`. + +{% include_cached /plugins/redis/redis-cloud-auth.md tier='enterprise' %} + +`session_ttl` controls how long sessions live (default 24 hours). Set `managed: false` to disable managed sessions when the upstream maintains state externally. + +Secrets used in session encryption can be referenced from a [Vault](/ai-gateway/entities/ai-vault/). + +## Server configuration + +The `config.server` block carries runtime settings that apply across all tools on the MCP Server: + + +{% table %} +columns: + - title: Field + key: field + - title: Default + key: default + - title: Description + key: description +rows: + - field: "[`forward_client_headers`](#schema-aigateway-mcpserver-config-server-forward-client-headers)" + default: "`true`" + description: Whether to forward client request headers to the upstream when calling tools. + - field: "[`tag`](#schema-aigateway-mcpserver-config-server-tag)" + default: (none) + description: A single tag used by `listener` MCP Servers to filter which `conversion-only` tools to expose. + - field: "[`timeout`](#schema-aigateway-mcpserver-config-server-timeout)" + default: 10 seconds + description: Maximum time to wait for an upstream tool call.
+{% endtable %} + + +[`config.max_request_body_size`](#schema-aigateway-mcpserver-config-max-request-body-size) controls the maximum incoming request body size accepted by the MCP Server (default 1 MB). + +## ACL tool control + +When exposing MCP servers through {{site.ai_gateway}}, you may need granular control over which authenticated API consumers can discover and invoke specific tools. The MCP Server's ACL feature lets you define access rules at both the default level (applying to all tools) and per-tool level (for fine-grained exceptions). + +This way, consumers only interact with tools appropriate to their role, while maintaining a complete audit trail of all access attempts. Authentication is handled by an authentication Policy attached to the MCP Server (such as [Key Auth](/plugins/key-auth/) or an OIDC flow), and the resulting Consumer identity is used for ACL checks. + +{:.info} +> **ACL in `listener` mode** +> +> Listener mode does not support direct ACL configuration. Instead, it inherits ACL rules from tagged `conversion-listener` or `conversion-only` MCP Servers. +> +> To use ACLs with `listener` mode: +> 1. Configure `conversion-listener` or `conversion-only` MCP Servers with ACL rules and tags. +> 1. Configure `listener` mode to aggregate tools by matching tags. +> 1. Set `include_consumer_groups: true` on the listener. Without this setting, the listener cannot pass Consumer Group membership to the aggregated tools, and ACL rules will not evaluate correctly. +> +> See [Enforce ACLs on aggregated MCP servers](/mcp/enforce-acls-on-aggregated-mcp-servers/) for a complete example. + +### Attribute types + +Two attribute types determine what the MCP Server evaluates ACL rules against: + +1. **`consumer`** (default). Evaluates against the resolved Consumer identity. +1. **`oauth_access_token`**. Evaluates against a claim extracted from the OAuth access token. Set `access_token_claim_field` to a jq filter (for example, `.user.email` for a nested claim). 
The OAuth flow itself is supplied by the [AI MCP OAuth2 Policy](/plugins/ai-mcp-oauth2/). + +### Supported identifier types + +When `acl_attribute_type` is `consumer`, ACL rules can reference [Consumers](/gateway/entities/consumer/) and [Consumer Groups](/gateway/entities/consumer-group/) using these identifier types in `allow` and `deny` lists: + +* [`username`](/gateway/entities/consumer/#schema-consumer-username): Consumer username +* [`id`](/gateway/entities/consumer/#schema-consumer-id): Consumer UUID +* [`custom_id`](/gateway/entities/consumer/#schema-consumer-custom-id): Custom Consumer identifier +* [`consumer_groups.name`](/gateway/entities/consumer-group/#schema-consumer-group-name): Consumer Group name + +The authenticated Consumer identity is matched against these identifiers. If the [Consumer](/gateway/entities/consumer/) or any of their [Consumer Groups](/gateway/entities/consumer-group/) match an ACL entry, the rule applies. + +### How default and per-tool ACLs work + +The runtime evaluates access using a two-tier system: + + +{% table %} +columns: + - title: ACL type + key: field + - title: Description + key: description +rows: + - field: "`default_tool_acls`" + description: | + Baseline rules that apply to all tools unless overridden. + - field: "`tools[].acls`" + description: | + When configured, these rules replace the default ACL for that specific tool. The per-tool ACL doesn't inherit or merge with `default_tool_acls`. It is an all-or-nothing override. +{% endtable %} + + +{:.info} +> If a tool defines its own ACL, the runtime ignores `default_tool_acls` for that tool: +> +> - Tools with no ACL configuration inherit the default rules (both `allow` and `deny` lists). +> - Tools with an ACL must explicitly list all allowed subjects (even if they were already in `default_tool_acls`). + +### ACL evaluation logic + +Both default and per-tool ACLs use `allow` and `deny` lists. Evaluation follows this order: + +1. **Deny list configuration**.
If a `deny` list exists and the subject matches any `deny` entry, the request is rejected (`HTTP 403 Forbidden`). +1. **Allow list configuration**. If an `allow` list exists, the subject must match at least one entry; otherwise, the request is denied (`HTTP 403 Forbidden`). +1. **No allow list configuration**. If no `allow` list exists and the subject is not in `deny`, the request is allowed. +1. **No ACL configuration**. If neither list exists, the request is allowed. + +All access attempts (allowed or denied) are written to the audit log. + +The table below summarizes the possible ACL configurations and their outcomes. + +{% table %} +columns: + - title: Condition + key: condition + - title: "Proxied to upstream service?" + key: proxy + - title: Response code + key: response +rows: + - condition: "Subject matches any `deny` rule" + proxy: "No" + response: HTTP 403 Forbidden + - condition: "`allow` list exists and subject is not in it" + proxy: "No" + response: HTTP 403 Forbidden + - condition: "Only `deny` list exists and subject is not in it" + proxy: "Yes" + response: "200" + - condition: "No ACL rules configured" + proxy: "Yes" + response: "200" +{% endtable %} + +### ACL tool control request flow + +The runtime evaluates ACLs for both tool discovery and tool invocation. These are two distinct operations with different behaviors: + +**Tool discovery (list tools)**: + +1. MCP client requests the list of available tools. +1. The authentication Policy validates the request and identifies the Consumer. +1. The runtime loads the Consumer's group memberships. +1. The runtime evaluates each tool against `default_tool_acls`. +1. The runtime returns an HTTP 200 response with only the tools the Consumer is allowed to access. +1. The runtime logs the discovery attempt. + +**Tool invocation**: + +1. MCP client invokes a specific tool. +1. The authentication Policy validates the request and identifies the Consumer. +1. The runtime loads the Consumer's group memberships. +1.
The runtime evaluates the tool-specific ACL if it exists, or the default ACL otherwise. +1. The runtime logs the access attempt (allowed or denied). +1. The runtime returns `HTTP 403 Forbidden` if denied, or forwards the request to the upstream MCP server if allowed. + + +{% mermaid %} +sequenceDiagram + participant Client as MCP Client + participant Gateway as {{site.ai_gateway}} + participant Auth as AuthN Policy + participant ACL as MCP Server (ACL/Audit) + participant Up as Upstream MCP Server + participant Log as Audit Sink + + %% ----- List Tools ----- + rect + note over Client,Gateway: List Tools (Default ACL Scope) + Client->>Gateway: GET /tools + Gateway->>Auth: Authenticate + Auth-->>Gateway: Consumer identity + Gateway->>ACL: Evaluate scoped default ACL + ACL-->>Log: Audit entry + alt If allowed + Gateway-->>Client: Filtered tool list + else If denied + Gateway-->>Client: HTTP 403 Forbidden + end + end + + %% ----- Tool Invocation ----- + rect + note over Client,Up: Tool Invocation (Per-tool ACL) + Client->>Gateway: POST /tools/{tool} + Gateway->>Auth: Authenticate + Auth-->>Gateway: Consumer identity + Gateway->>ACL: Evaluate per-tool ACL + ACL-->>Log: Audit entry + alt If allowed + Gateway->>Up: Forward request + Up-->>Gateway: Response + Gateway-->>Client: Response + else If denied + Gateway-->>Client: HTTP 403 Forbidden + end + end +{% endmermaid %} + + +## Logging and audits + +[Logging](#schema-aigateway-mcpserver-config-logging) captures three layers of MCP traffic: per-request statistics for telemetry, request and response payloads for full visibility, and [audit entries](/ai-gateway/ai-audit-log-reference/#ai-mcp-logs) for every ACL decision. Payload logging may expose sensitive data; enable it with care. 
MCP Server analytics surface in [{{site.konnect_short_name}} Explorer and Dashboards](/ai-gateway/monitor-ai-llm-metrics/#mcp-traffic-metrics) alongside other {{site.ai_gateway}} traffic, and export through [OpenTelemetry](/ai-gateway/ai-otel-metrics/#mcp-metrics). + +## Attach Policies + +Policies are how plugin configurations apply to an MCP Server. Authentication, rate limiting, request and response transformation, and OAuth gating (through [AI MCP OAuth2](/plugins/ai-mcp-oauth2/)) attach to the MCP Server through the `policies` field. Each entry is a string that references a Policy by name or ID. Multiple Policies can attach to one MCP Server; each runs as an independent plugin instance. + +For details, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +## Scope of support + +The MCP Server runtime supports MCP operations and upstream interactions, while certain advanced features and non-HTTP protocols are not currently supported. The table below summarizes what is supported and what is outside the current scope. 
+ + +{% feature_table %} +item_title: Features +columns: + - title: Description + key: description + - title: Supported + key: supported + +features: + - title: "Protocol" + description: Handling latest streamable HTTP with HTTP and HTTPS upstreams + supported: true + - title: "OpenAPI operations" + description: Mapping MCP calls to upstream HTTP operations based on the OpenAPI schema + supported: true + - title: "JSON format" + description: Handling standard JSON request and response bodies + supported: true + - title: "Form-encoded data" + description: Handling `application/x-www-form-urlencoded` + supported: true + - title: "SNI routing" + description: Converting SNI-only routes + supported: false + - title: "Form and XML data" + description: Handling formats such as multipart/form-data or XML + supported: false + - title: "Advanced MCP features" + description: Handling structured output, active notifications on tool changes, and session sharing between instances + supported: false + - title: "Non-HTTP protocols" + description: Handling WebSocket and gRPC upstreams + supported: false + - title: "AI Guardrails" + description: Applying guardrails to MCP AI requests and responses + supported: false +{% endfeature_table %} + + +## Set up an MCP Server + +The following example creates a `conversion-listener` MCP Server that converts a flight-booking REST API into a single `searchFlights` MCP tool, restricts access to the `internal-teams` Consumer Group, and stores managed sessions in client-side encrypted form. 
+ +{% entity_example %} +type: mcp_server +data: + display_name: KongAir Flights + name: kongair-flights + type: conversion-listener + acl_attribute_type: consumer + acls: + allow: + - internal-teams + deny: [] + default_tool_acls: + allow: + - internal-teams + deny: [] + policies: [] + config: + logging: + statistics: true + payloads: false + audits: true + max_request_body_size: 1048576 + server: + forward_client_headers: true + timeout: 10000 + session: + managed: true + strategy: client + session_ttl: 86400 + client: + secrets: + - "{vault://my-vault/session-secret}" + tools: + - name: searchFlights + description: Search for available flights between two airports. + method: GET + path: /flights + annotations: + title: Search flights + read_only_hint: true + idempotent_hint: true +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-model.md b/app/_ai_gateway_entities/ai-model.md new file mode 100644 index 0000000000..039e28e240 --- /dev/null +++ b/app/_ai_gateway_entities/ai-model.md @@ -0,0 +1,419 @@ +--- +title: AI Models +content_type: reference +entities: + - ai-model +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-model/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: AI Models registered with the {{site.ai_gateway}}. 
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayModel +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} providers" + url: /ai-gateway/ai-providers/ + - text: Load balancing with AI Proxy Advanced + url: /ai-gateway/load-balancing/ + - text: Provider entity + url: /ai-gateway/entities/ai-provider/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ +faqs: + - q: What's the difference between a Model entity and a `model` field inside a plugin configuration? + a: | + A Model entity is the first-class {{site.ai_gateway}} entity you declare through the `/ai/models` API or {{site.konnect_short_name}}. + {{site.ai_gateway}} derives the underlying plugin and its `model` configuration from the entity. + You don't configure the underlying plugin directly. + + - q: Can I edit the Service, Routes, or plugins that {{site.ai_gateway}} generates from a Model? + a: | + No. Generated primitives are protected from direct modification through the standard Admin API. + Update the Model entity instead, and {{site.ai_gateway}} recreates the underlying primitives within a single transaction. + + - q: How do I configure models in on-prem deployments? + a: | + {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + For on-prem deployments, configure AI proxy behavior using {{site.base_gateway}} plugins directly (for example, the AI Proxy plugin). + See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. + + - q: What happens when I update a Model? + a: | + {{site.ai_gateway}} deletes the Model's derived primitives and recreates them from the updated entity state, all within a single database transaction. 
+ On failure, the transaction rolls back and no partial state is written. + + - q: What happens when I delete a Model? + a: | + The Model and all its derived primitives (Service, Routes, plugin instances) are deleted within a single transaction. + + - q: Can I apply the same configuration to multiple Models? + a: | + Yes, by creating a separate Policy with that configuration for each Model. + Policies are not shared between entities; each instance is independent. + See [Policy entity](/ai-gateway/entities/ai-policy/). + + - q: How do I limit which consumers can reach a Model? + a: | + Set the `acls` field on the Model with allow or deny lists. + Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. + + - q: Does the Model entity store provider credentials? + a: | + No. Provider credentials live on the [Provider entity](/ai-gateway/entities/ai-provider/) and are materialized into the underlying primitives at Model creation time. + Updating a Provider propagates the credential change to all Models that reference it. + + - q: Can a client override the model name from the request body? + a: | + By default, no. The request `model` field must match the upstream model on one of the Model's targets, otherwise the runtime returns a `400` error. + To accept a client-side alias, set `config.model.alias` on the Model and clients can send the alias value in the request `model` field instead of the upstream provider model name. + + - q: Can a client override `temperature`, `top_p`, or `top_k` from the request? + a: | + Yes. Values for `temperature`, `top_p`, and `top_k` in the request take precedence over the per-target configuration declared on `target_models[].config`. + + - q: Which algorithm does `lowest-latency` use to pick the fastest target? + a: | + Exponentially Weighted Moving Average (EWMA).
EWMA continuously updates with every response, weighting recent observations more heavily, so older latencies decay over time but still contribute. There is no fixed learning-phase window. + + - q: Does the load balancer keep probing slower targets after picking a winner? + a: | + Yes. EWMA ensures every target continues to receive a small share of traffic (typically 0.1% to 5%, depending on the latency gap). This ongoing probing lets the load balancer adapt if a previously slower target becomes faster. + +--- + +## What is a Model? + +A Model is a first-class {{site.ai_gateway}} entity that represents an AI model endpoint exposed through {{site.ai_gateway}}. + +A Model declares which capabilities it exposes (such as `chat`, `responses`, or `embeddings`), which upstream provider models it routes to, and how requests are load-balanced and logged. {{site.ai_gateway}} translates a Model into the underlying primitives that the runtime uses to serve traffic, so you don't need to assemble Services, Routes, or plugin entries by hand. + +Models can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/models +{% endtable %} + +## Configure a Model + +When you create a Model in {{site.konnect_short_name}} or via the API, the configuration steps generally follow this order: + +1. Choose a type (`model` or `api`) and declare which capabilities the Model exposes. +1. Add one or more target models, each pointing to a Provider with credentials. +1. Select a request and response format (default is `openai`). +1. If you have more than one target, configure load balancing in `config.balancer`. +1. Optionally, attach Policies to add plugin configuration and set `acls` to control access. 
+ +For a concrete example, see [Set up a Model](#set-up-a-model). + +## How it works + +When you configure a Model, you define what capabilities it exposes, which upstream providers it routes to, and how requests are load-balanced and logged. At request time, the Model mediates traffic between clients and upstream provider APIs: + +1. Translates between the request and response format chosen for the Model and the upstream provider's native format. +1. Resolves upstream connection coordinates (protocol, host, port, path, HTTP method) from the selected target and its [Provider](/ai-gateway/entities/ai-provider/), unless the target is a self-hosted model. +1. Authenticates to the upstream provider using credentials stored on the Provider entity. +1. Decorates the upstream request with per-target configuration (such as temperature or token-limit overrides) declared on `target_models[].config`. +1. Records usage statistics (tokens, cost, latency) for attached log Policies, and optionally the full request and response when payload logging is enabled. +1. Fulfills requests to self-hosted models using the supported native format transformations. + +A single Model can expose multiple upstream providers behind a consistent client-facing format, so callers don't change their request shape when the underlying Provider changes. + +## How a Model maps to runtime configuration + +When you create or update a Model, {{site.ai_gateway}} generates a fixed set of primitives: + +* One [Gateway Service](/gateway/entities/service/). +* One [Route](/gateway/entities/route/) per declared capability in the `capabilities` array. +* One [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin per generated Route. + +Provider credentials are added into the AI Proxy Advanced plugin configuration at generation time, sourced from the Provider entity that the Model's `target_models` reference. Updating the Provider propagates credential changes to every Model that uses it. 
+ +Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against the underlying Service, Routes, or plugin entries through the standard Admin API are rejected. To change anything about a Model's runtime footprint, update the Model entity. {{site.ai_gateway}} deletes and recreates the derived primitives within a single transaction. + +{:.info} +> **Why a transaction instead of an in-place update?** +> +> A Model's structure (which capabilities exist, which providers it routes to) determines how many Routes and plugin entries are needed. A delete-and-recreate cycle is the simplest way to keep the entity and its derived primitives consistent, especially when capabilities are added or removed. + +## Capabilities + +The [`capabilities`](#schema-aigateway-model-capabilities) field tells {{site.ai_gateway}} which AI workflows the Model exposes. Each capability becomes one Route on the generated Service. A Model must declare at least one capability. + +Model [`type`](#schema-aigateway-model-type) controls which capability set applies: + +* `model`: synchronous request/response workloads through generative APIs. Supported capabilities are `chat`, `embeddings`, `assistants`, `responses`, `audio-transcriptions`, `audio-translations`, `image-generation`, `image-edits`, `video-generations`, and `realtime`. +* `api`: asynchronous workloads through the files and batches APIs. Supported capabilities are `batches` and `files`. + +Not every provider supports every capability. The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. See [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for per-provider details. + +The following table describes each capability and links to an example of the corresponding [AI Proxy plugin](/plugins/ai-proxy/) route type. 
+ + +{% table %} +columns: + - title: Capability + key: capability + - title: Description + key: description + - title: Example route + key: example +rows: + - capability: "`chat`" + description: Conversational responses from a sequence of messages. + example: "[`llm/v1/chat`](/plugins/ai-proxy/examples/openai-chat-route/)" + - capability: "`embeddings`" + description: Vector representations for semantic search and similarity matching. + example: "[`llm/v1/embeddings`](/plugins/ai-proxy/examples/embeddings-route-type/)" + - capability: "`assistants`" + description: Persistent tool-using agents with metadata for debugging and evaluation. + example: "[`llm/v1/assistants`](/plugins/ai-proxy/examples/assistants-route-type/)" + - capability: "`responses`" + description: REST-based full-text responses. + example: "[`llm/v1/responses`](/plugins/ai-proxy/examples/responses-route-type/)" + - capability: "`audio-transcriptions`" + description: Speech-to-text. + example: "[`audio/v1/audio/transcriptions`](/plugins/ai-proxy/examples/audio-transcription-openai/)" + - capability: "`audio-translations`" + description: Audio translation between languages. + example: "[`audio/v1/audio/translations`](/plugins/ai-proxy/examples/audio-translation-openai/)" + - capability: "`image-generation`" + description: Generate images from text prompts. + example: "[`image/v1/images/generations`](/plugins/ai-proxy/examples/image-generation-openai/)" + - capability: "`image-edits`" + description: Modify images from text prompts. + example: "[`image/v1/images/edits`](/plugins/ai-proxy/examples/image-edits-openai/)" + - capability: "`video-generations`" + description: Generate videos from text prompts. + example: "[`video/v1/videos/generations`](/plugins/ai-proxy/examples/video-generation-openai/)" + - capability: "`realtime`" + description: Bidirectional WebSocket streaming for low-latency, interactive voice and text. 
+ example: "[`realtime/v1/realtime`](/plugins/ai-proxy-advanced/examples/realtime-route-openai/)" + - capability: "`batches`" + description: Asynchronous bulk LLM requests for long workloads. + example: "[`llm/v1/batches`](/plugins/ai-proxy/examples/batches-route-type/)" + - capability: "`files`" + description: File uploads for long documents and structured input. + example: "[`llm/v1/files`](/plugins/ai-proxy/examples/files-route-type/)" +{% endtable %} + + +## Request and response formats + +The [`formats`](#schema-aigateway-model-formats) array on a Model declares the request and response shapes the Model accepts. Each entry has a `type` that selects the format. The default `openai` format flattens upstream provider responses into the OpenAI shape, so clients can use a single request and response format across providers. + +To preserve a provider's native request and response format instead, set `formats[].type` to a non-OpenAI value. The Model passes requests upstream without conversion, while {{site.ai_gateway}} continues to provide analytics, logging, and cost calculation. + + +{% table %} +columns: + - title: Format + key: format + - title: Provider + key: provider + - title: Native capabilities + key: capabilities +rows: + - format: "`openai`" + provider: All supported providers (default) + capabilities: Translates between OpenAI request and response shapes and the upstream provider format. + - format: "`anthropic`" + provider: "[Anthropic](/ai-gateway/ai-providers/anthropic/#supported-native-llm-formats-for-anthropic)" + capabilities: Messages, batch processing. + - format: "`bedrock`" + provider: "[Amazon Bedrock](/ai-gateway/ai-providers/bedrock/#supported-native-llm-formats-for-amazon-bedrock)" + capabilities: Converse, RAG (RetrieveAndGenerate), reranking, async invocation. + - format: "`cohere`" + provider: "[Cohere](/ai-gateway/ai-providers/cohere/#supported-native-llm-formats-for-cohere)" + capabilities: Reranking. 
+ - format: "`gemini`" + provider: "[Gemini](/ai-gateway/ai-providers/gemini/#supported-native-llm-formats-for-gemini), [Vertex AI](/ai-gateway/ai-providers/vertex/#supported-native-llm-formats-for-gemini-vertex)" + capabilities: Content generation, embeddings, batches, file uploads, reranking, long-running predictions. + - format: "`huggingface`" + provider: "[Hugging Face](/ai-gateway/ai-providers/huggingface/#supported-native-llm-formats-for-hugging-face)" + capabilities: Text generation, streaming. +{% endtable %} + + +When a native format is set, only the corresponding provider is supported with its specific APIs. For format-specific behavior and limitations, see the [AI Proxy plugin reference](/plugins/ai-proxy/#supported-native-llm-formats). + +## Target models + +A Model is a virtual model: it exposes one route ([`config.route`](#schema-aigateway-model-config-route)) and one set of capabilities, and routes requests to one or more concrete upstream models declared in its [`target_models`](#schema-aigateway-model-target-models) array. Each entry represents a single upstream model instance with one URL. + +For each target, you provide the upstream model name (for example, `gpt-4o`) and reference the Provider to use by its `name`. Each target can also override settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. + +There's no separate Target Model entity or endpoint. Target models are managed only as nested data inside a Model, through the same Model API surface used to create, update, and delete the parent. Adding, removing, or modifying a target is an update to the Model itself. + +## Load balancing + +A Model routes to a single target by default. Add more than one target when you want redundancy, fallback between providers, or cost and latency optimization. When you have multiple targets, configure `config.balancer` to distribute requests according to a load balancing algorithm. 
+ +When a Model has more than one target, the [load balancer](#schema-aigateway-model-config-balancer) sits between the virtual model and its targets, distributing requests according to `config.balancer`. For algorithm details, selection guidance, and tuning, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). + +### Algorithms + +The [`algorithm`](#schema-aigateway-model-config-balancer-algorithm) field selects one of seven load balancing strategies for distributing requests across target models. + + +{% table %} +columns: + - title: Algorithm + key: algorithm + - title: Behavior + key: behavior +rows: + - algorithm: "[`round-robin`](/plugins/ai-proxy-advanced/examples/round-robin/)" + behavior: Weighted traffic distribution across targets. + - algorithm: "[`consistent-hashing`](/plugins/ai-proxy-advanced/examples/consistent-hashing/)" + behavior: Sticky sessions based on header values. + - algorithm: "[`least-connections`](/plugins/ai-proxy-advanced/examples/least-connections/)" + behavior: Route to backends with spare capacity. + - algorithm: "[`lowest-latency`](/plugins/ai-proxy-advanced/examples/lowest-latency/)" + behavior: Route to the fastest-responding model. + - algorithm: "[`lowest-usage`](/plugins/ai-proxy-advanced/examples/lowest-usage/)" + behavior: Route based on token counts or cost. + - algorithm: "[`semantic`](/plugins/ai-proxy-advanced/examples/semantic/)" + behavior: Route based on prompt-to-model similarity. + - algorithm: "[`priority`](/plugins/ai-proxy-advanced/examples/priority/)" + behavior: Tiered failover across model groups. +{% endtable %} + + +### Retry and fallback + +The load balancer supports configurable retries, timeouts, and failover to different targets when one is unavailable. Fallback works across targets with any supported format, so you can mix providers freely (for example, OpenAI and Mistral). 
For configuration details, see [Retry and fallback configuration](/ai-gateway/load-balancing/#retry-and-fallback). + +{:.info} +> Client errors don't trigger failover. To fail over on additional error types, set +> [`failover_criteria`](#schema-aigateway-model-config-balancer-failover-criteria) to include HTTP codes +> like `http_429` or `http_502`, and `non_idempotent` for POST requests. + +### Health check and circuit breaker + +The load balancer includes a circuit breaker that improves reliability under sustained failures. When a target reaches the failure threshold set by [`max_fails`](#schema-aigateway-model-config-balancer-max-fails), the load balancer stops routing requests to it until the [`fail_timeout`](#schema-aigateway-model-config-balancer-fail-timeout) period elapses. For behavior examples and tuning, see [Circuit breaker](/ai-gateway/load-balancing/#health-check-and-circuit-breaker). + +### Vector store + +A vector store holds numerical representations (embeddings) of requests and responses so the runtime can match new requests against stored vectors. It powers the [`semantic`](#schema-aigateway-model-config-balancer-algorithm) algorithm and any similarity-matching workflow on the Model. Configure storage through [`config.balancer.vectordb`](#schema-aigateway-model-config-balancer-vectordb) by selecting a `strategy`: + +* `redis`: connects to Redis with Vector Similarity Search (VSS), AWS MemoryDB for Redis, or Valkey. {{site.ai_gateway}} auto-detects Valkey from the server name field and uses the Valkey-specific driver. +* `pgvector`: connects to PostgreSQL with the pgvector extension. + +For deeper background on vector storage and similarity matching, see [Embedding-based similarity matching](/ai-gateway/semantic-similarity/). + +### Embeddings + +An embedding model converts request and response text into vector representations for the vector store. 
Set [`config.balancer.embeddings`](#schema-aigateway-model-config-balancer-embeddings) to reference a Provider and an embedding model name. Supported provider types are `azure`, `bedrock`, `gemini`, and `huggingface`. The same embedding model also powers the `lowest-usage` algorithm when usage is calculated against semantic content. + +## Templating + +The Model resolves runtime values from request data using placeholder substitution. This lets you select the target model dynamically per request, route to per-deployment Azure endpoints, or fan out to multiple providers from a single Model. + +Substitution applies to the [`name`](#schema-aigateway-model-target-models-name) of each target model and to any per-target [`config`](#schema-aigateway-model-target-models-config) option. Three placeholders are available: + +* `$(headers.header_name)`: the value of a request header. +* `$(uri_captures.path_parameter_name)`: the value of a captured URI path parameter. +* `$(query_params.query_parameter_name)`: the value of a query string parameter. + +For end-to-end examples, see [dynamic model selection](/plugins/ai-proxy/examples/sdk-dynamic-model-selection/), [Azure deployment routing](/plugins/ai-proxy/examples/sdk-azure-deployment/), and [proxying multiple models in one Azure instance](/plugins/ai-proxy/examples/sdk-multiple-providers/) on the AI Proxy plugin page. + +## Access control + +A Model's `acls` field controls which identities are allowed to reach the Model. The field accepts `allow` and `deny` lists. Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. Access is enforced at the Service level of the generated primitives. + +For per-request authentication and identity, configure the appropriate authentication plugin globally or as a Policy on the Model. + +## Attach Policies + +Policies are how plugin configurations apply to a Model. 
A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. + +A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. On-prem also supports the nested endpoint `/ai/models/{modelId}/policies`, which creates and attaches a Policy in one call. + +You can attach multiple Policies to a single Model. Each Policy has an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. + +Not every plugin type is valid as a Model Policy. + +Policies created through the nested on-prem endpoint (`POST /ai/models/{modelId}/policies`) are deleted when the Model is deleted. Policies created independently (for example, at `/v1/ai-gateways/{aiGatewayId}/policies` or `/ai/policies`) are not deleted when the Model is deleted; only the Model's reference is removed. + +For further information, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +### Plugin priority and Policy execution order + +A Policy attached to a Model creates one plugin entry on the Service of the Model's derived primitives. That plugin runs at the [priority](/gateway/entities/plugin/#plugin-priority) of its underlying plugin type, which determines when it executes relative to other plugins on the request. + +The AI Proxy Advanced plugin runs at priority `770` and parses the request body to resolve the model name. Any Policy whose underlying plugin type has a priority higher than `770` runs before that resolution. Authentication plugin types (such as OpenID Connect) fall into this category. 
They still gate access correctly because routing to the Model's generated Service already occurred, but model-level identity details (provider and target model) are not available yet. + +For Policies whose runtime behavior depends on the resolved Model identity, attach plugin types that run at priority `770` or lower, or use [dynamic plugin ordering](/gateway/entities/plugin/) to push their execution later. + +## Set up a Model + +The following example creates an OpenAI Model that exposes both `chat` and `responses` capabilities, routed through a single OpenAI Provider, with token usage logging enabled. + +{% entity_example %} +type: model +data: + display_name: GPT-4o Production + name: gpt-4o-production + type: model + enabled: true + capabilities: + - chat + - responses + formats: + - type: openai + acls: + allow: + - internal-teams + deny: [] + policies: [] + target_models: + - name: gpt-4o + provider: + name: my-openai-account + config: + temperature: 0.7 + max_tokens: 4096 + input_cost: 0.0000025 + output_cost: 0.000010 + config: + logging: + statistics: true + payloads: false + response_streaming: allow + max_request_body_size: 1048576 + model: + name_header: true + balancer: + algorithm: round-robin + retries: 3 + connect_timeout: 60000 + read_timeout: 60000 + write_timeout: 60000 +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-policy.md b/app/_ai_gateway_entities/ai-policy.md new file mode 100644 index 0000000000..0d33f55848 --- /dev/null +++ b/app/_ai_gateway_entities/ai-policy.md @@ -0,0 +1,139 @@ +--- +title: AI Policies +content_type: reference +entities: + - ai-policy +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-policy/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: "Policies for {{site.ai_gateway}}." 
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayPolicy +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Agent entity + url: /ai-gateway/entities/ai-agent/ + - text: MCP Server entity + url: /ai-gateway/entities/ai-mcp-server/ + - text: Plugin entity + url: /gateway/entities/plugin/ +faqs: + - q: Are Policies shared across multiple entities? + a: | + No. Each Policy is an independent instance. To apply the same plugin + configuration to two Models, create two Policies with matching `config`, + one per Model. + + - q: How is a Policy different from a plugin? + a: | + A Policy is a plugin instance configured through the {{site.ai_gateway}} entity surface + instead of the classic `/plugins` endpoint. The runtime effect is the same: a plugin attached + at the appropriate scope. {{site.ai_gateway}} manages the Policy's lifecycle alongside the + entity it's attached to. + + - q: Can a Policy be scoped to a Consumer or Consumer Group? + a: | + Yes. Add the Policy's `name` or `id` to the Consumer's or Consumer Group's `policies` array. + The plugin runs when the Consumer is identified during a request, or when a member of the + Consumer Group is identified. + + - q: What plugin types can a Policy use? + a: | + Set the plugin name in the Policy's `type` field and provide the plugin's configuration + in the `config` field. Examples include `ai-sanitizer`, `ai-prompt-guard`, + `ai-prompt-decorator`, `ai-rate-limiting-advanced`, and `openid-connect`. The supported set + isn't enumerated on this page; refer to the {{site.ai_gateway}} plugin reference for the full list. + + - q: What happens to a Policy when its parent entity is deleted? + a: | + Standalone Policies referenced from parent entities through a `policies` array are independent + and aren't deleted when a referencing parent is deleted.
 The reference is simply removed. +--- + +## What is a Policy? + +A Policy is an {{site.ai_gateway}} entity that represents an action, taken by a plugin, that can be attached to an {{site.ai_gateway}} entity. + +Each Policy declares a `type` (which is a plugin name, for example `ai-sanitizer` or `ai-rate-limiting-advanced`) and a `config` block whose contents follow that plugin's own schema. {{site.ai_gateway}} attaches the configured plugin at the scope you select: globally, or to a specific Model, Agent, MCP Server, Consumer, or Consumer Group. + +For the set of plugin types you can use as a Policy `type`, see the [AI plugin reference](/plugins/?category=ai). + +Policies are not shared. Each Policy is one plugin instance. To apply the same configuration to two parent entities, create two Policies. + +Policies are managed through the {{site.ai_gateway}} entity surface: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/policies +{% endtable %} + +## Policy scopes + +A Policy is scoped by where it's referenced from. Each Policy is an independent plugin instance attached at exactly one scope. To apply the same configuration in multiple places, create one Policy per place. + +The available scopes are: + +* **Global**: a Policy that no parent entity references runs for every {{site.ai_gateway}} route on the data plane. Non-AI traffic on the same data plane isn't affected. +* **Model**: referenced from the `policies` array on a [Model entity](/ai-gateway/entities/ai-model/). The plugin runs at the Service of the Model's derived primitives. +* **Agent**: referenced from the `policies` array on an [Agent entity](/ai-gateway/entities/ai-agent/). The plugin runs at the Service of the Agent's derived primitives. +* **MCP Server**: referenced from the `policies` array on an [MCP Server entity](/ai-gateway/entities/ai-mcp-server/).
The plugin runs at the Service of the MCP Server's derived primitives. +* **Consumer**: referenced from the `policies` array on a [Consumer entity](/ai-gateway/entities/ai-consumer/). The plugin runs when the Consumer is identified during a request. +* **Consumer Group**: referenced from the `policies` array on a [Consumer Group entity](/ai-gateway/entities/ai-consumer-group/). The plugin runs when a member of the Consumer Group is identified during a request. + +### Creating Policies + +All Policies are created through a single endpoint at `/v1/ai-gateways/{aiGatewayId}/policies`. Scope is set entirely through the reference-array mechanism above: add the Policy's `name` or `id` to the parent entity's `policies` array, or omit the reference for global scope. + +## Lifecycle + +Creating a Policy creates exactly one plugin entry in the underlying runtime. Updating a Policy updates that plugin entry. Deleting a Policy deletes that plugin entry. All scopes support standard CRUD operations through the matching path. + +The `config` field is passed through to the plugin without translation. + +{:.info} +> **Plugin config schemas live with the plugin docs** +> +> {{site.ai_gateway}} does not define plugin configuration schemas under the Policy entity. +> For each plugin you intend to use as a Policy `type`, look up that plugin's reference page for its `config` shape. + +## Set up a global Policy + +The following example creates a global PII sanitizer Policy that runs for every {{site.ai_gateway}} route. 
+ +{% entity_example %} +type: policy +data: + display_name: PII Sanitizer - Global + name: pii-sanitizer-global + type: ai-sanitizer + enabled: true + config: + anonymize: + - phone + - creditcard + stop_on_error: true +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-provider.md b/app/_ai_gateway_entities/ai-provider.md new file mode 100644 index 0000000000..584e639fae --- /dev/null +++ b/app/_ai_gateway_entities/ai-provider.md @@ -0,0 +1,153 @@ +--- +title: AI Providers +content_type: reference +entities: + - ai-provider +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-provider/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: AI provider credentials and configuration used by {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayProvider +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} providers" + url: /ai-gateway/ai-providers/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ +faqs: + - q: What happens when I update a Provider's credentials? + a: | + {{site.ai_gateway}} propagates the credential change to every Model that references the + Provider (by `name` or `id`). The next request through any of those Models uses the updated + credentials. + + - q: How does a Model reference a Provider? + a: | + Set `target_models[].provider` on the Model to the Provider's `name` or `id`. + + - q: Do Providers generate any runtime primitives on their own? + a: | + No. A Provider entity is a write-time template. Credentials and configuration only enter + the runtime when a Model references the Provider; at that point, the Provider's values are + materialized into the underlying primitives generated for the Model. 
+ + - q: How do I configure providers in on-prem deployments? + a: | + {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + For on-prem deployments, configure provider credentials and endpoints using {{site.base_gateway}} plugins directly (for example, the AI Proxy plugin). + See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. +--- + +## What is a Provider? + +A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection and its credentials, endpoint configuration, and provider-type-specific options. Each Provider has a `type` that selects the upstream LLM service. See the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. + +Models reference a Provider through `target_models[].provider` to route their `target_models` to that upstream. The reference can use either the Provider `name` or `id`. {{site.ai_gateway}} materializes the Provider's credentials into the underlying primitives of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. + +### Relationship to Models + +A Provider stores how to reach and authenticate to an upstream LLM service. A [Model](/ai-gateway/entities/ai-model/) decides which upstream provider model to call and how requests are load-balanced, formatted, and logged. The relationship is many-to-many at the target level: a single Provider can back many Models (for example, an `openai` Provider used by both a chat Model and an embeddings Model), and a single Model can route across multiple Providers through its `target_models` array (for example, a Model with one OpenAI target and one Anthropic target for fallback). + +Providers don't expose model endpoints on their own. They become routable only through a Model that references them. 
+ +Providers can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/providers +{% endtable %} + +## Supported providers + +{{site.ai_gateway}} supports the following upstream providers. The Provider's [`type`](#schema-aigateway-provider-type) field selects one of these connections. Per-provider pages document supported capabilities, configuration requirements, and provider-specific limitations. + +{% html_tag type="div" css_classes="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3" %} +{% icon_card icon="openai.svg" title="OpenAI" cta_url="/ai-gateway/ai-providers/openai/" %} +{% icon_card icon="azure.svg" title="Azure OpenAI" cta_url="/ai-gateway/ai-providers/azure/" %} +{% icon_card icon="bedrock.svg" title="Amazon Bedrock" cta_url="/ai-gateway/ai-providers/bedrock/" %} +{% icon_card icon="anthropic.svg" title="Anthropic" cta_url="/ai-gateway/ai-providers/anthropic/" %} +{% icon_card icon="gemini.svg" title="Gemini" cta_url="/ai-gateway/ai-providers/gemini/" %} +{% icon_card icon="vertex.svg" title="Vertex AI" cta_url="/ai-gateway/ai-providers/vertex/" %} +{% icon_card icon="cohere.svg" title="Cohere" cta_url="/ai-gateway/ai-providers/cohere/" %} +{% icon_card icon="mistral.svg" title="Mistral" cta_url="/ai-gateway/ai-providers/mistral/" %} +{% icon_card icon="huggingface.svg" title="Hugging Face" cta_url="/ai-gateway/ai-providers/huggingface/" %} +{% icon_card icon="metaai.svg" title="Llama" cta_url="/ai-gateway/ai-providers/llama/" %} +{% icon_card icon="xai.svg" title="xAI" cta_url="/ai-gateway/ai-providers/xai/" %} +{% icon_card icon="dashscope.svg" title="Alibaba Cloud DashScope" cta_url="/ai-gateway/ai-providers/dashscope/" %} +{% icon_card icon="cerebras.svg" title="Cerebras" 
cta_url="/ai-gateway/ai-providers/cerebras/" %} +{% icon_card icon="deepseek.svg" title="DeepSeek" cta_url="/ai-gateway/ai-providers/deepseek/" %} +{% icon_card icon="ollama.svg" title="Ollama" cta_url="/ai-gateway/ai-providers/ollama/" %} +{% icon_card icon="databricks.svg" title="Databricks" cta_url="/ai-gateway/ai-providers/databricks/" %} +{% icon_card icon="vllm.svg" title="vLLM" cta_url="/ai-gateway/ai-providers/vllm/" %} +{% endhtml_tag %} + +## Authentication + +The `config.auth` object declares how {{site.ai_gateway}} authenticates to the upstream provider. The shape of `auth` depends on the Provider's `type`: + +* **`basic`**: header- or query-parameter-based auth. Used by most provider types. +* **`aws`**: IAM access-key and assume-role auth. Used by `bedrock`. +* **`azure`**: Microsoft Entra ID or managed-identity auth. Used by `azure`. +* **`gcp`**: Google service-account auth. Used by `gemini`. + +`bedrock`, `azure`, and `gemini` can also fall back to `basic` auth. See the schema below for field-level details, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. + +{:.warning} +> Don't commit credential values to source control. Use a secret-management system to inject +> auth values at deployment time, and treat any value checked into a configuration file as +> compromised. + +## Provider references + +Models reference a Provider through the `target_models[].provider` field. The same reference shape is used elsewhere in the schema (such as the embeddings model under a Model's load balancer config). Provider references in {{site.ai_gateway}} entities accept either the Provider `name` or `id`. + +If references use `name`, the `name` field acts as a stable human-readable handle. Renaming a Provider (changing `name`) breaks any Model references that point at the old name. + +## Lifecycle + +Creating a Provider stores the entity but doesn't generate any runtime primitives. 
Provider credentials enter the runtime only when a Model references the Provider. At that point, the credentials are materialized into the underlying primitives of the Model. + +Updating a Provider re-materializes credentials into every Model that references it. The change takes effect on the next request through any referencing Model. + +## Set up a Provider + +The following example creates an OpenAI Provider that authenticates with a single bearer-token header. A Model can then route to this Provider by setting `target_models[].provider` to `my-openai-account` (or the Provider `id`). + +{% entity_example %} +type: provider +data: + display_name: OpenAI Production + name: my-openai-account + type: openai + config: + auth: + type: basic + headers: + - name: Authorization + value: Bearer +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-vault.md b/app/_ai_gateway_entities/ai-vault.md new file mode 100644 index 0000000000..2f15006b56 --- /dev/null +++ b/app/_ai_gateway_entities/ai-vault.md @@ -0,0 +1,106 @@ +--- +title: AI Vaults +content_type: reference +entities: + - ai-vault +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-vault/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Vaults for storing and referencing secrets used by {{site.ai_gateway}} entities. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayVault +works_on: + - konnect +tools: + - deck + - admin-api + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Provider entity + url: /ai-gateway/entities/ai-provider/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: "{{site.base_gateway}} Vault entity" + url: /gateway/entities/vault/ +faqs: + - q: How is an {{site.ai_gateway}} Vault different from a {{site.base_gateway}} Vault? + a: | + The runtime entity is the same secret-management abstraction. 
The {{site.ai_gateway}} surface + manages Vaults through the AI entity convention (`display_name`, `name`, `description`, + `labels`) and exposes them at the `/v1/ai-gateways/{aiGatewayId}/vaults` API endpoint alongside the other AI entities. + + - q: Which secret backends are supported? + a: | + The `type` field selects the backend: `konnect`, `env`, `aws`, `gcp`, `azure`, `conjur`, or `hcv`. + Each type carries its own `config` shape. HashiCorp Vault (`hcv`) further selects an + `auth_method` from `token`, `cert`, `jwt`, `approle`, `kubernetes`, `gcp_iam`, `gcp_gce`, + `aws_ec2`, `aws_iam`, or `azure`. + + - q: How are Vault secrets referenced from other {{site.ai_gateway}} entities? + a: | + Sensitive fields on Provider, Model, MCP Server, and other entities are annotated as + referenceable. Set those fields to a vault reference string (for example, a `{vault://...}` + placeholder) instead of a literal value. The Vault `name` is the lookup key. + + - q: What does `name` control? + a: | + `name` is a user-defined unique identifier and the stable handle used to look up the Vault + configuration when other entities reference secrets. Renaming a Vault breaks any reference + pointing at the old value. +--- + +## What is a Vault? + +A Vault is a first-class {{site.ai_gateway}} entity that registers a secret-management backend so that other entities (Providers, Models, MCP Servers) can reference secrets instead of embedding values directly. + +A Vault entity stores the connection configuration and credentials needed to reach the backend. {{site.ai_gateway}} resolves vault references against the registered Vaults at request time. 
+ +Vaults can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/vaults +{% endtable %} + +## Backends + +Each Vault selects one of the supported secret backends: {{site.konnect_short_name}} Config Store, environment variables, AWS Secrets Manager, Google Secret Manager, Azure Key Vault, CyberArk Conjur, or HashiCorp Vault. The connection details vary per backend; the {{site.konnect_short_name}} UI surfaces the relevant fields based on the backend you choose. + +HashiCorp Vault additionally supports several authentication methods (token, AppRole, JWT, Kubernetes, AWS, GCP, Azure, and others). See the [{{site.base_gateway}} Vault entity](/gateway/entities/vault/) for backend-specific guidance that applies to both deployment modes. + +## Caching + +Network-backed vault types (`aws`, `gcp`, `azure`, `conjur`, `hcv`) cache resolved secrets so that {{site.ai_gateway}} doesn't hit the backend on every reference. Cache duration, negative-lookup caching, and how long expired secrets stay in use during backend outages are all tunable. The `env` type doesn't cache because environment-variable lookups don't hit the network. + +## Set up a Vault + +The following example registers an environment-variable vault that resolves references against process environment variables prefixed with `KONG_`. + +{% entity_example %} +type: vault +data: + display_name: Production Env Vault + name: prod-env-vault + description: Vault for production secrets sourced from environment variables. 
+ type: env + config: + prefix: KONG_ +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_api/konnect/ai-gateway/_index.md b/app/_api/konnect/ai-gateway/_index.md new file mode 100644 index 0000000000..a04c2cee46 --- /dev/null +++ b/app/_api/konnect/ai-gateway/_index.md @@ -0,0 +1,3 @@ +--- +konnect_product_id: 38df0a35-37de-48fa-ac9d-60595d26eddf +--- \ No newline at end of file diff --git a/app/_assets/javascripts/apps/EntitySchema.vue b/app/_assets/javascripts/apps/EntitySchema.vue index d82db15813..9c3839efb2 100644 --- a/app/_assets/javascripts/apps/EntitySchema.vue +++ b/app/_assets/javascripts/apps/EntitySchema.vue @@ -15,6 +15,7 @@ diff --git a/app/_data/entity_examples/config.yml b/app/_data/entity_examples/config.yml index ae36a3af09..ab68f2b003 100644 --- a/app/_data/entity_examples/config.yml +++ b/app/_data/entity_examples/config.yml @@ -32,7 +32,9 @@ formats: admin-api: label: 'Admin API' base_url: 'http://localhost:8001' + ai_gateway_base_url: 'http://localhost:8001' endpoints: + # core entities consumer: '/consumers/' consumer_group: '/consumer_groups/' route: '/routes/' @@ -53,6 +55,11 @@ formats: keyring: '/keyring/' event_hook: '/event-hooks/' partial: '/partials/' + ai_endpoints: + # AI entities (/ai-* endpoints on on-prem AI Gateway) + consumer: '/ai-consumers/' + consumer_group: '/ai-consumer-groups/' + vault: '/ai-vaults/' plugin_endpoints: consumer: '/consumers/{consumer}/plugins/' consumer_group: '/consumer_groups/{consumer_group}/plugins/' @@ -61,11 +68,24 @@ formats: global: '/plugins/' variables: <<: *variables + ai_gateway: + placeholder: 'AIGatewayId' + description: 'The `id` of the AI Gateway.' + ai_model: + placeholder: 'aiModelId' + description: 'The `id` of the AI Model.' + ai_agent: + placeholder: 'aiAgentId' + description: 'The `id` of the AI Agent.' + ai_mcp_server: + placeholder: 'aiMCPServerId' + description: 'The `id` of the AI MCP Server.' 
konnect-api: label: 'Konnect API' base_url: 'https://{region}.api.konghq.com/v2/control-planes/{control_plane}/core-entities' event_gateway_base_url: 'https://{region}.api.konghq.com/v1/event-gateways/{event_gateway}' + ai_gateway_base_url: 'https://{region}.api.konghq.com/v1/ai-gateways/{ai_gateway}' endpoints: consumer: '/consumers/' consumer_group: '/consumer_groups/' @@ -85,6 +105,15 @@ formats: listener: '/listeners' schema_registry: '/schema-registries' static_key: '/static-keys' + ai_endpoints: + model: '/models' + policy: '/policies' + agent: '/agents' + mcp_server: '/mcp-servers' + provider: '/providers' + consumer: '/consumers/' + consumer_group: '/consumer-groups/' + vault: '/vaults/' plugin_endpoints: consumer: '/consumers/{consumer}/plugins/' consumer_group: '/consumer_groups/{consumer_group}/plugins/' @@ -127,7 +156,11 @@ formats: event_gateway_listener: placeholder: 'eventGatewayListenerId' description: The `id` of the Event Gateway Listener. - + ai_gateway_variables: + <<: *konnect_variables + ai_gateway: + placeholder: 'AIGatewayId' + description: 'The `id` of the AI Gateway.' 
kic: label: 'KIC' @@ -140,6 +173,13 @@ formats: ui: label: 'UI' entities: + - ai-provider + - ai-model + - ai-agent + - ai-mcp-server + - ai-policy + - ai-consumer + - ai-consumer-group - admin - ca_certificate - certificate @@ -176,4 +216,4 @@ phases: produce: label: 'Produce Phase' cluster: - label: 'Cluster Phase' \ No newline at end of file + label: 'Cluster Phase' diff --git a/app/_data/konnect_oas_data.json b/app/_data/konnect_oas_data.json index 0d492c6990..586dd8a9aa 100644 --- a/app/_data/konnect_oas_data.json +++ b/app/_data/konnect_oas_data.json @@ -1,4 +1,25 @@ [ + { + "id": "38df0a35-37de-48fa-ac9d-60595d26eddf", + "title": "New AI Gateway", + "latestVersion": { + "name": "v2", + "id": "987bb874-f9f9-471e-9ae3-51897cbd2ccd" + }, + "description": "New AI Gateway API.", + "documentCount": 0, + "versionCount": 1, + "versions": [ + { + "id": "987bb874-f9f9-471e-9ae3-51897cbd2ccd", + "created_at": "2024-02-21T17:28:17.757Z", + "updated_at": "2024-10-17T19:13:18.223Z", + "name": "v2", + "deprecated": false, + "registration_configs": [] + } + ] + }, { "id": "ccb264be-1963-49a4-b6e8-bc7c98a6e4c2", "title": "Application Auth Strategies", diff --git a/app/_data/products/ai-gateway.yml b/app/_data/products/ai-gateway.yml index e40c9b2a8b..fc1a92a474 100644 --- a/app/_data/products/ai-gateway.yml +++ b/app/_data/products/ai-gateway.yml @@ -1,2 +1,8 @@ name: AI Gateway -icon: /_assets/icons/products/ai-gateway.svg \ No newline at end of file +icon: /_assets/icons/products/ai-gateway.svg + +releases: + - release: "2.0" + version: "2.0.0" + name: "v2" + latest: true \ No newline at end of file diff --git a/app/_includes/components/entity_example/format/admin-api.md b/app/_includes/components/entity_example/format/admin-api.md index 1cd81c3cfe..570496cf7a 100644 --- a/app/_includes/components/entity_example/format/admin-api.md +++ b/app/_includes/components/entity_example/format/admin-api.md @@ -1,9 +1,13 @@ {% if include.render_context %} {% case 
include.presenter.entity_type %} {% when 'consumer' %} -To create a Consumer, call the [Admin API's `/consumers` endpoint](/api/gateway/admin-ee/#/operations/create-consumer). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Consumer, call the [Admin API's `/ai-consumers` endpoint](/api/gateway/admin-ee/#/operations/create-ai-consumer). {% else %} +To create a Consumer, call the [Admin API's `/consumers` endpoint](/api/gateway/admin-ee/#/operations/create-consumer). {% endif %} {% when 'consumer_group' %} -To create a Consumer Group, call the [Admin API's `/consumer_groups` endpoint](/api/gateway/admin-ee/#/operations/create-consumer_group). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Consumer Group, call the [Admin API's `/ai-consumer-groups` endpoint](/api/gateway/admin-ee/#/operations/create-ai-consumer-group).{% else %} +To create a Consumer Group, call the [Admin API's `/consumer_groups` endpoint](/api/gateway/admin-ee/#/operations/create-consumer_group).{% endif %} {% when 'route' %} To create a Route, call the [Admin API’s `/routes` endpoint](/api/gateway/admin-ee/#/operations/create-route). @@ -30,7 +34,9 @@ To create a CA Certificate, call the [Admin API's `/ca_certificates` endpoint](/ {% when 'certificate' %} To create a Certificate, call the [Admin API's `/certificates` endpoint](/api/gateway/admin-ee/#/operations/create-certificate). {% when 'vault' %} -To create a Vault entity, call the [Admin API's `/vaults` endpoint](/api/gateway/admin-ee/#/operations/create-vault). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Vault entity, call the [Admin API's `/ai-vaults` endpoint](/api/gateway/admin-ee/#/operations/create-ai-vault). {% else %} +To create a Vault entity, call the [Admin API's `/vaults` endpoint](/api/gateway/admin-ee/#/operations/create-vault). 
{% endif %} {% when 'partial' %} To create a Partial, call the [Admin API's `/partials` endpoint](/api/gateway/admin-ee/#/operations/create-partial). {% when 'key' %} diff --git a/app/_includes/components/entity_example/format/deck.md b/app/_includes/components/entity_example/format/deck.md index c7e00fb335..1f1b026082 100644 --- a/app/_includes/components/entity_example/format/deck.md +++ b/app/_includes/components/entity_example/format/deck.md @@ -1,7 +1,7 @@ {% if include.render_context %} {% case include.presenter.entity_type %} -{% when 'consumer' %} -The following creates a new Consumer called **{{ include.presenter.data['username'] }}**: +{% when 'consumer' %}{% assign name = include.presenter.data['name'] | default: include.presenter.data['username'] %} +The following creates a new Consumer called **{{ name }}**: {% when 'consumer_group' %} The following creates a new Consumer Group called **{{ include.presenter.data['name'] }}**: {% when 'route' %} diff --git a/app/_includes/components/entity_example/format/konnect-api.md b/app/_includes/components/entity_example/format/konnect-api.md index da568f8207..5cdbf5530b 100644 --- a/app/_includes/components/entity_example/format/konnect-api.md +++ b/app/_includes/components/entity_example/format/konnect-api.md @@ -1,8 +1,12 @@ {% case include.presenter.entity_type %} {% when 'consumer' %} -To create a Consumer, call the Konnect [control plane config API's `/consumers` endpoint](/api/konnect/control-planes-config/#/operations/create-consumer). 
+{% if include.presenter.product == 'ai-gateway' -%} +To create a Consumer, call the Konnect [{{site.ai_gateway}} API's `/consumers` endpoint](/api/konnect/ai-gateway/#/operations/create-ai-gateway-consumer).{% else %} +To create a Consumer, call the Konnect [control plane config API's `/consumers` endpoint](/api/konnect/control-planes-config/#/operations/create-consumer).{% endif %} {% when 'consumer_group' %} -To create a Consumer Group, call the Konnect [control plane config API's `/consumer_groups` endpoint](/api/konnect/control-planes-config/#/operations/create-consumer_group). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Consumer Group, call the Konnect [{{site.ai_gateway}} API's `/consumer-groups` endpoint](/api/konnect/ai-gateway/#/operations/create-ai-consumer-group).{% else %} +To create a Consumer Group, call the Konnect [control plane config API's `/consumer_groups` endpoint](/api/konnect/control-planes-config/#/operations/create-consumer_group).{% endif %} {% when 'route' %} To create a Route, call the Konnect [control plane config API's `/routes` endpoint](/api/konnect/control-planes-config/#/operations/create-route). {% when 'service' %} @@ -18,7 +22,9 @@ To create a CA Certificate, call the Konnect [control plane config API's `/ca-ce {% when 'certificate' %} To create a Certificate, call the Konnect [control plane config API's `/certificates` endpoint](/api/konnect/control-planes-config/#/operations/create-certificate). {% when 'vault' %} -To create a Vault entity, call the Konnect [control plane config API's `/vaults` endpoint](/api/konnect/control-planes-config/#/operations/create-vault). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Vault entity, call the Konnect [{{site.ai_gateway}} API's `/vaults` endpoint](/api/konnect/ai-gateway/#/operations/create-ai-gateway-vault). 
{% else %} +To create a Vault entity, call the Konnect [control plane config API's `/vaults` endpoint](/api/konnect/control-planes-config/#/operations/create-vault). {% endif %} {% when 'key' %} To create a Key, call the Konnect [control plane config API's `/keys` endpoint](/api/konnect/control-planes-config/#/operations/create-key). {% when 'key-set' %} diff --git a/app/_includes/components/entity_example/format/ui_ai.md b/app/_includes/components/entity_example/format/ui_ai.md new file mode 100644 index 0000000000..ab70cb72fc --- /dev/null +++ b/app/_includes/components/entity_example/format/ui_ai.md @@ -0,0 +1,83 @@ +{% if page.layout == 'gateway_entity' %} +{% case include.presenter.entity_type %} +{% when 'provider' %} +The following creates a new AI Provider. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Providers**. +1. Click **New Provider**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select a provider (for example: `{{ include.presenter.data['type'] }}`). +1. Configure authentication and connection settings for the selected provider type. +1. Click **Create**. +{% when 'policy' %} +The following creates a new AI Policy. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Policies**. +1. Click **New Policy**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select a policy **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Configure the policy `config` fields. +1. 
Click **Create**. +{% when 'consumer' %} +The following creates a new AI Consumer. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Consumers**. +1. Click **New Consumer**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select an authentication **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Configure credentials and optional Consumer Group or Policy references. +1. Click **Create**. +{% when 'consumer_group' %} +The following creates a new AI Consumer Group. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Credentials**. +1. Select the **Groups** tab. +1. Click **New Group**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Optionally add policy references for group-level enforcement. +1. Click **Create**. +{% when 'model' %} +The following creates a new AI Model. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Models**. +1. Click **New Model**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Configure at least one target model and select the Provider reference. +1. Optionally add policies, ACLs, labels, and fallback/load-balancing settings. +1. Click **Create**. 
+{% when 'agent' %} +The following creates a new AI Agent. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Agents**. +1. Click **New Agent**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select an Agent **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Enter the upstream Agent **URL** (for example: `{{ include.presenter.data['config']['url'] }}`). +1. Optionally configure logging, max payload size, ACLs, and Policy references. +1. Click **Create**. +{% when 'mcp_server' %} +The following creates a new AI MCP Server. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **MCP Servers**. +1. Click **New MCP Server**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Configure endpoint/auth settings and optional policies. +1. Click **Create**. +{% else %} +UI instructions are not yet available for this {{site.ai_gateway}} entity type. +{% endcase %} +{% endif %} diff --git a/app/_landing_pages/ai-gateway/entities.yaml b/app/_landing_pages/ai-gateway/entities.yaml new file mode 100644 index 0000000000..313199bbb6 --- /dev/null +++ b/app/_landing_pages/ai-gateway/entities.yaml @@ -0,0 +1,109 @@ +metadata: + title: "{{site.ai_gateway}} entities" + content_type: landing_page + description: This page lists the entities that make up {{site.ai_gateway}}. 
+ breadcrumbs: + - /ai-gateway/ + products: + - ai-gateway + works_on: + - on-prem + - konnect + +rows: + - header: + type: h1 + text: "{{site.ai_gateway}} entities" + sub_text: "Entities are the components and objects that make up {{site.ai_gateway}}." + + - header: + type: h2 + text: "Core entities" + column_count: 3 + columns: + - blocks: + - type: card + config: + title: "{{site.ai_gateway}}" + description: The top-level entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. + cta: + text: "{{site.ai_gateway}} entity" + url: /ai-gateway/entities/ai-gateway/ + - blocks: + - type: card + config: + title: "{{site.ai_gateway}} Provider" + description: Stores upstream provider credentials and connection configuration. Providers are reusable and are not model endpoints. + cta: + text: AI Provider entity + url: /ai-gateway/entities/ai-provider/ + - blocks: + - type: card + config: + title: Model + description: Defines a model endpoint and capability configuration used for model selection and policy targeting. + cta: + text: Model entity + url: /ai-gateway/entities/ai-model/ + - blocks: + - type: card + config: + title: AI Agent + description: An A2A or HTTP agent exposed through the A2A proxy flow. Independent of Model. + cta: + text: AI Agent entity + url: /ai-gateway/entities/ai-agent/ + - blocks: + - type: card + config: + title: AI MCP Server + description: An MCP server in passthrough, listener, or conversion-listener mode. Mode is immutable after creation. + cta: + text: AI MCP Server entity + url: /ai-gateway/entities/ai-mcp-server/ + - blocks: + - type: card + config: + title: AI Policy + description: An AI Gateway plugin instance scoped globally or to a specific AI entity. Policy instances are independent. + cta: + text: AI Policy entity + url: /ai-gateway/entities/ai-policy/ + - blocks: + - type: card + config: + title: AI Consumer + description: A thin wrapper around the existing Consumer entity. 
+ cta: + text: AI Consumer entity + url: /ai-gateway/entities/ai-consumer/ + - blocks: + - type: card + config: + title: AI Consumer Group + description: A thin wrapper around the existing Consumer Group entity. + cta: + text: AI Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + + - header: + type: h2 + text: "Security" + column_count: 3 + columns: + - blocks: + - type: card + config: + title: AI Vault + description: Store and reference secrets used by AI Gateway entities and plugins. + cta: + text: AI Vault entity + url: /ai-gateway/entities/ai-vault/ + - blocks: + - type: card + config: + title: AI Data Plane Certificate + description: Public client certificates that authorize data planes to establish mTLS connections to an AI Gateway. + cta: + text: AI Data Plane Certificate entity + url: /ai-gateway/entities/ai-data-plane-certificate/ diff --git a/app/_plugins/drops/entity_example/presenters/admin-api.rb b/app/_plugins/drops/entity_example/presenters/admin-api.rb index 9eebea6126..4c950ea1ba 100644 --- a/app/_plugins/drops/entity_example/presenters/admin-api.rb +++ b/app/_plugins/drops/entity_example/presenters/admin-api.rb @@ -42,14 +42,40 @@ def data_validate_on_prem config: { url:, headers:, body: data, method: 'POST', status_code: 201 } }) end + def product + @product ||= @example_drop.product + end + private def build_url [ - formats['admin-api']['base_url'], - formats['admin-api']['endpoints'][entity_type] + base_url, + endpoint ].join end + + def base_url + @base_url ||= case @example_drop.product + when 'gateway' + formats['admin-api']['base_url'] + when 'ai-gateway' + formats['admin-api']['ai_gateway_base_url'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" + end + end + + def endpoint + @endpoint ||= case @example_drop.product + when 'gateway' + formats['admin-api']['endpoints'][entity_type] + when 'ai-gateway' + formats['admin-api']['ai_endpoints'][entity_type] + else + raise ArgumentError, 
"Unsupported product: #{@example_drop.product}" + end + end end class Plugin < Base @@ -72,7 +98,7 @@ def missing_variables def build_url [ - formats['admin-api']['base_url'], + base_url, formats['admin-api']['plugin_endpoints'][@example_drop.target.key] ].join end diff --git a/app/_plugins/drops/entity_example/presenters/konnect-api.rb b/app/_plugins/drops/entity_example/presenters/konnect-api.rb index a890099167..0efa5a4f3f 100644 --- a/app/_plugins/drops/entity_example/presenters/konnect-api.rb +++ b/app/_plugins/drops/entity_example/presenters/konnect-api.rb @@ -44,25 +44,46 @@ def product def default_variables @default_variables ||= - if @example_drop.product == 'gateway' + case @example_drop.product + when 'gateway' formats['konnect-api']['variables'] - else + when 'event-gateway' formats['konnect-api']['event_gateway_variables'] + when 'ai-gateway' + formats['konnect-api']['ai_gateway_variables'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" end end def build_url [ base_url, - formats['konnect-api']['endpoints'][entity_type] + endpoint ].join end def base_url - @base_url ||= if @example_drop.product == 'gateway' + @base_url ||= case @example_drop.product + when 'gateway' formats['konnect-api']['base_url'] - else + when 'event-gateway' formats['konnect-api']['event_gateway_base_url'] + when 'ai-gateway' + formats['konnect-api']['ai_gateway_base_url'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" + end + end + + def endpoint + @endpoint ||= case @example_drop.product + when 'gateway', 'event-gateway' + formats['konnect-api']['endpoints'][entity_type] + when 'ai-gateway' + formats['konnect-api']['ai_endpoints'][entity_type] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" end end end diff --git a/app/_plugins/drops/entity_example/presenters/ui.rb b/app/_plugins/drops/entity_example/presenters/ui.rb index 84b80506a3..62851b692f 100644 --- 
a/app/_plugins/drops/entity_example/presenters/ui.rb +++ b/app/_plugins/drops/entity_example/presenters/ui.rb @@ -13,7 +13,11 @@ def data end def template_file - '/components/entity_example/format/ui.md' + if @example_drop.product == 'ai-gateway' + '/components/entity_example/format/ui_ai.md' + else + '/components/entity_example/format/ui.md' + end end end diff --git a/app/_plugins/drops/entity_schema.rb b/app/_plugins/drops/entity_schema.rb index fd37919bc2..62b1585efd 100644 --- a/app/_plugins/drops/entity_schema.rb +++ b/app/_plugins/drops/entity_schema.rb @@ -57,20 +57,12 @@ def api_file @api_file ||= [ File.expand_path('../', @site.source), 'api-specs', - *product_path, + @schema.fetch('api'), release_path, 'openapi.yaml' ].join('/') end - def product_path - if @release.ee_version - %w[gateway admin-ee] - else - %w[konnect event-gateway] - end - end - def release_path if @release.ee_version @release.number diff --git a/jekyll.yml b/jekyll.yml index a8cedcfaaa..4b7ab29734 100644 --- a/jekyll.yml +++ b/jekyll.yml @@ -34,6 +34,8 @@ include: # Collections collections: + ai_gateway_entities: + output: true gateway_entities: output: true how-tos: @@ -54,6 +56,16 @@ defaults: permalink: "/how-to/:path/" breadcrumbs: - "/how-to/" + - scope: + path: "_ai_gateway_entities" + type: "ai_gateway_entities" + values: + layout: "gateway_entity" + permalink: "/ai-gateway/entities/:path/" + products: + - ai-gateway + breadcrumbs: + - "/ai-gateway/" - scope: path: "_gateway_entities" type: "gateway_entities" diff --git a/vite.config.ts b/vite.config.ts index 3e7741a319..a73582a2b1 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -63,12 +63,16 @@ export default ({ command, mode }) => { server: { cors: { origin: 'http://localhost:8888' }, proxy: { - '^/api': { + '/vite-dev/api': { changeOrigin: true, target: portalApiUrl, configure: (proxy, options) => { mutateCookieAttributes(proxy) setHostHeader(proxy) + }, + rewrite: (path) => { + return path + .replace(/^\/vite-dev\/api/, 
'/api/'); } } } From ee5814bbc25118e699d5567fae8dc81ab3a7db66 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 3 Jun 2026 06:34:16 +0200 Subject: [PATCH 11/20] update agent and mcp server entities --- app/_ai_gateway_entities/ai-agent.md | 6 ++- app/_ai_gateway_entities/ai-mcp-server.md | 46 ++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/app/_ai_gateway_entities/ai-agent.md b/app/_ai_gateway_entities/ai-agent.md index 9ffd7b9cb8..0348b626ad 100644 --- a/app/_ai_gateway_entities/ai-agent.md +++ b/app/_ai_gateway_entities/ai-agent.md @@ -107,6 +107,10 @@ When an Agent has type `a2a`, proxied traffic is processed in four phases: Non-A2A traffic, and traffic to `http` Agents, is proxied without these steps. +## Routing configuration + +Beyond the `url` field, Agents can define HTTP routing rules through `config.route`. This allows you to match requests by method, path, host, and other HTTP patterns. Use `route` when you need fine-grained control over which traffic reaches the Agent. If only a URL is needed, the `url` field is simpler. + {% mermaid %} sequenceDiagram @@ -301,7 +305,7 @@ data: logging: statistics: true payloads: false - max_payload_size: 524288 + max_payload_size: 1048576 {% endentity_example %} ## Schema diff --git a/app/_ai_gateway_entities/ai-mcp-server.md b/app/_ai_gateway_entities/ai-mcp-server.md index 6257e9156c..3d9de073c0 100644 --- a/app/_ai_gateway_entities/ai-mcp-server.md +++ b/app/_ai_gateway_entities/ai-mcp-server.md @@ -39,13 +39,14 @@ faqs: The MCP runtime behind an MCP Server entity speaks MCP protocol version `2025-06-18`. Upstream MCP servers may run `2025-06-18` or `2025-11-25`. Versions from 2024 are not supported. - - q: What's the difference between the four server types? + - q: What's the difference between the server types? a: | `passthrough-listener` proxies MCP traffic to an upstream MCP server without converting tools. 
`conversion-listener` converts a RESTful API into MCP tools and accepts MCP requests on the same Route. `conversion-only` defines a tool library that other MCP Servers reference by tag but doesn't accept incoming MCP traffic itself. `listener` aggregates tools from one or more - `conversion-only` MCP Servers into a single MCP endpoint. + `conversion-only` MCP Servers into a single MCP endpoint. `upstream-server` registers a real + MCP server into an aggregation pool, dynamically fetching its tools for a `listener` to aggregate. - q: Can the same Consumer's identity gate access to specific tools? a: | @@ -134,7 +135,7 @@ rows: ## Server modes -The `type` field selects one of four modes. Each mode determines how the runtime handles MCP requests and whether it converts RESTful APIs into MCP tools. +The `type` field selects one of five modes. Each mode determines how the runtime handles MCP requests and whether it converts RESTful APIs into MCP tools. {% table %} @@ -174,13 +175,48 @@ rows: - mode: "`listener`" description: | Similar to `conversion-listener`, but instead of defining its own tools, it binds tools - from one or more `conversion-only` MCP Servers through `config.server.tag`. + from one or more `conversion-only` or `upstream-server` MCP Servers through `config.server.tag`. usecase: | - A single MCP endpoint that aggregates tools from multiple `conversion-only` MCP Servers. + A single MCP endpoint that aggregates tools from multiple `conversion-only` or `upstream-server` MCP Servers. Typical in multi-service or multi-team environments that expose a unified MCP interface. + - mode: "`upstream-server`" + description: | + Registers a real MCP server into an aggregation pool. Dynamically fetches the upstream's + tool list and caches it. Works together with a `listener` MCP Server that uses shared tags + to aggregate tools. Supports optional OAuth2 authentication to fetch tool lists from the upstream. 
+ usecase: | + Expose an existing upstream MCP server's tools alongside others through a single `listener` + endpoint. The listener aggregates all tagged upstreams, so adding a new upstream is just + deploying a new `upstream-server` with matching tags. {% endtable %} +## Tool aggregation with upstream-server + +When using `listener` with `upstream-server` MCP Servers, the runtime aggregates tools from all upstreams that share the listener's tag. This pattern centralizes tool discovery and management for agents while keeping upstream services decoupled. + +### How aggregation works + +1. **Tags connect upstreams to listeners**: Set `config.server.tag` on the listener (e.g., `my-tools`). Set the same tag on every `upstream-server` MCP Server you want included. Any upstream with matching tags gets pulled into the aggregation. + +2. **Tool discovery**: When an MCP client calls `tools/list`, the listener fetches tool lists from every tagged upstream. If an upstream requires authentication, configure `config.server.tools_list_auth` with OAuth2 credentials so the listener can fetch its tools. + +3. **Tool caching**: Each `upstream-server` caches its tool list for the duration specified by `config.tools_cache_ttl_seconds`. Set to `0` to fetch fresh on every client request. + +4. **Tool name disambiguation**: If two upstreams expose tools with the same name, the listener prepends the service name to avoid collisions (e.g., `weather-service/get-forecast`). Disable this with `config.server.preserve_upstream_tool_names: true` if you're sure names won't collide. + +5. **Tool invocation**: When a client calls a tool, the listener routes the request to whichever upstream registered it. From the client's perspective, it's one call to one URL. + +### Upstream authentication + +By default, the listener connects to upstreams without credentials. 
If an upstream MCP server requires authentication: + +- Set `config.server.tools_list_auth` on the `upstream-server` plugin with OAuth2 client-credentials configuration +- Kong fetches a token from your identity provider when first needed, caches it, and refreshes it when it expires +- The token is used only when fetching the upstream's tool list; it's separate from agent authentication + +This allows different upstreams to use different credentials, managed centrally by Kong. + ## How MCP traffic flows For `conversion-listener`, `conversion-only`, and `listener` modes, the runtime converts MCP requests into HTTP calls and wraps the responses back in MCP format: From 083b8350bf3b3152cb7de83f53a2d1e689de9202 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 3 Jun 2026 06:52:52 +0200 Subject: [PATCH 12/20] update mcp server --- app/_ai_gateway_entities/ai-mcp-server.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/_ai_gateway_entities/ai-mcp-server.md b/app/_ai_gateway_entities/ai-mcp-server.md index 3d9de073c0..6df3018546 100644 --- a/app/_ai_gateway_entities/ai-mcp-server.md +++ b/app/_ai_gateway_entities/ai-mcp-server.md @@ -214,8 +214,11 @@ By default, the listener connects to upstreams without credentials. If an upstre - Set `config.server.tools_list_auth` on the `upstream-server` plugin with OAuth2 client-credentials configuration - Kong fetches a token from your identity provider when first needed, caches it, and refreshes it when it expires - The token is used only when fetching the upstream's tool list; it's separate from agent authentication +- Different upstreams can use different credentials, managed centrally by Kong -This allows different upstreams to use different credentials, managed centrally by Kong. +### Header forwarding + +When the listener routes tool calls to an upstream, it can forward request headers from the original MCP client. 
Set `config.server.forward_client_headers: true` on the `listener` or `upstream-server` to pass through headers like authentication or context information. This allows upstreams to see the client's original request context. ## How MCP traffic flows From 8e1ceae77cd576b0345c73d1d0c907475c077c6f Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 3 Jun 2026 10:16:47 +0200 Subject: [PATCH 13/20] Remove on-prem mentions --- app/_ai_gateway_entities/ai-gateway.md | 6 +++--- app/_ai_gateway_entities/ai-model.md | 6 +++--- app/_ai_gateway_entities/ai-vault.md | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md index ae0e57d47d..b2238888cb 100644 --- a/app/_ai_gateway_entities/ai-gateway.md +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -60,9 +60,9 @@ faqs: - q: Is the {{site.ai_gateway}} entity available on-prem? a: | - No. The {{site.ai_gateway}} entity is a {{site.konnect_short_name}} concept. On-prem deployments - manage the same child entities (Models, Providers, Policies, and so on) directly through - the Admin API, without a parent `ai-gateways/{id}` container. + No. {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + For on-prem deployments, configure AI proxy behavior using {{site.base_gateway}} plugins directly (for example, the AI Proxy plugin). + See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. --- ## What is an {{site.ai_gateway}}? diff --git a/app/_ai_gateway_entities/ai-model.md b/app/_ai_gateway_entities/ai-model.md index 039e28e240..043c141304 100644 --- a/app/_ai_gateway_entities/ai-model.md +++ b/app/_ai_gateway_entities/ai-model.md @@ -38,7 +38,7 @@ related_resources: faqs: - q: What's the difference between a Model entity and a `model` field inside a plugin configuration? 
a: | - A Model entity is the first-class {{site.ai_gateway}} entity you declare through the `/ai/models` API or {{site.konnect_short_name}}. + A Model entity is the first-class {{site.ai_gateway}} entity you declare through the {{site.konnect_short_name}} API, UI, or decK. {{site.ai_gateway}} derives the underlying plugin and its `model` configuration from the entity. You don't configure the underlying plugin directly. @@ -350,13 +350,13 @@ For per-request authentication and identity, configure the appropriate authentic Policies are how plugin configurations apply to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. -A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. On-prem also supports the nested endpoint `/ai/models/{modelId}/policies`, which creates and attaches a Policy in one call. +A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. You can attach multiple Policies to a single Model. Each Policy has an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. Not every plugin type is valid as a Model Policy. -Policies created through the nested on-prem endpoint (`POST /ai/models/{modelId}/policies`) are deleted when the Model is deleted. Policies created independently (for example, at `/v1/ai-gateways/{aiGatewayId}/policies` or `/ai/policies`) are not deleted when the Model is deleted; only the Model's reference is removed. 
+Policies attached to a Model are not deleted when the Model is deleted; only the Model's reference is removed. For further information, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. diff --git a/app/_ai_gateway_entities/ai-vault.md b/app/_ai_gateway_entities/ai-vault.md index 2f15006b56..04169c1946 100644 --- a/app/_ai_gateway_entities/ai-vault.md +++ b/app/_ai_gateway_entities/ai-vault.md @@ -35,7 +35,7 @@ faqs: a: | The runtime entity is the same secret-management abstraction. The {{site.ai_gateway}} surface manages Vaults through the AI entity convention (`display_name`, `name`, `description`, - `labels`) and exposes them at the `/ai/vaults` API alongside the other AI entities. + `labels`) and exposes them through the {{site.konnect_short_name}} API alongside the other AI entities. - q: Which secret backends are supported? a: | From 306e12092d2e66efaaac005bea61c4ca71ad661c Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 11 Jun 2026 07:59:47 +0200 Subject: [PATCH 14/20] feat(ai-gateway): Update load balancing capabilities documentation for AI Gateway 2.0 (#5308) --- app/_ai_gateway_entities/ai-model.md | 90 ++++++++---------- app/_data/entity_examples/config.yml | 5 + app/ai-gateway/load-balancing.md | 135 ++++++++++++++++----------- 3 files changed, 125 insertions(+), 105 deletions(-) diff --git a/app/_ai_gateway_entities/ai-model.md b/app/_ai_gateway_entities/ai-model.md index 043c141304..ccdf4d1dff 100644 --- a/app/_ai_gateway_entities/ai-model.md +++ b/app/_ai_gateway_entities/ai-model.md @@ -18,14 +18,13 @@ schema: works_on: - konnect tools: - - deck - konnect-api related_resources: - text: About {{site.ai_gateway}} url: /ai-gateway/ - text: "{{site.ai_gateway}} providers" url: /ai-gateway/ai-providers/ - - text: Load balancing with AI Proxy Advanced + - text: Load balancing url: /ai-gateway/load-balancing/ - text: Provider entity url: /ai-gateway/entities/ai-provider/ @@ -36,22 +35,22 @@ related_resources: - text: Consumer Group 
entity url: /ai-gateway/entities/ai-consumer-group/ faqs: - - q: What's the difference between a Model entity and a `model` field inside a plugin configuration? + - q: What's the difference between a Model entity and the `model` field in a Policy configuration? a: | - A Model entity is the first-class {{site.ai_gateway}} entity you declare through the {{site.konnect_short_name}} API, UI, or decK. - {{site.ai_gateway}} derives the underlying plugin and its `model` configuration from the entity. - You don't configure the underlying plugin directly. + A Model entity is the first-class {{site.ai_gateway}} entity you declare through the {{site.konnect_short_name}} API and UI. + It defines routing, capabilities, and load balancing. A Policy is a reusable configuration that adds behavior (like caching or guardrails) to a Model. + You declare both separately and attach Policies to Models. - - q: Can I edit the Service, Routes, or plugins that {{site.ai_gateway}} generates from a Model? + - q: Can I edit the Service or Routes that {{site.ai_gateway}} generates from a Model? a: | No. Generated primitives are protected from direct modification through the standard Admin API. Update the Model entity instead, and {{site.ai_gateway}} recreates the underlying primitives within a single transaction. - - q: How do I configure models in on-prem deployments? - a: | - {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. - For on-prem deployments, configure AI proxy behavior using {{site.base_gateway}} plugins directly (for example, the AI Proxy plugin). - See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. + # - q: How do I configure models in on-prem deployments? + # a: | + # {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + # For on-prem deployments, configure AI proxy behavior using {{site.base_gateway}} directly through its plugin interface. 
+ # See the [{{site.base_gateway}} documentation](/gateway/) for available AI-related capabilities. - q: What happens when I update a Model? a: | @@ -60,7 +59,7 @@ faqs: - q: What happens when I delete a Model? a: | - The Model and all its derived primitives (Service, Routes, plugin instances) are deleted within a single transaction. + The Model and all its derived primitives (Service, Routes) are deleted within a single transaction. - q: Can I apply the same configuration to multiple Models? a: | @@ -81,7 +80,7 @@ faqs: - q: Can a client override the model name from the request body? a: | By default, no. The request `model` field must match the upstream model on one of the Model's targets, otherwise the runtime returns a `400` error. - To accept a client-side alias, set `config.model.alias` on the Model and clients can send the alias value in the request `model` field instead of the upstream provider model name. + To accept a client-side alias, set [`config.target_models[].model.alias`](/ai-gateway/entities/ai-model/#schema-aigateway-model-target-models-model-alias) on each target. Clients can then send the alias value in the request `model` field instead of the upstream provider model name. See [Request routing by model alias](/ai-gateway/load-balancing/#request-routing-by-model-alias) for details and examples. - q: Can a client override `temperature`, `top_p`, or `top_k` from the request? a: | @@ -101,9 +100,9 @@ faqs: A Model is a first-class {{site.ai_gateway}} entity that represents an AI model endpoint exposed through {{site.ai_gateway}}. -A Model declares which capabilities it exposes (such as `chat`, `responses`, or `embeddings`), which upstream provider models it routes to, and how requests are load-balanced and logged. {{site.ai_gateway}} translates a Model into the underlying primitives that the runtime uses to serve traffic, so you don't need to assemble Services, Routes, or plugin entries by hand. 
+A Model declares which capabilities it exposes (such as `chat`, `responses`, or `embeddings`), which upstream provider models it routes to, and how requests are load-balanced and logged. {{site.ai_gateway}} translates a Model into the underlying primitives that the runtime uses to serve traffic, so you don't need to assemble Services or Routes by hand. -Models can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: +Models can be created and managed through the {{site.konnect_short_name}} UI or the {{site.ai_gateway}} API: {% table %} columns: @@ -124,7 +123,7 @@ When you create a Model in {{site.konnect_short_name}} or via the API, the confi 1. Add one or more target models, each pointing to a Provider with credentials. 1. Select a request and response format (default is `openai`). 1. If you have more than one target, configure load balancing in `config.balancer`. -1. Optionally, attach Policies to add plugin configuration and set `acls` to control access. +1. Optionally, attach Policies to add capabilities and set `acls` to control access. For a concrete example, see [Set up a Model](#set-up-a-model). @@ -147,16 +146,15 @@ When you create or update a Model, {{site.ai_gateway}} generates a fixed set of * One [Gateway Service](/gateway/entities/service/). * One [Route](/gateway/entities/route/) per declared capability in the `capabilities` array. -* One [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin per generated Route. -Provider credentials are added into the AI Proxy Advanced plugin configuration at generation time, sourced from the Provider entity that the Model's `target_models` reference. Updating the Provider propagates credential changes to every Model that uses it. +Provider credentials are added into the generated runtime configuration at generation time, sourced from the Provider entity that the Model's `target_models` reference.
Updating the Provider propagates credential changes to every Model that uses it. -Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against the underlying Service, Routes, or plugin entries through the standard Admin API are rejected. To change anything about a Model's runtime footprint, update the Model entity. {{site.ai_gateway}} deletes and recreates the derived primitives within a single transaction. +Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against the underlying Service or Routes through the standard Admin API are rejected. To change anything about a Model's runtime footprint, update the Model entity. {{site.ai_gateway}} deletes and recreates the derived primitives within a single transaction. {:.info} > **Why a transaction instead of an in-place update?** > -> A Model's structure (which capabilities exist, which providers it routes to) determines how many Routes and plugin entries are needed. A delete-and-recreate cycle is the simplest way to keep the entity and its derived primitives consistent, especially when capabilities are added or removed. +> A Model's structure (which capabilities exist, which providers it routes to) determines how many Routes are needed. A delete-and-recreate cycle is the simplest way to keep the entity and its derived primitives consistent, especially when capabilities are added or removed. ## Capabilities @@ -169,7 +167,7 @@ Model [`type`](#schema-aigateway-model-type) controls which capability set appli Not every provider supports every capability. The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. See [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for per-provider details. -The following table maps each capability to an OpenAI API reference and the corresponding [AI Proxy plugin](/plugins/ai-proxy/) example. +The following table maps each capability to an OpenAI API reference. 
For load balancing configuration details, see [Load balancing](/ai-gateway/load-balancing/). {% table %} @@ -178,45 +176,31 @@ columns: key: capability - title: Description key: description - - title: Example route - key: example rows: - capability: "`chat`" description: Conversational responses from a sequence of messages. - example: "[`llm/v1/chat`](/plugins/ai-proxy/examples/openai-chat-route/)" - capability: "`embeddings`" description: Vector representations for semantic search and similarity matching. - example: "[`llm/v1/embeddings`](/plugins/ai-proxy/examples/embeddings-route-type/)" - capability: "`assistants`" description: Persistent tool-using agents with metadata for debugging and evaluation. - example: "[`llm/v1/assistants`](/plugins/ai-proxy/examples/assistants-route-type/)" - capability: "`responses`" description: REST-based full-text responses. - example: "[`llm/v1/responses`](/plugins/ai-proxy/examples/responses-route-type/)" - capability: "`audio-transcriptions`" description: Speech-to-text. - example: "[`audio/v1/audio/transcriptions`](/plugins/ai-proxy/examples/audio-transcription-openai/)" - capability: "`audio-translations`" description: Audio translation between languages. - example: "[`audio/v1/audio/translations`](/plugins/ai-proxy/examples/audio-translation-openai/)" - capability: "`image-generation`" description: Generate images from text prompts. - example: "[`image/v1/images/generations`](/plugins/ai-proxy/examples/image-generation-openai/)" - capability: "`image-edits`" description: Modify images from text prompts. - example: "[`image/v1/images/edits`](/plugins/ai-proxy/examples/image-edits-openai/)" - capability: "`video-generations`" description: Generate videos from text prompts. - example: "[`video/v1/videos/generations`](/plugins/ai-proxy/examples/video-generation-openai/)" - capability: "`realtime`" description: Bidirectional WebSocket streaming for low-latency, interactive voice and text. 
- example: "[`realtime/v1/realtime`](/plugins/ai-proxy-advanced/examples/realtime-route-openai/)" - capability: "`batches`" description: Asynchronous bulk LLM requests for long workloads. - example: "[`llm/v1/batches`](/plugins/ai-proxy/examples/batches-route-type/)" - capability: "`files`" description: File uploads for long documents and structured input. - example: "[`llm/v1/files`](/plugins/ai-proxy/examples/files-route-type/)" {% endtable %} @@ -257,7 +241,7 @@ rows: {% endtable %} -When a native format is set, only the corresponding provider is supported with its specific APIs. For format-specific behavior and limitations, see the [AI Proxy plugin reference](/plugins/ai-proxy/#supported-native-llm-formats). +When a native format is set, only the corresponding provider is supported with its specific APIs. ## Target models @@ -271,7 +255,7 @@ There's no separate Target Model entity or endpoint. Target models are managed o A Model routes to a single target by default. Add more than one target when you want redundancy, fallback between providers, or cost and latency optimization. When you have multiple targets, configure `config.balancer` to distribute requests according to a load balancing algorithm. -When a Model has more than one target, the [load balancer](#schema-aigateway-model-config-balancer) sits between the virtual model and its targets, distributing requests according to `config.balancer`. For algorithm details, selection guidance, and tuning, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). +When a Model has more than one target, the [load balancer](#schema-aigateway-model-config-balancer) sits between the virtual model and its targets, distributing requests according to `config.balancer`. For algorithm details, selection guidance, and tuning, see [Load balancing](/ai-gateway/load-balancing/). 
### Algorithms @@ -285,19 +269,19 @@ columns: - title: Behavior key: behavior rows: - - algorithm: "[`round-robin`](/plugins/ai-proxy-advanced/examples/round-robin/)" + - algorithm: "`round-robin`" behavior: Weighted traffic distribution across targets. - - algorithm: "[`consistent-hashing`](/plugins/ai-proxy-advanced/examples/consistent-hashing/)" + - algorithm: "`consistent-hashing`" behavior: Sticky sessions based on header values. - - algorithm: "[`least-connections`](/plugins/ai-proxy-advanced/examples/least-connections/)" + - algorithm: "`least-connections`" behavior: Route to backends with spare capacity. - - algorithm: "[`lowest-latency`](/plugins/ai-proxy-advanced/examples/lowest-latency/)" + - algorithm: "`lowest-latency`" behavior: Route to the fastest-responding model. - - algorithm: "[`lowest-usage`](/plugins/ai-proxy-advanced/examples/lowest-usage/)" + - algorithm: "`lowest-usage`" behavior: Route based on token counts or cost. - - algorithm: "[`semantic`](/plugins/ai-proxy-advanced/examples/semantic/)" + - algorithm: "`semantic`" behavior: Route based on prompt-to-model similarity. - - algorithm: "[`priority`](/plugins/ai-proxy-advanced/examples/priority/)" + - algorithm: "`priority`" behavior: Tiered failover across model groups. {% endtable %} @@ -338,23 +322,23 @@ Substitution applies to the [`name`](#schema-aigateway-model-target-models-name) * `$(uri_captures.path_parameter_name)`: the value of a captured URI path parameter. * `$(query_params.query_parameter_name)`: the value of a query string parameter. -For end-to-end examples, see [dynamic model selection](/plugins/ai-proxy/examples/sdk-dynamic-model-selection/), [Azure deployment routing](/plugins/ai-proxy/examples/sdk-azure-deployment/), and [proxying multiple models in one Azure instance](/plugins/ai-proxy/examples/sdk-multiple-providers/) on the AI Proxy plugin page. +For examples of using templating, consult the {{site.ai_gateway}} documentation and API reference. 
## Access control A Model's `acls` field controls which identities are allowed to reach the Model. The field accepts `allow` and `deny` lists. Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. Access is enforced at the Service level of the generated primitives. -For per-request authentication and identity, configure the appropriate authentication plugin globally or as a Policy on the Model. +For per-request authentication and identity, configure the appropriate authentication Policy globally or attach it to the Model. ## Attach Policies -Policies are how plugin configurations apply to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. +Policies apply configuration and behavior to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. -You can attach multiple Policies to a single Model. Each Policy has an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. +You can attach multiple Policies to a single Model. Each Policy is applied independently, so attaching the same Policy type twice with different configurations creates two separate instances. -Not every plugin type is valid as a Model Policy. +Not every Policy type is valid as a Model attachment. Policies attached to a Model are not deleted when the Model is deleted; only the Model's reference is removed. 
@@ -362,11 +346,11 @@ For further information, see the [Policy entity](/ai-gateway/entities/ai-policy/ ### Plugin priority and Policy execution order -A Policy attached to a Model creates one plugin entry on the Service of the Model's derived primitives. That plugin runs at the [priority](/gateway/entities/plugin/#plugin-priority) of its underlying plugin type, which determines when it executes relative to other plugins on the request. +A Policy attached to a Model runs on the Service of the Model's derived primitives. That Policy runs at the [priority](/gateway/entities/plugin/#plugin-priority) determined by its type, which affects when it executes relative to other Policies on the request. -The AI Proxy Advanced plugin runs at priority `770` and parses the request body to resolve the model name. Any Policy whose underlying plugin type has a priority higher than `770` runs before that resolution. Authentication plugin types (such as OpenID Connect) fall into this category. They still gate access correctly because routing to the Model's generated Service already occurred, but model-level identity details (provider and target model) are not available yet. +Model routing executes at a specific point in the request pipeline. Policies have different priorities that determine when they run. Higher-priority Policy types may run before Model routing is resolved. Authentication Policies (such as OpenID Connect) fall into this category. They gate access correctly because routing to the Model's generated Service already occurred, but model-level identity details (provider and target model) are not available until after Model resolution. -For Policies whose runtime behavior depends on the resolved Model identity, attach plugin types that run at priority `770` or lower, or use [dynamic plugin ordering](/gateway/entities/plugin/) to push their execution later.
+For Policies whose behavior depends on the resolved Model identity, use Policy types that run at or after Model resolution, or use [dynamic plugin ordering](/gateway/entities/plugin/#dynamic-plugin-ordering) to adjust execution order as needed. ## Set up a Model diff --git a/app/_data/entity_examples/config.yml b/app/_data/entity_examples/config.yml index 3c3c610eff..87e156e9e0 100644 --- a/app/_data/entity_examples/config.yml +++ b/app/_data/entity_examples/config.yml @@ -60,6 +60,7 @@ formats: # core entities consumer: '/consumers/' consumer_group: '/consumer_groups/' + model: '/models/' route: '/routes/' service: '/services/' target: '/upstreams/{upstream}/targets/' @@ -89,6 +90,10 @@ formats: route: '/routes/{route}/plugins/' service: '/services/{service}/plugins/' global: '/plugins/' + ai_policy_endpoints: + ai_model: '/models/{ai_model}/policies/' + ai_agent: '/agents/{ai_agent}/policies/' + ai_mcp_server: '/mcp-servers/{ai_mcp_server}/policies/' variables: <<: *variables ai_gateway: diff --git a/app/ai-gateway/load-balancing.md b/app/ai-gateway/load-balancing.md index ec5cc3baea..04e4473aab 100644 --- a/app/ai-gateway/load-balancing.md +++ b/app/ai-gateway/load-balancing.md @@ -1,46 +1,54 @@ --- -title: "Load balancing with AI Proxy Advanced" +title: "Load balancing with {{site.ai_gateway_name}}" layout: reference content_type: reference -description: This guide provides an overview of load balancing and retry and fallback strategies in the AI Proxy Advanced plugin. +description: "This guide provides an overview of load balancing and retry and fallback strategies in {{site.ai_gateway}}." 
breadcrumbs: - /ai-gateway/ works_on: - - on-prem - konnect products: - gateway - ai-gateway +tools: + - admin-api + - konnect-api + tags: - ai - load-balancing - - ai-proxy - -plugins: - - ai-proxy-advanced min_version: - gateway: '3.10' + ai-gateway: '2.0.0' related_resources: - text: "{{site.ai_gateway}}" url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ --- {{site.ai_gateway}} provides load balancing capabilities to distribute requests across multiple LLM models. You can use these features to improve fault tolerance, optimize resource utilization, and balance traffic across your AI systems. -The [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin supports several load balancing algorithms similar to those used for Kong upstreams, extended for AI model routing. You configure load balancing through the [Upstream entity](/gateway/entities/upstream/), which lets you control how requests are routed to various AI providers and models. +In {{site.ai_gateway}} 2.0.0 and later, load balancing is configured on the [Model entity](/ai-gateway/entities/ai-model/) through `config.balancer` and `target_models`. + + ### Load balancing algorithms {{site.ai_gateway}} supports multiple load balancing strategies for distributing traffic across AI models. Each algorithm addresses different goals: balancing load, improving cache-hit ratios, reducing latency, or providing [failover reliability](#retry-and-fallback). -The following table describes the available algorithms and considerations for selecting one. +The following table describes the available algorithms for [Model entities](/ai-gateway/entities/ai-model/) and considerations for selecting one. 
{% table %} @@ -52,54 +60,54 @@ columns: - title: Considerations key: considerations rows: - - algorithm: "[Round-robin (weighted)](/plugins/ai-proxy-advanced/examples/round-robin/)" + - algorithm: "Round-robin (weighted)" description: | Distributes requests across models based on their assigned weights. For example, if models `gpt-4`, `gpt-4o-mini`, and `gpt-3` have weights of `70`, `25`, and `5`, they receive approximately 70%, 25%, and 5% of traffic respectively. Requests are distributed proportionally, independent of usage or latency metrics. considerations: | * Traffic is routed proportionally based on weights. * Requests follow a circular sequence adjusted by weight. * Does not account for cache-hit ratios, latency, or current load. - - algorithm: "[Consistent-hashing](/plugins/ai-proxy-advanced/examples/consistent-hashing/)" + - algorithm: "Consistent-hashing" description: | - Routes requests based on a hash of a configurable header value. Requests with the same header value are routed to the same model, enabling sticky sessions for maintaining context across user interactions. The [`hash_on_header`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-hash-on-header) setting defines the header to hash. The default is `X-Kong-LLM-Request-ID`. + Routes requests based on a hash of a configurable header value. Requests with the same header value are routed to the same model, enabling sticky sessions for maintaining context across user interactions. The [`hash_on_header`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-hash-on-header) setting defines the header to hash. The default is `X-Kong-LLM-Request-ID`. considerations: | * Effective with consistent keys like user IDs. * Requires diverse hash inputs for balanced distribution. * Useful for session persistence and cache-hit optimization. 
- - algorithm: "[Least-connections](/plugins/ai-proxy-advanced/examples/least-connections/)" + - algorithm: "Least-connections" description: | - {% new_in 3.13 %} Tracks the number of in-flight requests for each backend and routes new requests to the backend with the highest spare capacity. The [`weight`](/plugins/ai-proxy-advanced/reference/#schema--config-targets-weight) parameter is used to calculate connection capacity. + Tracks the number of in-flight requests for each backend and routes new requests to the backend with the highest spare capacity. The [`weight`](/ai-gateway/entities/ai-model/#schema-aigateway-model-target-models-weight) parameter is used to calculate connection capacity. considerations: | * Dynamically adapts to backend response times. * Routes away from slower backends as they accumulate open connections. * Does not account for cache-hit ratios. - - algorithm: "[Lowest-usage](/plugins/ai-proxy-advanced/examples/lowest-usage/)" + - algorithm: "Lowest-usage" description: | - Routes requests to models with the lowest measured resource usage. The [`tokens_count_strategy`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-tokens-count-strategy) parameter defines how usage is measured: prompt token counts, response token counts, or cost {% new_in 3.10 %}. + Routes requests to models with the lowest measured resource usage. The [`tokens_count_strategy`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-tokens-count-strategy) parameter defines how usage is measured: prompt token counts, response token counts, or cost. considerations: | * Balances load based on actual consumption metrics. * Useful for cost optimization and avoiding overloading individual models. - - algorithm: "[Lowest-latency](/plugins/ai-proxy-advanced/examples/lowest-latency/)" + - algorithm: "Lowest-latency" description: | - Routes requests to the model with the lowest observed latency. 
The [`latency_strategy`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-latency-strategy) parameter defines how latency is measured. The default (`tpot`) uses time-per-output-token. The `e2e` option uses end-to-end response time. + Routes requests to the model with the lowest observed latency. The [`latency_strategy`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-latency-strategy) parameter defines how latency is measured. The default (`tpot`) uses time-per-output-token. The `e2e` option uses end-to-end response time.

The algorithm uses peak EWMA (Exponentially Weighted Moving Average) to track latency from TCP connect through body response. Metrics decay over time. considerations: | * Prioritizes models with the fastest response times. * Suited for latency-sensitive applications. * Less suitable for long-lived connections like WebSockets. - - algorithm: "[Semantic](/plugins/ai-proxy-advanced/examples/semantic/)" + - algorithm: "Semantic" description: | Routes requests based on semantic similarity between the prompt and model descriptions. Embeddings are generated using a specified model (for example, `text-embedding-3-small`), and similarity is calculated using vector search.

- {% new_in 3.13 %} Multiple targets can share [identical descriptions](/plugins/ai-proxy-advanced/examples/semantic-with-fallback/). When they do, the balancer performs round-robin fallback among them if the primary target fails. Weights affect fallback order. + Multiple targets can share identical descriptions. When they do, the balancer performs round-robin fallback among them if the primary target fails. Weights affect fallback order. considerations: | * Requires a vector database (for example, Redis) for similarity matching. * The `distance_metric` and `threshold` settings control matching sensitivity. * Best for routing prompts to domain-specialized models. - - algorithm: "[Priority](/plugins/ai-proxy-advanced/examples/priority/)" + - algorithm: "Priority" description: | - {% new_in 3.10 %} Routes requests to models based on assigned priority groups. The balancer always selects from the highest-priority group first. If all targets in that group are unavailable, it falls back to the next group. Within each group, the [`weight`](/plugins/ai-proxy-advanced/reference/#schema--config-targets-weight) parameter controls traffic distribution. + Routes requests to models based on assigned priority groups. The balancer always selects from the highest-priority group first. If all targets in that group are unavailable, it falls back to the next group. Within each group, the [`weight`](/ai-gateway/entities/ai-model/#schema-aigateway-model-target-models-weight) parameter controls traffic distribution. considerations: | * Higher-priority groups receive all traffic until they fail. * Lower-priority groups serve as fallback only. @@ -107,9 +115,17 @@ rows: {% endtable %} +For examples of each algorithm, see [Algorithm examples](/ai-gateway/entities/ai-model/#algorithm-examples) in the [Model entity](/ai-gateway/entities/ai-model/) reference. + +### Request routing by model alias + +Model aliases allow clients to send an alias instead of the actual model name in the request. 
This decouples the external model identifier from the internal provider model, enabling flexible routing without changing client code. + +Each target in a Model entity can have an optional [`model.alias`](/ai-gateway/entities/ai-model/#schema-aigateway-model-target-models-model-alias) field. When a client sends `"model": "alias-value"` in the request body, {{site.ai_gateway}} routes to the matching target. This feature works independently of load balancing algorithms — the alias determines which target (or set of targets) handles the request, and the configured load balancing algorithm selects the final backend within that set. + ### Retry and fallback -The load balancer includes built-in support for **retries** and **fallbacks**. When a request fails, the balancer can automatically retry the same target or redirect the request to a different upstream target. +The load balancer includes built-in support for **retries** and **fallbacks**. When a request fails, the balancer can automatically retry the same target or redirect the request to a different target model. #### How retry and fallback works @@ -143,7 +159,7 @@ flowchart LR #### Retry and fallback configuration -{{site.ai_gateway}} load balancer supports fine-grained control over failover behavior. Use [`failover_criteria`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-failover-criteria) to define when a request should retry on the next upstream target. By default, retries occur on `error` and `timeout`. An `error` means a failure occurred while connecting to the server, forwarding the request, or reading the response header. A `timeout` indicates that any of those stages exceeded the allowed time. +The {{site.ai_gateway}} load balancer supports fine-grained control over failover behavior on the [Model entity](/ai-gateway/entities/ai-model/). 
Use [`failover_criteria`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-failover-criteria) to define when a request should retry on the next target model. By default, retries occur on `error` and `timeout`. An `error` means a failure occurred while connecting to the server, forwarding the request, or reading the response header. A `timeout` indicates that any of those stages exceeded the allowed time. You can add more criteria to adjust retry behavior as needed: @@ -155,23 +171,23 @@ columns: - title: Description key: description rows: - - setting: "[`retries`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-retries)" + - setting: "[`retries`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-retries)" description: | Defines how many times to retry a failed request before reporting failure to the client. Increase for better resilience to transient errors; decrease if you need lower latency and faster failure. - - setting: "[`failover_criteria`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-failover-criteria)" + - setting: "[`failover_criteria`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-failover-criteria)" description: | Specifies which types of failures (e.g., `http_429`, `http_500`) should trigger a failover to a different target. Customize based on your tolerance for specific errors and desired failover behavior. - - setting: "[`connect_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-connect-timeout)" + - setting: "[`connect_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-connect-timeout)" description: | Sets the maximum time allowed to establish a TCP connection with a target. Lower it for faster detection of unreachable servers; raise it if some servers may respond slowly under load. 
- - setting: "[`read_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-read-timeout)" + - setting: "[`read_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-read-timeout)" description: | Defines the maximum time to wait for a server response after sending a request. Lower it for real-time applications needing quick responses; increase it for long-running operations. - - setting: "[`write_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-write-timeout)" + - setting: "[`write_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-write-timeout)" description: | Sets the maximum time allowed to send the request payload to the server. Increase if large request bodies are common; keep short for small, fast payloads. @@ -180,7 +196,7 @@ rows: #### Retry and fallback scenarios -You can customize {{site.ai_gateway}} load balancer to fit different application needs, such as minimizing latency, enabling sticky sessions, or optimizing for cost. The table below maps common scenarios to key configuration options that control load balancing behavior: +You can customize the {{site.ai_gateway}} load balancer to fit different application needs, such as minimizing latency, enabling sticky sessions, or optimizing for cost. 
The table below maps common scenarios to key configuration options that control load balancing behavior: {% table %} @@ -193,36 +209,51 @@ columns: key: description rows: - scenario: "Requests must not hang longer than 3 seconds" - action: "Adjust [`connect_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-vectordb-redis-connect-timeout), [`read_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-vectordb-redis-read-timeout), [`write_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-write-timeout)" + action: "Adjust [`connect_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-connect-timeout), [`read_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-read-timeout), [`write_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-write-timeout)" description: | - Shorten these timeouts to quickly fail if a server is slow or unresponsive, ensuring faster error handling and responsiveness. + Shorten these timeouts to quickly fail if a target model is slow or unresponsive, ensuring faster error handling and responsiveness. - scenario: "Prioritize the lowest-latency target" - action: "Set [`latency_strategy`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-latency-strategy) to `e2e`" + action: "Set [`latency_strategy`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-latency-strategy) to `e2e`" description: | Optimize routing based on full end-to-end response time, selecting the target that minimizes total latency. 
- scenario: "Need predictable fallback for the same user" - action: "Use [`hash_on_header`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-hash-on-header)" + action: "Use [`hash_on_header`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-hash-on-header)" description: | - Ensure that the same user consistently routes to the same target, enabling sticky sessions and reliable fallback behavior. + Ensure that the same user consistently routes to the same target model, enabling sticky sessions and reliable fallback behavior. - scenario: "Models have different costs" - action: "Set [`tokens_count_strategy`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-tokens-count-strategy) to `cost`" + action: "Set [`tokens_count_strategy`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-tokens-count-strategy) to `cost`" description: | - Route requests intelligently by considering cost, balancing model performance with budget optimization. + Route requests by considering cost, balancing model performance with budget targets. {% endtable %} -#### Version compatibility for fallbacks +### Health check and circuit breaker -{:.info} -> **{{site.base_gateway}} version compatibility for fallbacks:** -> {% new_in 3.10 %} -> - Full fallback support across targets, even with different API formats. -> - Mix models from different providers if needed (for example, OpenAI and {{ site.mistral }}). -> -> Pre-3.10: -> - Fallbacks only allowed between targets using the same API format. -> - Example: OpenAI-to-OpenAI fallback is supported; OpenAI-to-OLLAMA is not. +For Model entities, circuit breaker behavior is controlled through the balancer configuration on the Model. Use these settings to fail fast when a target model is unhealthy and to retry or fall back to another target instead of waiting for repeated slow responses. 
+ + +{% table %} +columns: + - title: Setting + key: setting + - title: Use + key: use +rows: + - setting: "[`connect_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-connect-timeout), [`read_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-read-timeout), [`write_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-write-timeout)" + use: "Reduce how long {{site.base_gateway}} waits before treating a target model as unavailable." + - setting: "[`max_fails`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-max-fails)" + use: "Set the number of failed attempts allowed before {{site.base_gateway}} marks a target model unhealthy." + - setting: "[`fail_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-fail-timeout)" + use: "Set how long {{site.base_gateway}} keeps a target model in a failed state before trying it again." +{% endtable %} + + +The load balancer supports health checks and circuit breakers to improve reliability. If the number of unsuccessful attempts to a target reaches [`config.balancer.max_fails`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-max-fails), the load balancer stops sending requests to that target until it reconsiders the target after the period defined by [`config.balancer.fail_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-fail-timeout). The diagram below illustrates this behavior: + +![Circuit breaker](/assets/images/ai-gateway/circuit-breaker.jpg){: style="display:block; margin-left:auto; margin-right:auto; width:50%; border-radius:10px" } + +Consider an example where [`config.balancer.max_fails`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-max-fails) is 3 and [`config.balancer.fail_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-fail-timeout) is 10 seconds. 
When failed requests for a target reach 3, the target is marked unhealthy and the load balancer stops sending requests to it. After 10 seconds, the target is reconsidered. If the request to this target still fails, the target remains unhealthy and the load balancer continues to exclude it. If the request succeeds, the target is marked healthy again and recovers from the circuit breaker. -### Health check and circuit breaker {% new_in 3.13 %} +The failure counter tracks total failures, not consecutive failures. If a target receives 2 failed requests, then 1 successful request within the timeout window, the counter remains at 2. The counter resets only when a successful request occurs after [`config.balancer.fail_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-fail-timeout) has elapsed since the last failed request. -{% include ai-gateway/circuit-breaker.md %} \ No newline at end of file +If all targets become unhealthy simultaneously, requests fail with `HTTP 500`. 
From 45c22274af7a24aa3f808c36499126c22b277187 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 11 Jun 2026 08:08:55 +0200 Subject: [PATCH 15/20] update min_version --- app/ai-gateway/load-balancing.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/ai-gateway/load-balancing.md b/app/ai-gateway/load-balancing.md index 04e4473aab..a0ea831146 100644 --- a/app/ai-gateway/load-balancing.md +++ b/app/ai-gateway/load-balancing.md @@ -22,7 +22,7 @@ tags: - load-balancing min_version: - ai-gateway: '2.0.0' + ai-gateway: '2.0' related_resources: - text: "{{site.ai_gateway}}" @@ -37,9 +37,9 @@ In {{site.ai_gateway}} 2.0.0 and later, load balancing is configured on the [Mod From 8050e0415628bd82278612bbbf95a4e1b69f6e37 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 11 Jun 2026 08:21:46 +0200 Subject: [PATCH 16/20] Update min_version for Resource sizing guidelines doc --- app/ai-gateway/resource-sizing-guidelines-ai.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/ai-gateway/resource-sizing-guidelines-ai.md b/app/ai-gateway/resource-sizing-guidelines-ai.md index d7381a8eb7..35995b2bd9 100644 --- a/app/ai-gateway/resource-sizing-guidelines-ai.md +++ b/app/ai-gateway/resource-sizing-guidelines-ai.md @@ -11,7 +11,7 @@ works_on: - on-prem min_version: - gateway: '3.12' + gateway: '2.0' tags: - performance From 153a7a87f3c59a662000787098d9dab15582bc24 Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 12 Jun 2026 16:11:55 +0100 Subject: [PATCH 17/20] initial 2.0 cleanup --- app/ai-gateway/streaming.md | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/app/ai-gateway/streaming.md b/app/ai-gateway/streaming.md index a7005b5c93..c2cecd2eef 100644 --- a/app/ai-gateway/streaming.md +++ b/app/ai-gateway/streaming.md @@ -8,7 +8,6 @@ works_on: - konnect products: - - gateway - ai-gateway breadcrumbs: - /ai-gateway/ @@ -22,15 +21,16 @@ plugins: - ai-proxy-advanced min_version: 
- gateway: '3.7' + ai-gateway: '3.0' -description: This guide walks you through setting up the AI Proxy and AI Proxy Advanced plugin with streaming. +description: This guide walks you through setting up Models with streaming. --- ## What is request streaming? -In an LLM (Large Language Model) inference request, {{site.base_gateway}} uses the upstream provider's REST API to generate the next chat message from the caller. -Normally, this request is processed and completely buffered by the LLM before being sent back to {{site.base_gateway}} and then to the caller in a single large JSON block. This process can be time-consuming, depending on the `max_tokens`, other request parameters, and the complexity of the request sent to the LLM model. +In an LLM (Large Language Model) inference request, {{site.ai_gateway}} uses the upstream provider's REST API to generate the next chat message from the caller. + +Normally, this request is processed and completely buffered by the LLM before being sent back to {{site.ai_gateway}} and then to the caller in a single large JSON block. This process can be time-consuming, depending on the `max_tokens`, other request parameters, and the complexity of the request sent to the LLM model. To avoid making the user wait for their chat response with a loading animation, most models can stream each word (or sets of words and tokens) back to the client. This allows the chat response to be rendered in real time. @@ -55,23 +55,23 @@ for chunk in stream: print(chunk.choices[0].delta.content, end="", flush=True) ``` -The client won't have to wait for the entire response. Instead, tokens will appear as they come in. +A client configured to use streaming won't have to wait for the entire response. Instead, tokens will appear as they come in. 
-## How AI Proxy streaming works +## How {{site.ai_gateway}} streaming works In streaming mode, a client can set `"stream": true` in their request, and the LLM server will stream each part of the response text (usually token-by-token) as a server-sent event. -{{site.base_gateway}} captures each batch of events and translates them into the {{site.base_gateway}} inference format. This ensures that all providers are compatible with the same framework including OpenAI-compatible SDKs or similar. +{{site.ai_gateway}} captures each batch of events and translates them into the {{site.ai_gateway}} inference format. This ensures that all providers are compatible with the same framework including OpenAI-compatible SDKs or similar. In a standard LLM transaction, requests proxied directly to the LLM look like this: {% mermaid %} sequenceDiagram actor Client - participant {{site.base_gateway}} - Note right of {{site.base_gateway}}: AI Proxy Advanced plugin - Client->>+{{site.base_gateway}}: - destroy {{site.base_gateway}} - {{site.base_gateway}}->>+Cloud LLM: Sends proxy request information + participant {{site.ai_gateway}} + Note right of {{site.ai_gateway}}: AI Proxy Advanced plugin + Client->>+{{site.ai_gateway}}: + destroy {{site.ai_gateway}} + {{site.ai_gateway}}->>+Cloud LLM: Sends proxy request information Cloud LLM->>+Client: Sends chunk to client {% endmermaid %} @@ -80,7 +80,7 @@ When streaming is requested, requests proxied directly to the LLM look like this {% mermaid %} flowchart LR A(client) - B({{site.base_gateway}} Gateway with + B({{site.ai_gateway}} Gateway with AI Proxy Advanced plugin) C(Cloud LLM) D[[transform frame]] @@ -122,12 +122,12 @@ Keep the following limitations in mind when you configure streaming for the {{si * Multiple AI features shouldn’t be expected to be applied and work simultaneously. * You can't use the [Response Transformer plugin](/plugins/response-transformer/) or any other response phase plugin when streaming is configured. 
-* The [AI Request Transformer plugin](/plugins/ai-request-transformer/) plugin **will** work, but the [AI Response Transformer plugin](/plugins/ai-response-transformer/) **will not**. This is because {{site.base_gateway}} can't check every single response token against a separate system. +* The [AI Request Transformer plugin](/plugins/ai-request-transformer/) plugin **will** work, but the [AI Response Transformer plugin](/plugins/ai-response-transformer/) **will not**. This is because {{site.ai_gateway}} can't check every single response token against a separate system. * Streaming currently doesn't work with the HTTP/2 protocol. You must disable this in your [`proxy_listen`](/gateway/configuration/#proxy-listen) configuration. ## Configuration -The AI Proxy and AI Proxy Advanced plugins already support request streaming; all you have to do is request {{site.base_gateway}} to stream the response tokens back to you. +{{site.ai_gateway}} already supports request streaming; all you have to do is add streaming to your request. The following is an example `llm/v1/completions` route streaming request: @@ -140,7 +140,7 @@ The following is an example `llm/v1/completions` route streaming request: You should receive each batch of tokens as HTTP chunks, each containing one or many server-sent events. -### Token usage in streaming responses {% new_in 3.13 %} +### Token usage in streaming responses You can receive token usage statistics in an SSE streaming response. Set the following parameter in the request JSON: @@ -154,7 +154,6 @@ You can receive token usage statistics in an SSE streaming response. Set the fol When you set this parameter, the `usage` object appears in the final SSE frame, before the `[DONE]` terminator. This object contains token count statistics for the request. 
- The following example shows how to request and process token usage statistics in a streaming response: ```python From d13c96232c71afbf7b7b45d396cb220a1f157703 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 15 Jun 2026 13:58:40 +0100 Subject: [PATCH 18/20] diagrams and config cleanup --- app/ai-gateway/streaming.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/app/ai-gateway/streaming.md b/app/ai-gateway/streaming.md index c2cecd2eef..d50001b6c3 100644 --- a/app/ai-gateway/streaming.md +++ b/app/ai-gateway/streaming.md @@ -4,7 +4,6 @@ content_type: reference layout: reference works_on: - - on-prem - konnect products: @@ -16,10 +15,6 @@ tags: - streaming - ai-proxy -plugins: - - ai-proxy - - ai-proxy-advanced - min_version: ai-gateway: '3.0' @@ -68,7 +63,6 @@ In a standard LLM transaction, requests proxied directly to the LLM look like th sequenceDiagram actor Client participant {{site.ai_gateway}} - Note right of {{site.ai_gateway}}: AI Proxy Advanced plugin Client->>+{{site.ai_gateway}}: destroy {{site.ai_gateway}} {{site.ai_gateway}}->>+Cloud LLM: Sends proxy request information @@ -80,8 +74,7 @@ When streaming is requested, requests proxied directly to the LLM look like this {% mermaid %} flowchart LR A(client) - B({{site.ai_gateway}} Gateway with - AI Proxy Advanced plugin) + B({{site.ai_gateway}}) C(Cloud LLM) D[[transform frame]] E[[read frame]] @@ -118,10 +111,10 @@ It also estimates tokens for LLM services that decided to not stream back the to ## Streaming limitations -Keep the following limitations in mind when you configure streaming for the {{site.ai_gateway}} plugin: +Keep the following limitations in mind when you configure streaming for the {{site.ai_gateway}}: * Multiple AI features shouldn’t be expected to be applied and work simultaneously. -* You can't use the [Response Transformer plugin](/plugins/response-transformer/) or any other response phase plugin when streaming is configured. 
+* You can't add Policies that use the [Response Transformer plugin](/plugins/response-transformer/) or any other response phase plugin when streaming is configured. * The [AI Request Transformer plugin](/plugins/ai-request-transformer/) plugin **will** work, but the [AI Response Transformer plugin](/plugins/ai-response-transformer/) **will not**. This is because {{site.ai_gateway}} can't check every single response token against a separate system. * Streaming currently doesn't work with the HTTP/2 protocol. You must disable this in your [`proxy_listen`](/gateway/configuration/#proxy-listen) configuration. @@ -186,7 +179,7 @@ for chunk in stream: ### Response streaming configuration parameters -In the AI Proxy and AI Proxy Advanced plugin configuration, you can set an optional field `config.response_streaming` to one of three values: +In the [Model](/ai-gateway/entities/ai-model/) configuration, you can set an optional field `config.response_streaming` to one of three values: {% table %} columns: From cde07a8b577404f25b3452bfa508127f84202fbc Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 15 Jun 2026 14:00:42 +0100 Subject: [PATCH 19/20] version typo --- app/ai-gateway/streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/ai-gateway/streaming.md b/app/ai-gateway/streaming.md index d50001b6c3..9bbf5e4208 100644 --- a/app/ai-gateway/streaming.md +++ b/app/ai-gateway/streaming.md @@ -16,7 +16,7 @@ tags: - ai-proxy min_version: - ai-gateway: '3.0' + ai-gateway: '2.0' description: This guide walks you through setting up Models with streaming. 
--- From 95b5a44c5e3626c998fa9d5308cae5288f7d1708 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 18 Jun 2026 17:21:44 +0100 Subject: [PATCH 20/20] plugin to policy --- app/ai-gateway/streaming.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/app/ai-gateway/streaming.md b/app/ai-gateway/streaming.md index 9bbf5e4208..5037d0d9b2 100644 --- a/app/ai-gateway/streaming.md +++ b/app/ai-gateway/streaming.md @@ -13,8 +13,7 @@ breadcrumbs: tags: - ai - streaming - - ai-proxy - + min_version: ai-gateway: '2.0' @@ -114,8 +113,8 @@ It also estimates tokens for LLM services that decided to not stream back the to Keep the following limitations in mind when you configure streaming for the {{site.ai_gateway}}: * Multiple AI features shouldn’t be expected to be applied and work simultaneously. -* You can't add Policies that use the [Response Transformer plugin](/plugins/response-transformer/) or any other response phase plugin when streaming is configured. -* The [AI Request Transformer plugin](/plugins/ai-request-transformer/) plugin **will** work, but the [AI Response Transformer plugin](/plugins/ai-response-transformer/) **will not**. This is because {{site.ai_gateway}} can't check every single response token against a separate system. +* You can't add Policies that use the [Response Transformer](/plugins/response-transformer/) or that otherwise trigger in the response phase when streaming is configured. +* The [AI Request Transformer Policy](/plugins/ai-request-transformer/) **will** work, but the [AI Response Transformer Policy](/plugins/ai-response-transformer/) **will not**. This is because {{site.ai_gateway}} can't check every single response token against a separate system. * Streaming currently doesn't work with the HTTP/2 protocol. You must disable this in your [`proxy_listen`](/gateway/configuration/#proxy-listen) configuration. ## Configuration