From b069b70bdef5593670c6146d74d695d1eac6e989 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 14:41:53 +0100 Subject: [PATCH 01/42] add howtos --- app/_data/schemas/frontmatter/tags.json | 1 + .../set-up-ai-proxy-advanced-with-kimi.md | 95 +++++++++++++++++++ .../ai-gateway/set-up-ai-proxy-with-kimi.md | 94 ++++++++++++++++++ app/ai-gateway/ai-providers/kimi.md | 87 +++++++++++++++++ 4 files changed, 277 insertions(+) create mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md create mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md create mode 100644 app/ai-gateway/ai-providers/kimi.md diff --git a/app/_data/schemas/frontmatter/tags.json b/app/_data/schemas/frontmatter/tags.json index 571c3211d8..2a2f02c525 100644 --- a/app/_data/schemas/frontmatter/tags.json +++ b/app/_data/schemas/frontmatter/tags.json @@ -117,6 +117,7 @@ "kafka", "kds", "key-auth", + "kimi", "kong-manager", "kongair", "kong-identity", diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md new file mode 100644 index 0000000000..b46f91e54b --- /dev/null +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md @@ -0,0 +1,95 @@ +--- +title: Set up AI Proxy Advanced with kimi in {{site.base_gateway}} +permalink: /how-to/set-up-ai-proxy-advanced-with-kimi/ +content_type: how_to +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: AI Proxy Advanced + url: /plugins/ai-proxy-advanced/ + +description: Configure the AI Proxy Advanced plugin to create a chat route using Kimi. + +products: + - gateway + - ai-gateway + +works_on: + - on-prem + - konnect + +min_version: + gateway: '3.14' + +plugins: + - ai-proxy-advanced + +entities: + - service + - route + - plugin + +tags: + - ai + - openai + - kimi + +tldr: + q: How do I use the AI Proxy Advanced plugin with Kimi? 
+ a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the OpenAI provider, a Kimi model, and your Kimi API key. + +tools: + - deck + +prereqs: + inline: + - title: Kimi + include_content: prereqs/deepseek + icon_url: /assets/icons/deepseek.svg + entities: + services: + - example-service + routes: + - example-route + +cleanup: + inline: + - title: Clean up Konnect environment + include_content: cleanup/platform/konnect + icon_url: /assets/icons/gateway.svg + - title: Destroy the {{site.base_gateway}} container + include_content: cleanup/products/gateway + icon_url: /assets/icons/gateway.svg +--- + +## Configure the plugin + +To set up AI Proxy Advanced with Kimi, use the `openai` provider, specify the [model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. + +In this example, we'll use the `deepseek-chat` model: + +{% entity_examples %} +entities: + plugins: + - name: ai-proxy-advanced + config: + targets: + - route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${api_key} + model: + provider: openai + name: deepseek-chat + options: + upstream_url: https://api.deepseek.com/chat/completions + max_tokens: 512 + temperature: 1.0 +variables: + api_key: + value: $DEEPSEEK_API_KEY +{% endentity_examples %} + +## Validate + +{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md new file mode 100644 index 0000000000..56a8687bac --- /dev/null +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md @@ -0,0 +1,94 @@ +--- +title: Set up AI Proxy with Kimi in {{site.base_gateway}} +permalink: /how-to/set-up-ai-proxy-with-kimi/ +content_type: how_to +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: AI Proxy + url: /plugins/ai-proxy/ + +description: Configure the AI Proxy plugin to 
create a chat route using Kimi. + +products: + - gateway + - ai-gateway + +works_on: + - on-prem + - konnect + +min_version: + gateway: '3.14' + +plugins: + - ai-proxy + +entities: + - service + - route + - plugin + +tags: + - ai + - openai + - kimi + +tldr: + q: How do I use the AI Proxy plugin with Kimi? + a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the OpenAI provider, a Kimi model, and your Kimi API key. + +tools: + - deck + +prereqs: + inline: + - title: Kimi + include_content: prereqs/deepseek + icon_url: /assets/icons/deepseek.svg + entities: + services: + - example-service + routes: + - example-route + +cleanup: + inline: + - title: Clean up Konnect environment + include_content: cleanup/platform/konnect + icon_url: /assets/icons/gateway.svg + - title: Destroy the {{site.base_gateway}} container + include_content: cleanup/products/gateway + icon_url: /assets/icons/gateway.svg +--- + +## Configure the plugin + +To set up AI Proxy with Kimi, use the `openai` provider, specify the [model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. 
+ +In this example, we'll use the `deepseek-chat` model: + +{% entity_examples %} +entities: + plugins: + - name: ai-proxy + config: + route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${api_key} + model: + provider: openai + name: deepseek-chat + options: + upstream_url: https://api.deepseek.com/chat/completions + max_tokens: 512 + temperature: 1.0 +variables: + api_key: + value: $DEEPSEEK_API_KEY +{% endentity_examples %} + +## Validate + +{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md new file mode 100644 index 0000000000..d5ecc74b9e --- /dev/null +++ b/app/ai-gateway/ai-providers/kimi.md @@ -0,0 +1,87 @@ +--- +title: "Kimi provider" +layout: reference +content_type: reference +description: Reference for supported capabilities for Kimi provider +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/ai-providers/ + +permalink: /ai-gateway/ai-providers/kimi/ + +works_on: + - on-prem + - konnect + +products: + - gateway + - ai-gateway + +tools: + - admin-api + - konnect-api + - deck + - kic + - terraform + +tags: + - ai + +plugins: + - ai-proxy-advanced + - ai-proxy + +min_version: + gateway: '3.14' + +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} plugins" + url: /plugins/?category=ai + - text: AI Providers + url: /ai-gateway/ai-providers/ + +how_to_list: + config: + products: + - ai-gateway + tags: + - kimi + description: true + view_more: false +--- + + +{% include plugins/ai-proxy/providers/providers.md providers=site.data.plugins.ai-proxy provider_name="Kimi" %} + +## Configure {{ provider.name }} with AI Proxy + +To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. 
+ +Here's a minimal configuration for chat completions: + +{% entity_example %} +type: plugin +data: + name: ai-proxy + config: + route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${key} + model: + provider: deepseek + name: deepseek-chat + +variables: + key: + value: "$DEEPSEEK_API_KEY" +{% endentity_example %} + +{:.success} +> For more configuration options and examples, see: +> - [AI Proxy examples](/plugins/ai-proxy/examples/) +> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) + +{% include plugins/ai-proxy/providers/how-tos.md %} From e8c9e39358b2daf95e120fef8790ce137dad84b5 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 14:52:03 +0100 Subject: [PATCH 02/42] correct API key and base urls --- .../set-up-ai-proxy-advanced-with-kimi.md | 16 ++++++++-------- .../ai-gateway/set-up-ai-proxy-with-kimi.md | 14 +++++++------- app/_includes/prereqs/kimi.md | 12 ++++++++++++ 3 files changed, 27 insertions(+), 15 deletions(-) create mode 100644 app/_includes/prereqs/kimi.md diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md index b46f91e54b..a58e5d4491 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md @@ -1,5 +1,5 @@ --- -title: Set up AI Proxy Advanced with kimi in {{site.base_gateway}} +title: Set up AI Proxy Advanced with Kimi in {{site.base_gateway}} permalink: /how-to/set-up-ai-proxy-advanced-with-kimi/ content_type: how_to related_resources: @@ -44,8 +44,8 @@ tools: prereqs: inline: - title: Kimi - include_content: prereqs/deepseek - icon_url: /assets/icons/deepseek.svg + include_content: prereqs/kimi + icon_url: /assets/icons/kimi.svg entities: services: - example-service @@ -64,9 +64,9 @@ cleanup: ## Configure the plugin -To set up AI Proxy Advanced with Kimi, use the `openai` provider, specify the 
[model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. +To set up AI Proxy Advanced with Kimi, use the `openai` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. -In this example, we'll use the `deepseek-chat` model: +In this example, we'll use the `kimi-k2.6` model: {% entity_examples %} entities: @@ -80,14 +80,14 @@ entities: header_value: Bearer ${api_key} model: provider: openai - name: deepseek-chat + name: kimi-k2.6 options: - upstream_url: https://api.deepseek.com/chat/completions + upstream_url: https://api.moonshot.ai/v1/chat/completions max_tokens: 512 temperature: 1.0 variables: api_key: - value: $DEEPSEEK_API_KEY + value: $MOONSHOT_API_KEY {% endentity_examples %} ## Validate diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md index 56a8687bac..c139e7eb29 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md @@ -44,8 +44,8 @@ tools: prereqs: inline: - title: Kimi - include_content: prereqs/deepseek - icon_url: /assets/icons/deepseek.svg + include_content: prereqs/kimi + icon_url: /assets/icons/kimi.svg entities: services: - example-service @@ -64,9 +64,9 @@ cleanup: ## Configure the plugin -To set up AI Proxy with Kimi, use the `openai` provider, specify the [model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. +To set up AI Proxy with Kimi, use the `openai` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. 
-In this example, we'll use the `deepseek-chat` model: +In this example, we'll use the `kimi-k2.6` model: {% entity_examples %} entities: @@ -79,14 +79,14 @@ entities: header_value: Bearer ${api_key} model: provider: openai - name: deepseek-chat + name: kimi-k2.6 options: - upstream_url: https://api.deepseek.com/chat/completions + upstream_url: https://api.moonshot.ai/v1/chat/completions max_tokens: 512 temperature: 1.0 variables: api_key: - value: $DEEPSEEK_API_KEY + value: $MOONSHOT_API_KEY {% endentity_examples %} ## Validate diff --git a/app/_includes/prereqs/kimi.md b/app/_includes/prereqs/kimi.md new file mode 100644 index 0000000000..61625fb065 --- /dev/null +++ b/app/_includes/prereqs/kimi.md @@ -0,0 +1,12 @@ +This tutorial requires a DeepSeek API key. + +1. Create a [Kimi Platform](https://platform.kimi.ai/) account. +1. Click **API keys**. +1. Click **Create new API key**. +1. In the **Name** field, enter `Kong`. +1. Click **Create API key**. +1. Click **Copy**. +1. Export the key to your environment: + ```sh + export DECK_MOONSHOT_API_KEY='YOUR MOONSHOT API KEY' + ``` \ No newline at end of file From a87f15da0bbf278207b353f59ffecdb406e99d84 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 14:59:52 +0100 Subject: [PATCH 03/42] add Kimi to vale dict --- .github/styles/base/Dictionary.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/styles/base/Dictionary.txt b/.github/styles/base/Dictionary.txt index 8d1266def5..5a570da9d5 100644 --- a/.github/styles/base/Dictionary.txt +++ b/.github/styles/base/Dictionary.txt @@ -465,6 +465,7 @@ kiali Kibana kibibytes kic +Kimi Knative Knative knative From e484f7170dd5ff40ec8925aa705701ee712d9ce4 Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 15:32:30 +0100 Subject: [PATCH 04/42] updated some provider references --- .github/styles/base/Dictionary.txt | 1 + app/ai-gateway/ai-providers/kimi.md | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git 
a/.github/styles/base/Dictionary.txt b/.github/styles/base/Dictionary.txt index 5a570da9d5..7dc61a22da 100644 --- a/.github/styles/base/Dictionary.txt +++ b/.github/styles/base/Dictionary.txt @@ -465,6 +465,7 @@ kiali Kibana kibibytes kic +kimi Kimi Knative Knative diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index d5ecc74b9e..336d8f6c20 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -71,12 +71,12 @@ data: header_name: Authorization header_value: Bearer ${key} model: - provider: deepseek - name: deepseek-chat + provider: kimi + name: kimi-k2.6 variables: key: - value: "$DEEPSEEK_API_KEY" + value: "$MOONSHOT_API_KEY" {% endentity_example %} {:.success} From 7648eebc93f746ed1cd72d197034fb3dffe57fdf Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 23 Apr 2026 15:58:50 +0100 Subject: [PATCH 05/42] kimi provider not just kimi via openai provider --- app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md | 2 +- app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md index a58e5d4491..828e673364 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md @@ -79,7 +79,7 @@ entities: header_name: Authorization header_value: Bearer ${api_key} model: - provider: openai + provider: kimi name: kimi-k2.6 options: upstream_url: https://api.moonshot.ai/v1/chat/completions diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md index c139e7eb29..959a4a6b22 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md @@ -78,8 +78,7 @@ entities: header_name: Authorization header_value: 
Bearer ${api_key} model: - provider: openai - name: kimi-k2.6 + provider: kimi options: upstream_url: https://api.moonshot.ai/v1/chat/completions max_tokens: 512 From 8dc0e0dfa749c44e70c46f54aedbac0d880b4c65 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 27 Apr 2026 10:28:24 +0100 Subject: [PATCH 06/42] Update app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md Co-authored-by: tomek-labuk --- app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md index 959a4a6b22..78035270c1 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md @@ -64,7 +64,7 @@ cleanup: ## Configure the plugin -To set up AI Proxy with Kimi, use the `openai` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. +To set up AI Proxy with Kimi, use the `kimi` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. 
In this example, we'll use the `kimi-k2.6` model: From ab88b10903b6addc1d6effeab881fdeb74a7f2bd Mon Sep 17 00:00:00 2001 From: jbaross Date: Tue, 28 Apr 2026 13:52:48 +0100 Subject: [PATCH 07/42] add kimi to landing pages list --- app/_landing_pages/ai-gateway/ai-providers.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/_landing_pages/ai-gateway/ai-providers.yaml b/app/_landing_pages/ai-gateway/ai-providers.yaml index 37efd703a4..87b3514db2 100644 --- a/app/_landing_pages/ai-gateway/ai-providers.yaml +++ b/app/_landing_pages/ai-gateway/ai-providers.yaml @@ -146,6 +146,13 @@ rows: icon: /assets/icons/deepseek.svg cta: url: /ai-gateway/ai-providers/deepseek/ + - blocks: + - type: icon_card + config: + title: Kimi + icon: /assets/icons/kimi.svg + cta: + url: /ai-gateway/ai-providers/kimi/ - blocks: - type: icon_card config: From cde8c25caff7755734d25c856ba1657e13118715 Mon Sep 17 00:00:00 2001 From: jbaross Date: Tue, 28 Apr 2026 14:43:25 +0100 Subject: [PATCH 08/42] kimi icon --- app/_assets/icons/products/kimi.svg | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 app/_assets/icons/products/kimi.svg diff --git a/app/_assets/icons/products/kimi.svg b/app/_assets/icons/products/kimi.svg new file mode 100644 index 0000000000..949cf16d79 --- /dev/null +++ b/app/_assets/icons/products/kimi.svg @@ -0,0 +1,4 @@ + + + + From 849d2c2797f04ec9c0020dbeb0255a61d2ecf3f4 Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 8 May 2026 11:27:49 +0100 Subject: [PATCH 09/42] add tag support --- .github/styles/base/Dictionary.txt | 2 ++ app/_data/schemas/frontmatter/tags.json | 1 + 2 files changed, 3 insertions(+) diff --git a/.github/styles/base/Dictionary.txt b/.github/styles/base/Dictionary.txt index e091914bfd..bf43db2838 100644 --- a/.github/styles/base/Dictionary.txt +++ b/.github/styles/base/Dictionary.txt @@ -1029,6 +1029,8 @@ validators Valkey vararg vc +vercel +Vercel viewport viewports vLLM diff --git a/app/_data/schemas/frontmatter/tags.json 
b/app/_data/schemas/frontmatter/tags.json index 571c3211d8..02ce09b3b2 100644 --- a/app/_data/schemas/frontmatter/tags.json +++ b/app/_data/schemas/frontmatter/tags.json @@ -214,6 +214,7 @@ "upgrade", "validation", "vault", + "vercel", "versioning", "vertex-ai", "vllm", From db00cd6116fce50a6cd41138f2cb5a2a0deba8cd Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 8 May 2026 12:03:21 +0100 Subject: [PATCH 10/42] initial skeleton --- app/_includes/prereqs/vercel.md | 13 +++++++++++++ app/_landing_pages/ai-gateway/ai-providers.yaml | 7 +++++++ jekyll.yml | 2 ++ 3 files changed, 22 insertions(+) create mode 100644 app/_includes/prereqs/vercel.md diff --git a/app/_includes/prereqs/vercel.md b/app/_includes/prereqs/vercel.md new file mode 100644 index 0000000000..9fc5d5fa92 --- /dev/null +++ b/app/_includes/prereqs/vercel.md @@ -0,0 +1,13 @@ +This tutorial requires a {{ site.vercel}} API key. + +1. Create a [{{ site.vercel}}](https://vercel.com/) account. +1. Click **AI Gateway** +1. Click **API keys**. +1. Click **Create API key**. +1. In the **Name** field, enter `Kong`. +1. Click **Create API key**. +1. Click **Copy**. +1. 
Export the key to your environment: + ```sh + export DECK_VERCEL_API_KEY='YOUR VERCEL API KEY' + ``` \ No newline at end of file diff --git a/app/_landing_pages/ai-gateway/ai-providers.yaml b/app/_landing_pages/ai-gateway/ai-providers.yaml index 37efd703a4..78cebfc661 100644 --- a/app/_landing_pages/ai-gateway/ai-providers.yaml +++ b/app/_landing_pages/ai-gateway/ai-providers.yaml @@ -69,6 +69,13 @@ rows: icon: /assets/icons/Vertex.svg cta: url: /ai-gateway/ai-providers/vertex/ + - blocks: + - type: icon_card + config: + title: Vercel + icon: /assets/icons/Vertex.svg + cta: + url: /ai-gateway/ai-providers/vercel/ - blocks: - type: icon_card config: diff --git a/jekyll.yml b/jekyll.yml index eb8bc7c287..194e8cf39a 100644 --- a/jekyll.yml +++ b/jekyll.yml @@ -130,6 +130,8 @@ konnect_catalog: Catalog metering_and_billing: Metering & Billing observability: Observability dev_portal: Dev Portal +# 3rd party product names +vercel: Vercel repos: developer: https://github.com/Kong/developer.konghq.com From 8658f1520cc27bf84c51ef236a4c99eea9e5382e Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 8 May 2026 14:56:23 +0100 Subject: [PATCH 11/42] add how-tos --- .../set-up-ai-proxy-advanced-with-vercel.md | 95 +++++++++++++++++++ .../ai-gateway/set-up-ai-proxy-with-vercel.md | 94 ++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md create mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md new file mode 100644 index 0000000000..5f22a04b22 --- /dev/null +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md @@ -0,0 +1,95 @@ +--- +title: Set up AI Proxy Advanced with Vercel in {{site.base_gateway}} +permalink: /how-to/set-up-ai-proxy-advanced-with-vercel/ +content_type: how_to +related_resources: + - text: 
"{{site.ai_gateway}}" + url: /ai-gateway/ + - text: AI Proxy Advanced + url: /plugins/ai-proxy-advanced/ + +description: Configure the AI Proxy Advanced plugin to create a chat route using Vercel. + +products: + - gateway + - ai-gateway + +works_on: + - on-prem + - konnect + +min_version: + gateway: '3.14' + +plugins: + - ai-proxy-advanced + +entities: + - service + - route + - plugin + +tags: + - ai + - openai + - vercel + +tldr: + q: How do I use the AI Proxy Advanced plugin with Vercel? + a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the OpenAI provider, a Vercel model, and your Vercel API key. + +tools: + - deck + +prereqs: + inline: + - title: Vercel + include_content: prereqs/vercel + icon_url: /assets/icons/vercel.svg + entities: + services: + - example-service + routes: + - example-route + +cleanup: + inline: + - title: Clean up Konnect environment + include_content: cleanup/platform/konnect + icon_url: /assets/icons/gateway.svg + - title: Destroy the {{site.base_gateway}} container + include_content: cleanup/products/gateway + icon_url: /assets/icons/gateway.svg +--- + +## Configure the plugin + +To set up AI Proxy Advanced with Vercel, use the `openai` provider, specify the [model](https://vercel.com/ai-gateway/models) and set the appropriate authentication header and upstream URL. 
+ +In this example, we'll use the `anthropic/claude-opus-4.6` model: + +{% entity_examples %} +entities: + plugins: + - name: ai-proxy-advanced + config: + targets: + - route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${api_key} + model: + provider: vercel + name: anthropic/claude-opus-4.6 + options: + upstream_url: https://ai-gateway.vercel.sh/v1/chat/completions + max_tokens: 512 + temperature: 1.0 +variables: + api_key: + value: $VERCEL_API_KEY +{% endentity_examples %} + +## Validate + +{% include how-tos/steps/ai-proxy-validate.md %} \ No newline at end of file diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md new file mode 100644 index 0000000000..fd904b50ed --- /dev/null +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md @@ -0,0 +1,94 @@ +--- +title: Set up AI Proxy with Vercel in {{site.base_gateway}} +permalink: /how-to/set-up-ai-proxy-with-vercel/ +content_type: how_to +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: AI Proxy + url: /plugins/ai-proxy/ + +description: Configure the AI Proxy plugin to create a chat route using Vercel. + +products: + - gateway + - ai-gateway + +works_on: + - on-prem + - konnect + +min_version: + gateway: '3.14' + +plugins: + - ai-proxy + +entities: + - service + - route + - plugin + +tags: + - ai + - openai + - vercel + +tldr: + q: How do I use the AI Proxy plugin with Vercel? + a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the OpenAI provider, a Vercel model, and your Vercel API key. 
+ +tools: + - deck + +prereqs: + inline: + - title: Vercel + include_content: prereqs/vercel + icon_url: /assets/icons/vercel.svg + entities: + services: + - example-service + routes: + - example-route + +cleanup: + inline: + - title: Clean up Konnect environment + include_content: cleanup/platform/konnect + icon_url: /assets/icons/gateway.svg + - title: Destroy the {{site.base_gateway}} container + include_content: cleanup/products/gateway + icon_url: /assets/icons/gateway.svg +--- + +## Configure the plugin + +To set up AI Proxy with Vercel, use the `vercel` provider, specify the [model](https://vercel.com/ai-gateway/models) and set the appropriate authentication header and upstream URL. + +In this example, we'll use the `anthropic/claude-opus-4.6` model: + +{% entity_examples %} +entities: + plugins: + - name: ai-proxy + config: + route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${api_key} + model: + provider: vercel + name: anthropic/claude-opus-4.6 + options: + upstream_url: https://ai-gateway.vercel.sh/v1/chat/completions + max_tokens: 512 + temperature: 1.0 +variables: + api_key: + value: $VERCEL_API_KEY +{% endentity_examples %} + +## Validate + +{% include how-tos/steps/ai-proxy-validate.md %} \ No newline at end of file From d221c64736630d67b2c048a02e3c66f0644e3dd3 Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 8 May 2026 15:06:30 +0100 Subject: [PATCH 12/42] add main provider page --- app/ai-gateway/ai-providers/vercel.md | 87 +++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 app/ai-gateway/ai-providers/vercel.md diff --git a/app/ai-gateway/ai-providers/vercel.md b/app/ai-gateway/ai-providers/vercel.md new file mode 100644 index 0000000000..f5b9b4c989 --- /dev/null +++ b/app/ai-gateway/ai-providers/vercel.md @@ -0,0 +1,87 @@ +--- +title: "Vercel provider" +layout: reference +content_type: reference +description: Reference for supported capabilities for Vercel provider +breadcrumbs: + - 
/ai-gateway/ + - /ai-gateway/ai-providers/ + +permalink: /ai-gateway/ai-providers/vercel/ + +works_on: + - on-prem + - konnect + +products: + - gateway + - ai-gateway + +tools: + - admin-api + - konnect-api + - deck + - kic + - terraform + +tags: + - ai + +plugins: + - ai-proxy-advanced + - ai-proxy + +min_version: + gateway: '2.0.0' + +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} plugins" + url: /plugins/?category=ai + - text: AI Providers + url: /ai-gateway/ai-providers/ + +how_to_list: + config: + products: + - ai-gateway + tags: + - vercel + description: true + view_more: false +--- + + +{% include plugins/ai-proxy/providers/providers.md providers=site.data.plugins.ai-proxy provider_name="Vercel" %} + +## Configure {{ provider.name }} with AI Proxy + +To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. + +Here's a minimal configuration for chat completions: + +{% entity_example %} +type: plugin +data: + name: ai-proxy + config: + route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${key} + model: + provider: vercel + name: vercel-k2.6 + +variables: + key: + value: "$MOONSHOT_API_KEY" +{% endentity_example %} + +{:.success} +> For more configuration options and examples, see: +> - [AI Proxy examples](/plugins/ai-proxy/examples/) +> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) + +{% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file From f18f21c53ad541a6f827bec39079ab3f2abde456 Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 8 May 2026 15:33:49 +0100 Subject: [PATCH 13/42] add vercel as reseller note --- app/ai-gateway/ai-providers/vercel.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/ai-gateway/ai-providers/vercel.md b/app/ai-gateway/ai-providers/vercel.md index f5b9b4c989..0b2ec2df4b 100644 --- 
a/app/ai-gateway/ai-providers/vercel.md +++ b/app/ai-gateway/ai-providers/vercel.md @@ -59,6 +59,8 @@ how_to_list: To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. +Note that, {{ site.vercel }} hosts [models](https://vercel.com/ai-gateway/models) from other providers so in this example we use `anthropic/claude-opus-4.6`. + Here's a minimal configuration for chat completions: {% entity_example %} From bff9804a16203852d252bc75bf3d7212f79307a5 Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 8 May 2026 17:00:24 +0100 Subject: [PATCH 14/42] correct version numbers of deps --- .../ai-gateway/set-up-ai-proxy-advanced-with-vercel.md | 4 ++-- app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md index 5f22a04b22..3247688506 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md @@ -19,7 +19,7 @@ works_on: - konnect min_version: - gateway: '3.14' + gateway: '2.0.0' plugins: - ai-proxy-advanced @@ -45,7 +45,7 @@ prereqs: inline: - title: Vercel include_content: prereqs/vercel - icon_url: /assets/icons/vercel.svg + icon_url: /assets/icons/gateway.svg entities: services: - example-service diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md index fd904b50ed..57ab819bf8 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md @@ -19,7 +19,7 @@ works_on: - konnect min_version: - gateway: '3.14' + gateway: '2.0.0' plugins: - ai-proxy @@ -45,7 +45,7 @@ prereqs: inline: - title: Vercel include_content: prereqs/vercel - icon_url: 
/assets/icons/vercel.svg + icon_url: /assets/icons/gateway.svg entities: services: - example-service From 2c6e45c887ffffe0b833fd506e79c1b07b6d233e Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 11 May 2026 14:31:36 +0100 Subject: [PATCH 15/42] liquid fix for vale --- app/_includes/prereqs/vercel.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/_includes/prereqs/vercel.md b/app/_includes/prereqs/vercel.md index 9fc5d5fa92..2cc5c53384 100644 --- a/app/_includes/prereqs/vercel.md +++ b/app/_includes/prereqs/vercel.md @@ -1,6 +1,6 @@ This tutorial requires a {{ site.vercel}} API key. -1. Create a [{{ site.vercel}}](https://vercel.com/) account. +1. Create a [{{ site.vercel }}](https://vercel.com/) account. 1. Click **AI Gateway** 1. Click **API keys**. 1. Click **Create API key**. From efc7a1e24cfc15a9cc0de837fda83a25b4fd03ff Mon Sep 17 00:00:00 2001 From: jbaross Date: Tue, 12 May 2026 11:51:07 +0100 Subject: [PATCH 16/42] add vercel icon svg --- .../ai-gateway/set-up-ai-proxy-advanced-with-vercel.md | 2 +- app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md | 2 +- app/assets/icons/vercel.svg | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 app/assets/icons/vercel.svg diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md index 3247688506..1e89c8d988 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md @@ -45,7 +45,7 @@ prereqs: inline: - title: Vercel include_content: prereqs/vercel - icon_url: /assets/icons/gateway.svg + icon_url: /assets/icons/vercel.svg entities: services: - example-service diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md index 57ab819bf8..4dabd49b86 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md +++ 
b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md @@ -45,7 +45,7 @@ prereqs: inline: - title: Vercel include_content: prereqs/vercel - icon_url: /assets/icons/gateway.svg + icon_url: /assets/icons/vercel.svg entities: services: - example-service diff --git a/app/assets/icons/vercel.svg b/app/assets/icons/vercel.svg new file mode 100644 index 0000000000..72948d01a7 --- /dev/null +++ b/app/assets/icons/vercel.svg @@ -0,0 +1,3 @@ + + + From 116ced5388b6ceb7b9cb2e48258c894e389e1d5e Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 15 May 2026 16:52:25 +0100 Subject: [PATCH 17/42] add yaml files for provider --- app/_data/plugins/ai-proxy.yaml | 22 ++++++++++++ .../set-up-ai-proxy-advanced-with-vercel.md | 6 ++-- .../ai-proxy/examples/vercel-chat-route.yaml | 34 +++++++++++++++++++ app/ai-gateway/ai-providers/vercel.md | 2 +- 4 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 app/_kong_plugins/ai-proxy/examples/vercel-chat-route.yaml diff --git a/app/_data/plugins/ai-proxy.yaml b/app/_data/plugins/ai-proxy.yaml index 54082456a4..6ac56b6105 100644 --- a/app/_data/plugins/ai-proxy.yaml +++ b/app/_data/plugins/ai-proxy.yaml @@ -919,6 +919,28 @@ providers: provider_specific: [] statistics_logging: [] + - name: 'Vercel' + url_patterns: + - 'https://ai-gateway.vercel.sh' + min_version: '3.14' + chat: + supported: true + streaming: true + upstream_path: '`/v1/chat/completions`' + route_type: 'llm/v1/chat' + model_example: 'openai/gpt-5.5' + min_version: '3.14' + embeddings: + supported: false + image: + generations: + supported: false + edits: + supported: false + limitations: + provider_specific: [] + statistics_logging: [] + parameters: provider: 'config.model.provider' route_type: 'config.route_type' diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md index 1e89c8d988..fce6ff372c 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md +++ 
b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md @@ -64,9 +64,9 @@ cleanup: ## Configure the plugin -To set up AI Proxy Advanced with Vercel, use the `openai` provider, specify the [model](https://vercel.com/ai-gateway/models) and set the appropriate authentication header and upstream URL. +To set up AI Proxy Advanced with Vercel, use the `vercel` provider, specify the [model](https://vercel.com/ai-gateway/models) and set the appropriate authentication header and upstream URL. -In this example, we'll use the `anthropic/claude-opus-4.6` model: +In this example, we'll use the `openai/gpt-5.5` model: {% entity_examples %} entities: @@ -80,7 +80,7 @@ entities: header_value: Bearer ${api_key} model: provider: vercel - name: anthropic/claude-opus-4.6 + name: openai/gpt-5.5 options: upstream_url: https://ai-gateway.vercel.sh/v1/chat/completions max_tokens: 512 diff --git a/app/_kong_plugins/ai-proxy/examples/vercel-chat-route.yaml b/app/_kong_plugins/ai-proxy/examples/vercel-chat-route.yaml new file mode 100644 index 0000000000..e72b4cbe63 --- /dev/null +++ b/app/_kong_plugins/ai-proxy/examples/vercel-chat-route.yaml @@ -0,0 +1,34 @@ + +title: 'Chat route with Vercel' +description: 'Configure a chat route using the Vercel AI Gateway.' + +weight: 900 +min_version: + gateway: '3.14' +requirements: +- Vercel account + +config: + route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${key} + model: + provider: vercel + name: openai/gpt-5.5 + options: + upstream_url: https://ai-gateway.vercel.sh/v1/chat/completions + max_tokens: 512 + temperature: 1.0 + +variables: + key: + value: $VERCEL_API_KEY + description: The API key to use to connect to Vercel. 
+ +tools: + - deck + - admin-api + - konnect-api + - kic + - terraform diff --git a/app/ai-gateway/ai-providers/vercel.md b/app/ai-gateway/ai-providers/vercel.md index 0b2ec2df4b..02652d4e11 100644 --- a/app/ai-gateway/ai-providers/vercel.md +++ b/app/ai-gateway/ai-providers/vercel.md @@ -74,7 +74,7 @@ data: header_value: Bearer ${key} model: provider: vercel - name: vercel-k2.6 + name: openai/gpt-5.5 variables: key: From e03383e3e8748a1768f2214fabf0e1782e88f7c1 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 18 May 2026 14:51:39 +0100 Subject: [PATCH 18/42] use correct svg for vercel in ai-providers yaml Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- app/_landing_pages/ai-gateway/ai-providers.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/_landing_pages/ai-gateway/ai-providers.yaml b/app/_landing_pages/ai-gateway/ai-providers.yaml index 78cebfc661..06e65f2ad1 100644 --- a/app/_landing_pages/ai-gateway/ai-providers.yaml +++ b/app/_landing_pages/ai-gateway/ai-providers.yaml @@ -66,14 +66,14 @@ rows: - type: icon_card config: title: Vertex AI - icon: /assets/icons/Vertex.svg + icon: /assets/icons/vertex.svg cta: url: /ai-gateway/ai-providers/vertex/ - blocks: - type: icon_card config: title: Vercel - icon: /assets/icons/Vertex.svg + icon: /assets/icons/vercel.svg cta: url: /ai-gateway/ai-providers/vercel/ - blocks: From dcac1d69b3b7d45e22d902c85f86c85ac71507c2 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 18 May 2026 14:55:27 +0100 Subject: [PATCH 19/42] add missing yaml entry and consistent version reqs --- app/_data/plugins/ai-proxy.yaml | 22 +++++++++++++++++++ .../set-up-ai-proxy-advanced-with-kimi.md | 2 +- app/ai-gateway/ai-providers/kimi.md | 2 +- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/app/_data/plugins/ai-proxy.yaml b/app/_data/plugins/ai-proxy.yaml index 54082456a4..a5c9a3a88c 100644 --- a/app/_data/plugins/ai-proxy.yaml +++ 
b/app/_data/plugins/ai-proxy.yaml @@ -919,6 +919,28 @@ providers: provider_specific: [] statistics_logging: [] + - name: 'Kimi' + url_patterns: + - 'https://api.moonshot.ai' + min_version: '2.0.0' + chat: + supported: true + streaming: true + upstream_path: '`/v1/chat/completions`' + route_type: 'llm/v1/chat' + model_example: 'kimi-k2.6' + min_version: '2.0.0' + embeddings: + supported: false + image: + generations: + supported: false + edits: + supported: false + limitations: + provider_specific: [] + statistics_logging: [] + parameters: provider: 'config.model.provider' route_type: 'config.route_type' diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md index 828e673364..5b36fb4e4d 100644 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md +++ b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md @@ -19,7 +19,7 @@ works_on: - konnect min_version: - gateway: '3.14' + gateway: '2.0.0' plugins: - ai-proxy-advanced diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index 336d8f6c20..f6146e70c3 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -32,7 +32,7 @@ plugins: - ai-proxy min_version: - gateway: '3.14' + gateway: '2.0.0' related_resources: - text: "{{site.ai_gateway}}" From 933344cddd20b6b7cd1598c49bdb5c48a2bd3df9 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 18 May 2026 14:58:21 +0100 Subject: [PATCH 20/42] copilot fixes --- app/_data/plugins/ai-proxy.yaml | 4 ++-- app/ai-gateway/ai-providers/vercel.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/_data/plugins/ai-proxy.yaml b/app/_data/plugins/ai-proxy.yaml index 6ac56b6105..cf4a220c80 100644 --- a/app/_data/plugins/ai-proxy.yaml +++ b/app/_data/plugins/ai-proxy.yaml @@ -922,14 +922,14 @@ providers: - name: 'Vercel' url_patterns: - 'https://ai-gateway.vercel.sh' - min_version: '3.14' + 
min_version: '2.0.0' chat: supported: true streaming: true upstream_path: '`/v1/chat/completions`' route_type: 'llm/v1/chat' model_example: 'openai/gpt-5.5' - min_version: '3.14' + min_version: '2.0.0' embeddings: supported: false image: diff --git a/app/ai-gateway/ai-providers/vercel.md b/app/ai-gateway/ai-providers/vercel.md index 02652d4e11..f0bcd6b458 100644 --- a/app/ai-gateway/ai-providers/vercel.md +++ b/app/ai-gateway/ai-providers/vercel.md @@ -59,7 +59,7 @@ how_to_list: To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. -Note that, {{ site.vercel }} hosts [models](https://vercel.com/ai-gateway/models) from other providers so in this example we use `anthropic/claude-opus-4.6`. +Note that {{ site.vercel }} hosts [models](https://vercel.com/ai-gateway/models) from other providers, so in this example we use `openai/gpt-5.5`. Here's a minimal configuration for chat completions: @@ -78,7 +78,7 @@ data: variables: key: - value: "$MOONSHOT_API_KEY" + value: "$VERCEL_API_KEY" {% endentity_example %} {:.success} From a23dedd3eb7a8665f7337a4a46c12a26da1985e7 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 18 May 2026 16:48:11 +0100 Subject: [PATCH 21/42] vale fix --- app/_includes/prereqs/vercel.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/_includes/prereqs/vercel.md b/app/_includes/prereqs/vercel.md index 2cc5c53384..69b728f34d 100644 --- a/app/_includes/prereqs/vercel.md +++ b/app/_includes/prereqs/vercel.md @@ -1,7 +1,7 @@ This tutorial requires a {{ site.vercel}} API key. 1. Create a [{{ site.vercel }}](https://vercel.com/) account. -1. Click **AI Gateway** +1. Click **{{ site.ai_gateway }}**. 1. Click **API keys**. 1. Click **Create API key**. 1. In the **Name** field, enter `Kong`. 
From bc308c99830976f1c50a8775655599bf4a046b67 Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 29 May 2026 14:51:34 +0100 Subject: [PATCH 22/42] remove new provider how-tos --- .../set-up-ai-proxy-advanced-with-kimi.md | 95 ------------------- .../set-up-ai-proxy-advanced-with-vercel.md | 95 ------------------- .../ai-gateway/set-up-ai-proxy-with-kimi.md | 93 ------------------ .../ai-gateway/set-up-ai-proxy-with-vercel.md | 94 ------------------ app/ai-gateway/ai-providers/kimi.md | 7 -- app/ai-gateway/ai-providers/vercel.md | 7 -- 6 files changed, 391 deletions(-) delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md deleted file mode 100644 index 5b36fb4e4d..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-kimi.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Kimi in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-kimi/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Kimi. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '2.0.0' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - openai - - kimi - -tldr: - q: How do I use the AI Proxy Advanced plugin with Kimi? 
- a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the OpenAI provider, a Kimi model, and your Kimi API key. - -tools: - - deck - -prereqs: - inline: - - title: Kimi - include_content: prereqs/kimi - icon_url: /assets/icons/kimi.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy Advanced with Kimi, use the `openai` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. - -In this example, we'll use the `kimi-k2.6` model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${api_key} - model: - provider: kimi - name: kimi-k2.6 - options: - upstream_url: https://api.moonshot.ai/v1/chat/completions - max_tokens: 512 - temperature: 1.0 -variables: - api_key: - value: $MOONSHOT_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md deleted file mode 100644 index fce6ff372c..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vercel.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Vercel in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-vercel/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - 
-description: Configure the AI Proxy Advanced plugin to create a chat route using Vercel. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '2.0.0' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - openai - - vercel - -tldr: - q: How do I use the AI Proxy Advanced plugin with Vercel? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the OpenAI provider, a Vercel model, and your Vercel API key. - -tools: - - deck - -prereqs: - inline: - - title: Vercel - include_content: prereqs/vercel - icon_url: /assets/icons/vercel.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy Advanced with Vercel, use the `vercel` provider, specify the [model](https://vercel.com/ai-gateway/models) and set the appropriate authentication header and upstream URL. 
- -In this example, we'll use the `openai/gpt-5.5` model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${api_key} - model: - provider: vercel - name: openai/gpt-5.5 - options: - upstream_url: https://ai-gateway.vercel.sh/v1/chat/completions - max_tokens: 512 - temperature: 1.0 -variables: - api_key: - value: $VERCEL_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} \ No newline at end of file diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md deleted file mode 100644 index 78035270c1..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-kimi.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Set up AI Proxy with Kimi in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-kimi/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using Kimi. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.14' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - openai - - kimi - -tldr: - q: How do I use the AI Proxy plugin with Kimi? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the OpenAI provider, a Kimi model, and your Kimi API key. 
- -tools: - - deck - -prereqs: - inline: - - title: Kimi - include_content: prereqs/kimi - icon_url: /assets/icons/kimi.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy with Kimi, use the `kimi` provider, specify the [model](https://platform.kimi.ai/docs/models) and set the appropriate authentication header and upstream URL. - -In this example, we'll use the `kimi-k2.6` model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${api_key} - model: - provider: kimi - options: - upstream_url: https://api.moonshot.ai/v1/chat/completions - max_tokens: 512 - temperature: 1.0 -variables: - api_key: - value: $MOONSHOT_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md deleted file mode 100644 index 4dabd49b86..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vercel.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Set up AI Proxy with Vercel in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-vercel/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using Vercel. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '2.0.0' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - openai - - vercel - -tldr: - q: How do I use the AI Proxy plugin with Vercel? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the OpenAI provider, a Vercel model, and your Vercel API key. - -tools: - - deck - -prereqs: - inline: - - title: Vercel - include_content: prereqs/vercel - icon_url: /assets/icons/vercel.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy with Vercel, use the `vercel` provider, specify the [model](https://vercel.com/ai-gateway/models) and set the appropriate authentication header and upstream URL. 
- -In this example, we'll use the `anthropic/claude-opus-4.6` model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${api_key} - model: - provider: vercel - name: anthropic/claude-opus-4.6 - options: - upstream_url: https://ai-gateway.vercel.sh/v1/chat/completions - max_tokens: 512 - temperature: 1.0 -variables: - api_key: - value: $VERCEL_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index f6146e70c3..0130533d70 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -78,10 +78,3 @@ variables: key: value: "$MOONSHOT_API_KEY" {% endentity_example %} - -{:.success} -> For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) - -{% include plugins/ai-proxy/providers/how-tos.md %} diff --git a/app/ai-gateway/ai-providers/vercel.md b/app/ai-gateway/ai-providers/vercel.md index f0bcd6b458..fb5afc1071 100644 --- a/app/ai-gateway/ai-providers/vercel.md +++ b/app/ai-gateway/ai-providers/vercel.md @@ -80,10 +80,3 @@ variables: key: value: "$VERCEL_API_KEY" {% endentity_example %} - -{:.success} -> For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) - -{% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file From 62e498f8abee82b6265595f6a86b678f0f4a0ad0 Mon Sep 17 00:00:00 2001 From: jbaross Date: Fri, 29 May 2026 16:17:39 +0100 Subject: [PATCH 23/42] links to entities --- app/_landing_pages/ai-gateway/ai-providers.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/app/_landing_pages/ai-gateway/ai-providers.yaml b/app/_landing_pages/ai-gateway/ai-providers.yaml index b485256517..f4830f7df1 100644 --- a/app/_landing_pages/ai-gateway/ai-providers.yaml +++ b/app/_landing_pages/ai-gateway/ai-providers.yaml @@ -23,7 +23,7 @@ rows: blocks: - type: text text: | - The core of [{{site.ai_gateway}}](/ai-gateway/) is the ability to route AI requests to various providers exposed via a provider-agnostic API. This normalized API layer affords developers and organizations multiple benefits: + The core of [{{site.ai_gateway}}](/ai-gateway/) is the ability to serve AI [models](/ai-gateway/entities/model/) from various [providers](/ai-gateway/entities/provider/) via a provider-agnostic API. This normalized API layer affords developers and organizations multiple benefits: - type: unordered_list items: @@ -31,7 +31,7 @@ rows: - Centralized AI provider credential management - The {{site.ai_gateway}} gives developers and organizations a central point of governance and observability over AI data and usage - Request routing can be dynamic, allowing AI usage to be optimized based on various metrics - - AI services can be used by other {{site.base_gateway}} plugins to augment non-AI API traffic + - AI services can be used by other Kong tools to augment non-AI API traffic - column_count: 3 columns: - blocks: @@ -175,7 +175,7 @@ rows: - type: text text: | {:.info} - > Note that some providers may not be available depending on your {{site.base_gateway}} version, and some providers don't support all route types. + > Note that some providers may not be available, or may require different configuration steps, depending on your {{site.base_gateway}} version, and some providers don't support all route types. > See the specific provider documentation for more details. 
- header: From d6c763119db9a113d0efafedaa9513ab72310287 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Mon, 1 Jun 2026 11:39:16 +0200 Subject: [PATCH 24/42] feat(ai-gateway): AI Gateway 2.0 entities (#5263) --- .github/styles/base/Dictionary.txt | 1 + api-specs/konnect/ai-gateway/v2/openapi.yaml | 19 + app/_ai_gateway_entities/ai-agent.md | 309 ++++++++++ .../ai-consumer-credential.md | 130 +++++ app/_ai_gateway_entities/ai-consumer-group.md | 135 +++++ app/_ai_gateway_entities/ai-consumer.md | 140 +++++ .../ai-data-plane-certificate.md | 124 ++++ .../ai-data-plane-node.md | 95 +++ app/_ai_gateway_entities/ai-gateway.md | 127 ++++ app/_ai_gateway_entities/ai-mcp-server.md | 544 ++++++++++++++++++ app/_ai_gateway_entities/ai-model.md | 419 ++++++++++++++ app/_ai_gateway_entities/ai-policy.md | 139 +++++ app/_ai_gateway_entities/ai-provider.md | 153 +++++ app/_ai_gateway_entities/ai-vault.md | 106 ++++ app/_api/konnect/ai-gateway/_index.md | 3 + app/_assets/javascripts/apps/EntitySchema.vue | 9 +- app/_data/entity_examples/config.yml | 44 +- app/_data/konnect_oas_data.json | 21 + app/_data/products/ai-gateway.yml | 8 +- .../entity_example/format/admin-api.md | 12 +- .../components/entity_example/format/deck.md | 4 +- .../entity_example/format/konnect-api.md | 12 +- .../components/entity_example/format/ui_ai.md | 83 +++ app/_landing_pages/ai-gateway/entities.yaml | 109 ++++ .../entity_example/presenters/admin-api.rb | 32 +- .../entity_example/presenters/konnect-api.rb | 31 +- .../drops/entity_example/presenters/ui.rb | 6 +- app/_plugins/drops/entity_schema.rb | 10 +- jekyll.yml | 12 + vite.config.ts | 6 +- 30 files changed, 2808 insertions(+), 35 deletions(-) create mode 100644 api-specs/konnect/ai-gateway/v2/openapi.yaml create mode 100644 app/_ai_gateway_entities/ai-agent.md create mode 100644 app/_ai_gateway_entities/ai-consumer-credential.md create mode 100644 app/_ai_gateway_entities/ai-consumer-group.md create mode 100644 
app/_ai_gateway_entities/ai-consumer.md create mode 100644 app/_ai_gateway_entities/ai-data-plane-certificate.md create mode 100644 app/_ai_gateway_entities/ai-data-plane-node.md create mode 100644 app/_ai_gateway_entities/ai-gateway.md create mode 100644 app/_ai_gateway_entities/ai-mcp-server.md create mode 100644 app/_ai_gateway_entities/ai-model.md create mode 100644 app/_ai_gateway_entities/ai-policy.md create mode 100644 app/_ai_gateway_entities/ai-provider.md create mode 100644 app/_ai_gateway_entities/ai-vault.md create mode 100644 app/_api/konnect/ai-gateway/_index.md create mode 100644 app/_includes/components/entity_example/format/ui_ai.md create mode 100644 app/_landing_pages/ai-gateway/entities.yaml diff --git a/.github/styles/base/Dictionary.txt b/.github/styles/base/Dictionary.txt index 4656efd372..9061bb1108 100644 --- a/.github/styles/base/Dictionary.txt +++ b/.github/styles/base/Dictionary.txt @@ -13,6 +13,7 @@ ai_rate_limiting_policy agentic Agno Agno's +AIGateway Alertmanager Alibaba allow_terminated diff --git a/api-specs/konnect/ai-gateway/v2/openapi.yaml b/api-specs/konnect/ai-gateway/v2/openapi.yaml new file mode 100644 index 0000000000..170981dc02 --- /dev/null +++ b/api-specs/konnect/ai-gateway/v2/openapi.yaml @@ -0,0 +1,19 @@ +openapi: 3.0.0 +info: + title: Konnect AI Gateway + version: 0.0.0 + description: Internal API for managing Kong AI Gateway policies. 
+ contact: + name: Kong + url: 'https://cloud.konghq.com' +servers: + - url: 'https://us.api.konghq.com/v1' + description: US Region Base URL + - url: 'https://eu.api.konghq.com/v1' + description: EU Region Base URL + - url: 'https://au.api.konghq.com/v1' + description: AU Region Base URL + - url: 'https://me.api.konghq.com/v1' + description: Middle-East Production region + - url: 'https://in.api.konghq.com/v1' + description: India Production region diff --git a/app/_ai_gateway_entities/ai-agent.md b/app/_ai_gateway_entities/ai-agent.md new file mode 100644 index 0000000000..9ffd7b9cb8 --- /dev/null +++ b/app/_ai_gateway_entities/ai-agent.md @@ -0,0 +1,309 @@ +--- +title: AI Agents +content_type: reference +entities: + - ai-agent +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-agent/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Agent entity used by {{site.ai_gateway}} for A2A and HTTP agent configurations. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayAgent +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + - text: A2A protocol specification + url: https://a2aproject.github.io/A2A/ +faqs: + - q: What's the difference between an `a2a` Agent and an `http` Agent? + a: | + An `a2a` Agent applies Agent-to-Agent protocol awareness (JSON-RPC and REST binding detection, + agent-card URL rewriting, structured A2A telemetry) to traffic flowing to an upstream agent. + An `http` Agent is a generic HTTP route to an upstream agent without A2A-specific processing. + Use `a2a` when the upstream speaks the A2A protocol and you want observability tied to A2A + task and message semantics. 
+ + - q: Does the Agent entity modify request routing or aggregate responses? + a: | + No. The runtime behind an Agent operates as a transparent proxy. It detects A2A requests, + records telemetry, and rewrites agent-card URLs to the gateway address. It does not change + routing decisions, merge responses, or hold task state on behalf of clients. + + - q: Why is the agent-card URL rewritten? + a: | + A2A clients use agent-card responses (at `/.well-known/agent-card.json`) to discover where to + send subsequent requests. Rewriting the `url` field, and any `additionalInterfaces[].url` + fields, to the {{site.ai_gateway}} address means clients route follow-up traffic through the + gateway instead of bypassing it. The rewrite honors `X-Forwarded-*` headers when the gateway + sits behind a load balancer. + + - q: How does streaming work? + a: | + Server-sent events (`Content-Type: text/event-stream`) pass through chunk-by-chunk without + buffering. The runtime counts SSE events, captures time-to-first-byte, and extracts task state + from the final event for analytics. Latency is preserved. + + - q: How do I limit which consumers can reach an Agent? + a: | + Set the `acls` field on the Agent with allow or deny lists. Each entry is a string that + references a Consumer, Consumer Group, or Authenticated Group by name. + + - q: Can the same plugin run on an Agent that I'd attach to a route or service? + a: | + Plugin configuration that applies to the Agent goes through the [Policy entity](/ai-gateway/entities/ai-policy/). + Attach Policies to the Agent through its `policies` field. + + - q: How do I configure agents in on-prem deployments? + a: | + {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + For on-prem deployments, configure agent proxying using {{site.base_gateway}} plugins directly (for example, the AI A2A Proxy plugin). + See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. 
+--- + +## What is an Agent? + +An Agent is a first-class {{site.ai_gateway}} entity that represents an upstream agent endpoint exposed through {{site.ai_gateway}}. An Agent has a type, either `a2a` for [Agent-to-Agent protocol](https://a2aproject.github.io/A2A/) traffic or `http` for generic HTTP agent routing, and a configuration that points {{site.ai_gateway}} at the upstream and shapes how requests flow. + +For `http` type Agents, requests are proxied without A2A-specific processing. For `a2a` type Agents, {{site.ai_gateway}} adds protocol-aware behavior on top of plain proxying: it detects A2A requests across both JSON-RPC and REST bindings, rewrites agent-card URLs so clients discover the gateway as the canonical endpoint, and emits structured A2A telemetry to {{site.konnect_short_name}} analytics and OpenTelemetry. + +Agents can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/agents +{% endtable %} + +## How A2A traffic flows + +When an Agent has type `a2a`, proxied traffic is processed in four phases: + +1. **Access**. Detects whether the request is an A2A operation (JSON-RPC or REST binding). When statistics logging is enabled, this starts an OpenTelemetry span and records the request body for payload logging if that's also enabled. +1. **Header filter**. Detects streaming responses (`Content-Type: text/event-stream`) and records time to first byte. Buffers agent-card responses for URL rewriting. +1. **Body filter**. Streams SSE chunks through to the client without buffering. Buffers non-streaming responses to extract task metadata. Rewrites agent-card URLs to the gateway address. Emits analytics at end of response. +1. **Log**. 
Finalizes the OpenTelemetry span with task state, task ID, and any error information. + +Non-A2A traffic, and traffic to `http` Agents, is proxied without these steps. + + +{% mermaid %} +sequenceDiagram + autonumber + participant Client as A2A Client + participant Gateway as {{site.ai_gateway}}<br/>(Agent) + participant Agent as Upstream A2A Agent + + Client->>Gateway: A2A request (JSON-RPC or REST) + Note over Gateway: Detect A2A binding and method<br/>Start OTel span (if logging enabled) + + Gateway->>Agent: Proxied request<br/>(Accept-Encoding removed if logging enabled) + + alt Streaming response (SSE) + Agent-->>Gateway: text/event-stream chunks + Note over Gateway: Pass through each chunk<br/>Count SSE events, track TTFB + Gateway-->>Client: SSE chunks (unchanged) + Note over Gateway: On final chunk:<br/>Extract task state, set analytics + else Non-streaming response + Agent->>Gateway: JSON response + Note over Gateway: Buffer response<br/>Extract task metadata + Gateway->>Client: Response (unchanged) + end + + Note over Gateway: Finish OTel span<br/>Emit ai.a2a metrics to log plugins +{% endmermaid %} + + +## Core A2A protocol elements + +A2A defines the communication elements between agents. The runtime surfaces data tied to these elements in log output and OpenTelemetry spans for `a2a` Agents. + +{% table %} +columns: + - title: Element + key: element + - title: Description + key: description + - title: Purpose + key: purpose +rows: + - element: Agent Card + description: A JSON metadata document describing an agent's identity, capabilities, endpoint, skills, and authentication requirements. + purpose: Enables clients to discover agents and understand how to interact with them. + - element: Task + description: A stateful unit of work initiated by an agent, with a unique ID and defined lifecycle. + purpose: Tracks long-running operations and supports multi-turn interactions. + - element: Message + description: A single turn of communication between a client and an agent, containing content and a role (`user` or `agent`). + purpose: Conveys instructions, context, questions, answers, or status updates that are not formal artifacts. + - element: Part + description: The fundamental content container (for example, `TextPart`, `FilePart`, `DataPart`) used within messages and artifacts. + purpose: Provides flexibility for agents to exchange different content types within messages and artifacts. + - element: Artifact + description: A tangible output generated by an agent during a task (for example, a document, image, or structured data). + purpose: Carries the concrete output of a task in a structured, retrievable form. +{% endtable %} + +### Protocol detection + +A2A traffic is auto-detected per request and non-A2A traffic passes through without overhead. + +#### REST binding + +Detection anchors to the end of the request path, so any prefix added by the route is ignored.
For example, both `/v1/message:send` and `/api/agents/v1/message:send` match `SendMessage`: + + +{% table %} +columns: + - title: HTTP method + key: method + - title: Path suffix + key: path + - title: A2A operation + key: operation + - title: Canonical method + key: canonical +rows: + - method: "`POST`" + path: "`/v1/message:send`" + operation: SendMessage + canonical: "`message/send`" + - method: "`POST`" + path: "`/v1/message:stream`" + operation: SendStreamingMessage + canonical: "`message/stream`" + - method: "`GET`" + path: "`/.well-known/agent-card.json`" + operation: GetAgentCard + canonical: "`agent/getCard`" + - method: "`GET`" + path: "`/v1/extendedAgentCard`" + operation: GetExtendedAgentCard + canonical: "`agent/getExtendedAgentCard`" + - method: "`GET`" + path: "`/v1/tasks/{id}`" + operation: GetTask + canonical: "`tasks/get`" + - method: "`GET`" + path: "`/v1/tasks`" + operation: ListTasks + canonical: "`tasks/list`" + - method: "`POST`" + path: "`/v1/tasks/{id}:cancel`" + operation: CancelTask + canonical: "`tasks/cancel`" + - method: "`POST`" + path: "`/v1/tasks/{id}:subscribe`" + operation: SubscribeToTask + canonical: "`tasks/resubscribe`" + - method: "`POST`" + path: "`/v1/tasks`" + operation: ListTasks + canonical: "`tasks/list`" +{% endtable %} + + +The canonical method name is what appears in OpenTelemetry span attributes and log output. + +#### JSON-RPC binding + +Detected by the `"jsonrpc"` field in the request body, combined with a recognized A2A method name or an `A2A-Version` request header. Recognized methods include `message/send`, `message/stream`, `tasks/get`, `tasks/list`, `tasks/cancel`, `tasks/resubscribe`, the `tasks/pushNotificationConfig/*` family, and `agent/getExtendedAgentCard`. + +A request carrying an `A2A-Version` header is treated as JSON-RPC even if the method isn't in the recognized list. 
When an unknown method is accepted this way, the `method` field in log output is recorded as `"unknown"` to bound metric cardinality. The OpenTelemetry span's `kong.a2a.operation` attribute still receives the actual method name. + +### Agent-card URL rewriting + +When an upstream agent returns an agent card, the runtime rewrites the `url` field, and any `additionalInterfaces[].url` fields, to the {{site.ai_gateway}} address. A2A clients then discover the gateway as the canonical endpoint instead of contacting the upstream directly. The rewrite uses `X-Forwarded-*` headers to construct the correct scheme, host, and port when the gateway is deployed behind a load balancer or reverse proxy. + +## Logging and observability + +When statistics logging is enabled, {{site.ai_gateway}} records structured A2A telemetry per request and exposes it in {{site.konnect_short_name}} analytics, attached log plugins, and OpenTelemetry when [{{site.base_gateway}} tracing](/gateway/tracing/) is configured. For the canonical metric and attribute list, see [A2A metrics](/ai-gateway/ai-otel-metrics/#a2a-metrics). + +The runtime emits this data into the `ai.a2a` namespace consumed by {{site.konnect_short_name}} analytics and any attached logging plugins, and creates a `kong.a2a` child span when [{{site.base_gateway}} tracing](/gateway/tracing/) is configured. + +{:.info} +> When statistics logging is enabled, the runtime removes the `Accept-Encoding` request header +> before forwarding to the upstream. This prevents compressed responses that the runtime can't +> parse for metadata extraction. + +Payload logging additionally captures request and response bodies. Payloads are truncated at the configured payload size limit. + +{:.warning} +> Payload logging may expose sensitive data. Only enable it when you're prepared to handle +> request and response bodies in your logging pipeline.
+ +You can view A2A analytics in {{site.konnect_short_name}} Explorer and Dashboards through the [Agentic usage analytics](/observability/explorer/?tab=agentic-usage#metrics) view. + +### Log output fields + +{% include /plugins/ai-a2a-proxy/log-output-fields.md %} + +### OpenTelemetry span attributes + +When statistics logging is enabled and {{site.base_gateway}} tracing is configured, the runtime creates a `kong.a2a` child span with the following attributes: + +{% include /plugins/ai-a2a-proxy/otel-span-attributes.md %} + +### Task states + +Task state values surfaced in logs and spans are normalized to lowercase A2A spec format, regardless of the upstream SDK version: `submitted`, `working`, `input-required`, `completed`, `canceled`, `failed`, `rejected`, `auth-required`, `unknown`. + +## Access control + +The `acls` field controls which identities are allowed to reach the Agent. The field accepts `allow` and `deny` lists. Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. Access is enforced before traffic reaches the upstream agent. + +For per-request authentication and identity, attach an authentication Policy to the Agent. + +## Attach Policies + +Policies are how plugin configurations apply to an Agent. Attach them through the Agent's `policies` field. Each entry is a string that references a Policy by name or ID. Multiple Policies can attach to one Agent; each runs as an independent plugin instance. + +For details, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +## Set up an Agent + +The following example creates an `a2a` Agent that proxies traffic to an upstream A2A agent at `https://booking-agent.internal.kongair.com`, with statistics logging enabled and access restricted to the `internal-teams` Consumer Group. 
+ +{% entity_example %} +type: agent +data: + display_name: KongAir Flight Booking Agent + name: kongair-flight-booking-agent + type: a2a + acls: + allow: + - internal-teams + deny: [] + policies: [] + config: + url: https://booking-agent.internal.kongair.com + logging: + statistics: true + payloads: false + max_payload_size: 524288 +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-consumer-credential.md b/app/_ai_gateway_entities/ai-consumer-credential.md new file mode 100644 index 0000000000..a151e8f38a --- /dev/null +++ b/app/_ai_gateway_entities/ai-consumer-credential.md @@ -0,0 +1,130 @@ +--- +title: AI Consumer Credentials +content_type: reference +entities: + - ai-consumer-credential +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-consumer-credential/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Credentials issued to AI Consumers for authenticating to {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumerCredential +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Consumer entity + url: /ai-gateway/entities/ai-consumer/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ +faqs: + - q: Why are credentials a separate entity instead of a field on the Consumer? + a: | + Each credential has its own lifecycle, identifier, and (for API keys) TTL. Modeling them as + a sub-entity of the Consumer lets you list, rotate, and revoke individual credentials + independently of the Consumer record. + + - q: What credential types are supported? + a: | + Two types: `api-key` and `oauth`. The `type` of the Credential must match the Consumer's + `type`. An `api-key` credential carries the `api_key` value (and an optional `ttl`). 
An + `oauth` credential carries a `custom_id` that maps to the OAuth provider's identifier. + + - q: Can a Consumer have multiple credentials? + a: | + Yes. Issue one Credential per environment, client, or rotation cycle, and revoke individual + Credentials without affecting the others. + + - q: Is the API key value visible after creation? + a: | + No. The `api_key` field is write-only; subsequent reads return the Credential's metadata + (`name`, `display_name`, `ttl`, timestamps) but not the secret. Distribute the key value at + creation time, and rotate by issuing a new Credential and revoking the old one. + + - q: What's the relationship between `ttl` and the Consumer's lifecycle? + a: | + `ttl` controls how long the API key value remains valid in seconds. When it elapses, the + Credential stops authenticating but the Credential record (and the parent Consumer) remain. + Issue a new Credential to keep the Consumer authenticating. +--- + +## What is a Consumer Credential? + +A Consumer Credential is the {{site.ai_gateway}} entity that represents the secret material a [Consumer](/ai-gateway/entities/ai-consumer/) presents to authenticate to {{site.ai_gateway}}. + +Credentials are nested under their owning Consumer: each Credential belongs to exactly one Consumer, and removing the Consumer removes its Credentials. + +Consumer Credentials are managed through the {{site.ai_gateway}} entity API: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumers/{consumerId}/credentials +{% endtable %} + +## Credential types + +The `type` field on a Credential must match the parent Consumer's `type`: + +* **`api-key`**: the Credential carries an `api_key` value the client presents on each request. An optional `ttl` (seconds) bounds the validity period; once it elapses, the value no longer authenticates. 
+* **`oauth`**: the Credential carries a `custom_id` that maps a Consumer to an OAuth identity issued by an external provider. {{site.ai_gateway}} works with any standards-compliant OAuth 2.0 / OpenID Connect provider configured through the [OpenID Connect plugin](/plugins/openid-connect/), or, for MCP traffic, the [AI MCP OAuth2 plugin](/plugins/ai-mcp-oauth2/). The `custom_id` is typically the OIDC `sub` claim or the Client ID issued by the OAuth provider. The actual access token is issued and validated by the OAuth provider, not stored on the Credential. + +The `api_key` field is write-only and cannot be retrieved after creation. Treat creation responses as the only opportunity to capture the key value. + +## Lifecycle + +Each Credential has its own UUID and supports independent list, get, and delete operations through the nested endpoints under its parent Consumer. There is no `PUT` operation: rotation is an explicit "create new, delete old" flow, which avoids long-lived stale references. + +Deleting a Credential immediately stops it from authenticating. Deleting the parent Consumer removes all of its Credentials. + +## Set up an API key Credential + +The following example issues a 24-hour API key credential to an existing Consumer named `mobile-app-production`. + +{% entity_example %} +type: consumer-credential +data: + display_name: Mobile App - Dev Key + name: mobile-app-dev-key + type: api-key + api_key: + ttl: 86400 +{% endentity_example %} + +{:.warning} +> Don't commit `api_key` values to source control. Inject them at creation time from a +> secret-management system, and treat any value checked into a configuration file as compromised. + +## Set up an OAuth Credential + +The following example issues an OAuth credential that maps an external OIDC client ID to a Consumer. 
+ +{% entity_example %} +type: consumer-credential +data: + display_name: Mobile App - OIDC Mapping + name: mobile-app-oidc-mapping + type: oauth + custom_id: 0oatibf4t2PlDxqgR1d7 +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-consumer-group.md b/app/_ai_gateway_entities/ai-consumer-group.md new file mode 100644 index 0000000000..38ecc83a3b --- /dev/null +++ b/app/_ai_gateway_entities/ai-consumer-group.md @@ -0,0 +1,135 @@ +--- +title: AI Consumer Groups +content_type: reference +entities: + - ai-consumer-group +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-consumer-group/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Consumer Groups for {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumerGroup +works_on: + - konnect +tools: + - deck + - admin-api + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Consumer entity + url: /ai-gateway/entities/ai-consumer/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: "{{site.base_gateway}} Consumer Group entity" + url: /gateway/entities/consumer-group/ +faqs: + - q: How is an {{site.ai_gateway}} Consumer Group different from a {{site.base_gateway}} Consumer Group? + a: | + The runtime entity is a regular Kong Consumer Group. The {{site.ai_gateway}} surface adds + the entity convention (`display_name`, `name`, `labels`) and a required `policies` array + for attaching plugin instances at the group scope. + + - q: Can I edit the underlying Kong Consumer Group that {{site.ai_gateway}} generates? + a: | + No. The generated Kong Consumer Group is protected from direct modification through the + standard `/consumer-groups` Admin API. Update the AI Consumer Group instead. + + - q: How do I assign a Consumer to a Consumer Group? 
+ a: | + Set the `consumer_groups` array on the Consumer entity to reference this group by + `name` or `id`. Membership is managed from the Consumer side. + See the [Consumer entity](/ai-gateway/entities/ai-consumer/) reference. + + - q: Can a Consumer belong to multiple Consumer Groups? + a: | + Yes. The Consumer's `consumer_groups` array accepts one or more references. + + - q: How do I attach Policies to a Consumer Group? + a: | + Add the Policy's `name` or `id` to the Consumer Group's `policies` array. + The plugin runs when a member of the group is identified during a request. + See the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + + - q: How do I gate access to a Model, Agent, or MCP Server with a Consumer Group? + a: | + Add the Consumer Group's name to the parent entity's `acls.allow` or `acls.deny` list. + ACLs accept Consumer, Consumer Group, and Authenticated Group names. + See the [Model entity](/ai-gateway/entities/ai-model/) reference. +--- + +## What is a Consumer Group? + +A Consumer Group is the {{site.ai_gateway}} entity that represents a collection of Consumers grouped for the purpose of applying shared Policies and access controls. + +Use Consumer Groups to scope group-wide behavior, such as rate limits, prompt guards, or content moderation, without configuring each Consumer individually. Consumer Groups can appear in the `acls` field of Model, Agent, and MCP Server entities, where they gate access to those parent entities. + +Consumer Groups can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumer-groups +{% endtable %} + +## Configure a Consumer Group + +When you create a Consumer Group, the configuration steps generally follow this order: + +1. 
Create the group with a display name, name, and optional description. +1. Optionally attach Policies for group-wide plugin execution (such as rate limits or content moderation). +1. Assign Consumers to the group through each Consumer's `consumer_groups` array. +1. Optionally use the Consumer Group in `acls` on Model, Agent, or MCP Server entities to control access. + +For a concrete example, see [Set up a Consumer Group](#set-up-a-consumer-group). + +## Membership + +A Consumer Group doesn't list its members directly. Membership is set on the Consumer entity through the Consumer's `consumer_groups` array. Each entry references a Consumer Group by `name` or `id`. A single Consumer can belong to multiple Consumer Groups. + +For the Consumer-side configuration, see the [Consumer entity](/ai-gateway/entities/ai-consumer/) reference. + +## Attach Policies + +Policies attached to a Consumer Group run when a member of that group is identified during a request. To attach a Policy, add its `name` or `id` to the Consumer Group's `policies` array. + +You can attach multiple Policies to a single Consumer Group. Each Policy is an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. + +For the supported plugin types and how Policies attach to other entities, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +## Use in parent entity ACLs + +The `acls` field on Model, Agent, and MCP Server entities accepts Consumer Group names alongside Consumer and Authenticated Group names. Add a Consumer Group to a parent entity's `acls.allow` list to permit its members access, or to `acls.deny` to block them. + +ACLs are evaluated at the Service level of the parent entity's derived primitives. Consumer Group membership is resolved after the request is authenticated and the Consumer is identified. 
+ +## Set up a Consumer Group + +The following example creates an AI Consumer Group with one attached Policy that applies a shared rate limit to its members. + +{% entity_example %} +type: consumer_group +data: + display_name: Internal Teams + name: internal-teams + policies: + - rate-limit-internal-teams +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-consumer.md b/app/_ai_gateway_entities/ai-consumer.md new file mode 100644 index 0000000000..69a805b6be --- /dev/null +++ b/app/_ai_gateway_entities/ai-consumer.md @@ -0,0 +1,140 @@ +--- +title: AI Consumers +content_type: reference +entities: + - ai-consumer +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-consumer/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: "Consumers for {{site.ai_gateway}}." +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayConsumer +works_on: + - konnect +tools: + - deck + - admin-api + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Consumer Credential entity + url: /ai-gateway/entities/ai-consumer-credential/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: "{{site.base_gateway}} Consumer entity" + url: /gateway/entities/consumer/ +faqs: + - q: How is an {{site.ai_gateway}} Consumer different from a {{site.base_gateway}} Consumer? + a: | + The runtime entity is a regular Kong Consumer. The {{site.ai_gateway}} surface uses the + {{site.ai_gateway}} entity convention (`display_name`, `name`, `labels`), requires an + authentication `type` field, accepts inline Consumer Group assignment, and lets you + reference Policies. Credentials are managed as a separate sub-entity rather than embedded + on the Consumer. 
+ + - q: How do I add credentials to an AI Consumer? + a: | + Credentials are a separate sub-entity, not a field on the Consumer. Create them under the + Consumer's nested credentials endpoint. See the + [Consumer Credential entity](/ai-gateway/entities/ai-consumer-credential/) reference. + + - q: "What's the difference between `type: api-key` and `type: oauth`?" + a: | + The `type` declares which credential family the Consumer authenticates with. An `api-key` + Consumer holds one or more `api-key` Credentials. An `oauth` Consumer holds one or more + `oauth` Credentials whose `custom_id` maps to the OAuth provider's identifier. The + Credential's `type` must match the Consumer's `type`. + + - q: Can a Consumer belong to multiple Consumer Groups? + a: | + Yes. The `consumer_groups` array accepts one or more references to Consumer Groups by + `name` or `id`. + + - q: How do I attach Policies to a Consumer? + a: | + Add the Policy's `name` or `id` to the Consumer's `policies` array. + See the [Policy entity](/ai-gateway/entities/ai-policy/) reference. +--- + +## What is a Consumer? + +A Consumer is the {{site.ai_gateway}} entity that represents a downstream client of the AI APIs you publish through {{site.ai_gateway}}. + +You can use Consumers and Consumer Groups to authenticate clients, attach Policies, and gate access to Models, Agents, and MCP Servers through those parent entities' `acls` field. + +Consumers can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/consumers +{% endtable %} + +## Configure a Consumer + +When you create a Consumer, the configuration steps generally follow this order: + +1. 
Choose an authentication `type`: `api-key` for API key credentials, or `oauth` for OAuth 2.0 / OpenID Connect credentials. +1. Optionally assign the Consumer to one or more Consumer Groups through the `consumer_groups` array. +1. Optionally attach Policies to the Consumer for request-level plugin execution. +1. Create credentials separately through the [Consumer Credential entity](/ai-gateway/entities/ai-consumer-credential/). + +For a concrete example, see [Set up a Consumer](#set-up-a-consumer). + +## Authentication type + +The `type` field declares which credential family the Consumer authenticates with. Supported values are: + +* `api-key`: the Consumer authenticates with one or more API key Credentials. +* `oauth`: the Consumer authenticates through an OAuth identity issued by an external OIDC provider. {{site.ai_gateway}} accepts any standards-compliant OAuth 2.0 / OpenID Connect provider configured through the [OpenID Connect plugin](/plugins/openid-connect/), or, for MCP traffic, through the [AI MCP OAuth2 plugin](/plugins/ai-mcp-oauth2/). The Consumer Credential carries a `custom_id` that maps to the OAuth provider's user identifier (for example, an OIDC Client ID or `sub` claim). + +The `type` of every Credential issued to the Consumer must match the Consumer's `type`. See the [Consumer Credential entity](/ai-gateway/entities/ai-consumer-credential/) reference for credential management. + +## Consumer Group membership + +You can assign a Consumer to one or more Consumer Groups through the `consumer_groups` array. Each entry references a Consumer Group by `name` or `id`. + +Consumer Groups are managed through their own entity surface. See the [Consumer Group entity](/ai-gateway/entities/ai-consumer-group/) reference. + +## Attach Policies + +Policies are how plugin configurations apply to a Consumer. Attach a Policy by adding its `name` or `id` to the Consumer's `policies` array. 
The underlying plugin runs in the request lifecycle when the Consumer is identified. + +You can attach multiple Policies to a single Consumer. Each Policy is an independent plugin instance. + +For the supported plugin types and how Policies attach to other entities, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +## Set up a Consumer + +The following example creates an AI Consumer assigned to a single Consumer Group. Credentials are issued separately through the [Consumer Credential entity](/ai-gateway/entities/ai-consumer-credential/). + +{% entity_example %} +type: consumer +data: + display_name: Mobile App - Production + name: mobile-app-production + type: api-key + consumer_groups: + - internal-teams + policies: [] +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-data-plane-certificate.md b/app/_ai_gateway_entities/ai-data-plane-certificate.md new file mode 100644 index 0000000000..d650cc7350 --- /dev/null +++ b/app/_ai_gateway_entities/ai-data-plane-certificate.md @@ -0,0 +1,124 @@ +--- +title: AI Data Plane Certificates +content_type: reference +entities: + - ai-data-plane-certificate +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-data-plane-certificate/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Client certificates that authorize data planes to connect to an {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayDataPlaneClientCertificate +works_on: + - konnect +tools: + - konnect-api + - terraform +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Provider entity + url: /ai-gateway/entities/ai-provider/ + - text: Vault entity + url: /ai-gateway/entities/ai-vault/ +faqs: + - q: Why is there no update operation? + a: | + The certificate body is immutable once registered. 
To rotate, register a new Data Plane + Certificate alongside the existing one, roll the data planes onto the new certificate, then + delete the old entry. This pattern avoids a window where no certificate is installed. + + - q: What happens to connected data planes when a certificate is deleted? + a: | + Any data plane currently connecting with the deleted certificate loses its trust anchor and + can no longer establish a connection to the {{site.ai_gateway}}. Roll data planes onto a + replacement certificate before deleting the old one. + + - q: Is the private key stored alongside the certificate? + a: | + No. Only the public certificate is registered with the {{site.ai_gateway}}. The corresponding + private key stays on the data plane and is never sent to {{site.konnect_short_name}}. + + - q: Can the same certificate be used by multiple data planes? + a: | + Yes. Any data plane provisioned with the registered certificate and its private key can + establish a connection. Use multiple certificates when you need to revoke trust for a subset + of data planes independently. + + - q: How does this relate to the {{site.base_gateway}} data plane client certificate? + a: | + It plays the same role, establishing mutual TLS between the control plane and a data plane, + but it is scoped to a single {{site.ai_gateway}} instance and managed through the + {{site.ai_gateway}} entity surface, not the {{site.konnect_short_name}} Gateway control plane API. +--- + +## What is a Data Plane Certificate? + +A Data Plane Certificate is an {{site.ai_gateway}} entity that registers a public X.509 certificate as a trusted client identity for an {{site.ai_gateway}}. Data planes presenting the matching private key during the mTLS handshake are allowed to connect; data planes without a matching registered certificate are rejected. + +Each Data Plane Certificate belongs to exactly one {{site.ai_gateway}}. 
An {{site.ai_gateway}} can have multiple registered certificates so that you can issue one per data plane fleet, rotate keys without downtime, or revoke trust for a subset of data planes independently. + +Data Plane Certificates are managed through the {{site.konnect_short_name}} {{site.ai_gateway}} API, the {{site.konnect_short_name}} UI, or Terraform: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/data-plane-certificates +{% endtable %} + +There is no on-prem equivalent for this entity. Self-managed {{site.base_gateway}} deployments use the existing [`/certificates`](/gateway/entities/certificate/) entity and [hybrid mode node configuration](/gateway/hybrid-mode/) instead. + +## Trust model + +The {{site.ai_gateway}} acts as the control plane in a CP/DP topology. Each data plane presents a client certificate during the TLS handshake, and the {{site.ai_gateway}} accepts the connection only if the presented certificate matches one that has been registered as a Data Plane Certificate on that {{site.ai_gateway}}. + +Only the public certificate is registered with the {{site.ai_gateway}}. The private key is generated and held on the data plane side; it never leaves the data plane host. + + +{% mermaid %} +sequenceDiagram + participant DP as Data Plane + participant CP as {{site.ai_gateway}} (Control Plane) + + Note over DP: Holds private key locally
(never sent over the network) + DP->>CP: TLS handshake with client certificate + Note over CP: Compare presented certificate against
registered Data Plane Certificates + alt Certificate matches a registered entry + CP-->>DP: TLS handshake completes + DP->>CP: Receive configuration and stream telemetry + else No matching registered certificate + CP-->>DP: Connection rejected + end +{% endmermaid %} + + +## Lifecycle + +Data Plane Certificates support create, list, get, and delete operations. There is no update endpoint because the certificate body is immutable once registered. + +To rotate a certificate without downtime: + +1. Register the new certificate as an additional Data Plane Certificate on the {{site.ai_gateway}}. +1. Reconfigure the data planes to present the new certificate and key. +1. Verify that data planes have reconnected with the new identity. +1. Delete the old Data Plane Certificate. + +Deleting a Data Plane Certificate immediately invalidates the trust for any data plane still using it. Existing connections are dropped and reconnect attempts using the deleted certificate are rejected. + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-data-plane-node.md b/app/_ai_gateway_entities/ai-data-plane-node.md new file mode 100644 index 0000000000..0a22531ad7 --- /dev/null +++ b/app/_ai_gateway_entities/ai-data-plane-node.md @@ -0,0 +1,95 @@ +--- +title: AI Data Plane Nodes +content_type: reference +entities: + - ai-data-plane-node +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-data-plane-node/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Data Plane nodes that run {{site.ai_gateway}} workloads and connect to the control plane.
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayDataPlaneNode +works_on: + - konnect +tools: + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entity" + url: /ai-gateway/entities/ai-gateway/ + - text: Data Plane Certificate entity + url: /ai-gateway/entities/ai-data-plane-certificate/ +faqs: + - q: How do I register a new Data Plane node? + a: | + Data Plane nodes register themselves when they start and establish a connection to the + {{site.ai_gateway}} using a client certificate. Once registered, the node appears in + the {{site.konnect_short_name}} {{site.ai_gateway}} UI and is accessible via the API. + + - q: What does `config_hash` tell me? + a: | + `config_hash` is a hash of the configuration currently applied by the node. Compare + this to the {{site.ai_gateway}}'s `config_hash`. If they match, the node is in sync + with the latest control plane configuration. If they differ, the node is running stale + configuration. + + - q: What is `last_ping`? + a: | + `last_ping` is a Unix timestamp indicating the most recent heartbeat from the node. + It helps operators identify nodes that are no longer communicating with the control plane. + + - q: What do compatibility issues mean? + a: | + Compatibility issues indicate that the node's version or configuration is incompatible + with the {{site.ai_gateway}}. The issue detail includes a resolution explaining what + must be changed to bring the node into a compatible state. +--- + +## What is a Data Plane Node? + +A Data Plane Node is a runtime instance that executes {{site.ai_gateway}} traffic and maintains a connection to the {{site.konnect_short_name}} {{site.ai_gateway}} control plane. Each node runs the {{site.ai_gateway}} data plane binary, loads configuration from the control plane, and processes requests according to that configuration. + +Nodes are read-only entities in the {{site.ai_gateway}} API.
You cannot create or delete nodes through the control plane; instead, nodes self-register when they start with a valid [Data Plane Certificate](/ai-gateway/entities/ai-data-plane-certificate/). Operators monitor and troubleshoot nodes through the Konnect UI and API. + +Data Plane Nodes can be viewed through the {{site.konnect_short_name}} {{site.ai_gateway}} API: + +{% table %} +columns: + - title: Deployment + key: deployment + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - deployment: "{{site.konnect_short_name}}" + cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/nodes +{% endtable %} + +## Understanding Node Status + +When you list or inspect a node, key fields to monitor are: + +* **`last_ping`**: The most recent heartbeat timestamp. A stale value indicates the node has lost connectivity or crashed. +* **`config_hash`**: Compare this to the {{site.ai_gateway}}'s `config_hash`. If they differ, the node is running stale configuration and should be restarted or rolled forward. +* **`compatibility_status`**: Reports any version or configuration incompatibilities. If issues are present, review the resolution steps provided before routing traffic through the node. + +## Monitoring Nodes + +Regularly check the list of registered nodes to ensure they are healthy and in sync: + +1. **Verify connectivity**: Check `last_ping` to confirm the node is actively reporting to the control plane. +1. **Verify configuration sync**: Compare each node's `config_hash` to the {{site.ai_gateway}}'s `config_hash`. If they differ, the node is running stale configuration and should be restarted or rolled forward. +1. **Resolve compatibility issues**: If a node reports compatibility issues, the `compatibility_status` field includes resolution steps. Address them before the node begins serving traffic. 
+ +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md new file mode 100644 index 0000000000..ae0e57d47d --- /dev/null +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -0,0 +1,127 @@ +--- +title: "{{site.ai_gateway}}" +content_type: reference +entities: + - ai-gateway +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-gateway/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: | + The top-level {{site.ai_gateway}} entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. +schema: + api: konnect/ai-gateway + path: /schemas/AIGateway +works_on: + - konnect +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Provider entity + url: /ai-gateway/entities/ai-provider/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: Data Plane Certificate entity + url: /ai-gateway/entities/ai-data-plane-certificate/ +faqs: + - q: How is an {{site.ai_gateway}} different from a {{site.konnect_short_name}} Gateway control plane? + a: | + An {{site.ai_gateway}} is a dedicated control plane purpose-built for AI traffic. It exposes its own + entity surface (Models, Providers, Policies, Agents, MCP Servers, and so on) and its own + data plane runtime. It doesn't share entities or data planes with a regular + {{site.konnect_short_name}} Gateway control plane. + + - q: Can I run more than one {{site.ai_gateway}} in an organization? + a: | + Yes. An organization can hold multiple {{site.ai_gateway}} entities. Each one has its own + configuration and telemetry endpoints, its own set of child entities, and its own + data planes. + + - q: What does `config_hash` represent? 
+ a: | + `config_hash` is a hash of the {{site.ai_gateway}}'s latest configuration, including all of its + child entities. It changes any time something under the {{site.ai_gateway}} is created, updated, + or deleted. Compare it to the `config_hash` reported by a data plane node to check whether + the node has the current configuration. + + - q: What happens to child entities when I delete an {{site.ai_gateway}}? + a: | + Deleting an {{site.ai_gateway}} removes the entity. Its child entities (Models, Providers, Policies, + Agents, MCP Servers, Vaults, Consumers, Consumer Groups, and Data Plane Certificates) are + tied to the {{site.ai_gateway}} and are not addressable without it. + + - q: Is the {{site.ai_gateway}} entity available on-prem? + a: | + No. The {{site.ai_gateway}} entity is a {{site.konnect_short_name}} concept. On-prem deployments + manage the same child entities (Models, Providers, Policies, and so on) directly through + the Admin API, without a parent `ai-gateways/{id}` container. +--- + +## What is an {{site.ai_gateway}}? + +An {{site.ai_gateway}} is the top-level {{site.ai_gateway}} entity. It's a dedicated control plane for AI traffic, separate from a regular {{site.konnect_short_name}} Gateway control plane, that owns the entities {{site.ai_gateway}} uses to serve LLM and agent workloads: + +1. [Models](/ai-gateway/entities/ai-model/): AI model endpoints, capabilities, and load balancing. +1. [Providers](/ai-gateway/entities/ai-provider/): upstream LLM service connections and credentials. +1. [Policies](/ai-gateway/entities/ai-policy/): security, rate limiting, and guardrail behavior attached to other entities. +1. [Agents](/ai-gateway/entities/ai-agent/): A2A and HTTP agent routing. +1. [MCP Servers](/ai-gateway/entities/ai-mcp-server/): MCP tool exposure and session handling. +1. [Vaults](/ai-gateway/entities/ai-vault/): secret storage referenced from other entities. +1. 
[Consumers](/ai-gateway/entities/ai-consumer/), [Consumer Groups](/ai-gateway/entities/ai-consumer-group/), [Consumer Credentials](/ai-gateway/entities/ai-consumer-credential/): identities used in access control. +1. [Data Plane Certificates](/ai-gateway/entities/ai-data-plane-certificate/): certificates that authorize data plane nodes to connect. + +Every other {{site.ai_gateway}} entity is created under an {{site.ai_gateway}} and addressed through its ID: + +{% table %} +columns: + - title: Surface + key: surface + - title: Endpoint + key: endpoint +rows: + - surface: {{site.ai_gateway}} + endpoint: /v1/ai-gateways + - surface: Child entities + endpoint: /v1/ai-gateways/{aiGatewayId}/{entity} +{% endtable %} + +## Endpoints + +When an {{site.ai_gateway}} is created, {{site.ai_gateway}} provisions two endpoints that data planes connect to: + +1. **Configuration endpoint** (`endpoints.configuration`): the URL data plane nodes use to receive their configuration from the control plane. +1. **Telemetry endpoint** (`endpoints.telemetry`): the URL data plane nodes use to ship analytics and runtime telemetry back to {{site.konnect_short_name}}. + +Both endpoints are read-only, assigned at creation time, and stable for the lifetime of the {{site.ai_gateway}}. Data plane nodes need both URLs, along with a [Data Plane Certificate](/ai-gateway/entities/ai-data-plane-certificate/), to register with the {{site.ai_gateway}}. + +## Configuration hash + +`config_hash` is a read-only field that {{site.ai_gateway}} updates every time anything under the {{site.ai_gateway}} changes, such as a new Model, an updated Policy, or a deleted Provider. Each data plane node reports back the `config_hash` of the configuration it's running. The two values match when the node is in sync with the control plane. 
+ +Use `config_hash` to verify rollout: after a configuration change, watch the node `config_hash` (through [List Nodes](/ai-gateway/entities/ai-data-plane-node/) or the {{site.konnect_short_name}} UI) until every node reports the {{site.ai_gateway}}'s current value. + +## Labels + +`labels` are a free-form `key: value` map for organization. Use them to tag {{site.ai_gateway}}s by environment (`env: production`), team ownership, cost center, or any other dimension you filter on. Labels don't affect runtime behavior. + +## Lifecycle + +{{site.ai_gateway}}s can be created and managed through the {{site.konnect_short_name}} UI or the {{site.ai_gateway}} API. Once an {{site.ai_gateway}} exists, its child entities (Models, Providers, Policies, and so on) are managed through the {{site.ai_gateway}} API or decK as documented on each entity page. + +Creating an {{site.ai_gateway}} provisions the configuration and telemetry endpoints and gives you the parent ID needed to create child entities. The {{site.ai_gateway}} has no runtime traffic of its own. Traffic flows once at least one Model, Agent, or MCP Server is configured under it and a data plane node is connected. + +Updating an {{site.ai_gateway}} changes its `name`, `description`, or `labels`. Endpoints and `config_hash` are managed by {{site.ai_gateway}} and can't be set directly. + +Deleting an {{site.ai_gateway}} removes the entity. Its child entities are scoped to the {{site.ai_gateway}} and can't be addressed without it. 
+ +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-mcp-server.md b/app/_ai_gateway_entities/ai-mcp-server.md new file mode 100644 index 0000000000..6257e9156c --- /dev/null +++ b/app/_ai_gateway_entities/ai-mcp-server.md @@ -0,0 +1,544 @@ +--- +title: AI MCP Servers +content_type: reference +entities: + - ai-mcp-server +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-mcp-server/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: MCP Server entity used by {{site.ai_gateway}} to expose tools and proxy MCP traffic. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayMCPServer +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + - text: Kong MCP traffic gateway + url: /mcp/ + - text: Model Context Protocol specification + url: https://modelcontextprotocol.io/ +faqs: + - q: Which MCP protocol version does the runtime use? + a: | + The MCP runtime behind an MCP Server entity speaks MCP protocol version `2025-06-18`. Upstream + MCP servers may run `2025-06-18` or `2025-11-25`. Versions from 2024 are not supported. + + - q: What's the difference between the four server types? + a: | + `passthrough-listener` proxies MCP traffic to an upstream MCP server without converting tools. + `conversion-listener` converts a RESTful API into MCP tools and accepts MCP requests on the + same Route. `conversion-only` defines a tool library that other MCP Servers reference by tag + but doesn't accept incoming MCP traffic itself. `listener` aggregates tools from one or more + `conversion-only` MCP Servers into a single MCP endpoint. 
+ + - q: Can a Consumer's identity gate access to specific tools? + a: | + Yes. Set `default_tool_acls` on the MCP Server with `allow` and `deny` lists, and override per + tool through `tools[].acls`. A per-tool ACL replaces the default for that tool; it doesn't + merge. + + - q: How do OAuth-based ACLs differ from Consumer-based ACLs? + a: | + Set `acl_attribute_type` to `oauth_access_token` and provide `access_token_claim_field` (a jq + filter, for example `.user.email`). ACLs then evaluate against the claim value extracted from + the OAuth access token instead of the resolved Consumer identity. The OAuth flow is supplied + by the [AI MCP OAuth2 Policy](/plugins/ai-mcp-oauth2/). + + - q: What error code do denied requests return? + a: | + `HTTP 403 Forbidden`. Earlier {{site.ai_gateway}} versions returned the JSON-RPC error code + `INVALID_PARAMS -32602`; from {{site.ai_gateway}} 3.14 onward, denials follow the + [MCP 2025-11-25 authorization specification](https://modelcontextprotocol.io/specification/2025-11-25/basic/authorization#error-handling). + + - q: Can I attach the same authentication or rate-limiting plugin that I'd attach to a Route? + a: | + Plugin configuration that applies to the MCP Server goes through the + [Policy entity](/ai-gateway/entities/ai-policy/). Attach Policies to the MCP Server through its + `policies` field. +--- + +## What is an MCP Server? + +An MCP Server is a first-class {{site.ai_gateway}} entity that exposes tools to MCP-compatible clients (such as [Insomnia](https://konghq.com/products/kong-insomnia), [Claude](https://claude.ai/), [Cursor](https://cursor.com/), or [LM Studio](https://lmstudio.ai/)) over the [Model Context Protocol](https://modelcontextprotocol.io/). The runtime acts as a protocol bridge, translating between MCP and HTTP so MCP clients can either call existing APIs through {{site.ai_gateway}} or interact with upstream MCP servers. 
+ +Because the runtime executes inside {{site.ai_gateway}}, MCP endpoints are provisioned dynamically on demand. You don't host or scale them separately, and the same authentication, traffic control, and observability features available to traditional API traffic apply to MCP traffic at the same scale. + +MCP Servers can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/mcp-servers +{% endtable %} + +## Configure an MCP Server + +When you create an MCP Server, the configuration steps generally follow this order: + +1. Choose a server type: `passthrough-listener` to proxy an upstream MCP server, `conversion-listener` to convert a REST API into MCP tools, `conversion-only` to define a shared tool library, or `listener` to aggregate tools from `conversion-only` servers. +1. Point the MCP Server at an upstream: supply the Service URL for conversion types, or the upstream MCP server address for `passthrough-listener`. +1. For conversion types, define tools that map MCP tool names to upstream HTTP endpoints. +1. Optionally, configure sessions for stateful interactions. +1. Optionally, attach Policies for authentication, rate limiting, and observability. +1. Optionally, configure ACLs to restrict which consumers can discover and invoke specific tools. + +For a concrete example, see [Set up an MCP Server](#set-up-an-mcp-server). 
+ +## Common Policies + +Attach plugins as [Policies](/ai-gateway/entities/ai-policy/) on the MCP Server to handle authentication, rate limiting, observability, and traffic control: + + +{% table %} +columns: + - title: Use case + key: use_case + - title: Example + key: example +rows: + - use_case: Authentication + example: | + Apply [AI MCP OAuth2](/plugins/ai-mcp-oauth2/) for MCP-spec OAuth 2.0 flows, or [OpenID Connect](/plugins/openid-connect/) / [Key Auth](/plugins/key-auth/) for non-OAuth identity. + - use_case: Rate limiting + example: | + Use [Rate Limiting](/plugins/rate-limiting/) or [Rate Limiting Advanced](/plugins/rate-limiting-advanced/) to control MCP request volume. + - use_case: Observability + example: | + Add [logging and tracing plugins](/plugins/?category=logging) for full request and response visibility. MCP metrics surface in [{{site.konnect_short_name}} analytics](/ai-gateway/monitor-ai-llm-metrics/#mcp-traffic-metrics). + - use_case: Traffic control + example: | + Apply [request and response transformation plugins](/plugins/?category=transformations) or [ACL policies](/plugins/acl/). +{% endtable %} + + +## Server modes + +The `type` field selects one of four modes. Each mode determines how the runtime handles MCP requests and whether it converts RESTful APIs into MCP tools. + + +{% table %} +columns: + - title: Mode + key: mode + - title: Description + key: description + - title: Use cases + key: usecase +rows: + - mode: "`passthrough-listener`" + description: | + Listens for incoming MCP requests and proxies them to an upstream MCP server without + converting tools. Generates MCP observability metrics. + usecase: | + You already operate an MCP server and want {{site.ai_gateway}} to act as an authenticated, + observable entrypoint. Common for third-party or internally hosted MCP services exposed + through {{site.ai_gateway}}. 
+ - mode: "`conversion-listener`" + description: | + Converts RESTful API paths into MCP tools and accepts incoming MCP requests on the Route + path. Tools are defined directly on the MCP Server and an optional server block applies. + {% new_in 3.13 %} Supports session identifiers set by authentication services for cookie-based + authentication. + usecase: | + Make an existing REST API available to MCP clients directly through {{site.ai_gateway}}. + Common for services that both define and handle their own tools. + - mode: "`conversion-only`" + description: | + Converts RESTful API paths into MCP tools but does not accept incoming MCP requests. + Tools are tagged at the MCP Server level so a `listener` MCP Server can reference them. + Used together with one or more `listener` MCP Servers. + usecase: | + Define reusable tool specifications without serving them. Suitable for teams that maintain + a shared library of tool definitions. + - mode: "`listener`" + description: | + Similar to `conversion-listener`, but instead of defining its own tools, it binds tools + from one or more `conversion-only` MCP Servers through `config.server.tag`. + usecase: | + A single MCP endpoint that aggregates tools from multiple `conversion-only` MCP Servers. + Typical in multi-service or multi-team environments that expose a unified MCP interface. +{% endtable %} + + +## How MCP traffic flows + +For `conversion-listener`, `conversion-only`, and `listener` modes, the runtime converts MCP requests into HTTP calls and wraps the responses back in MCP format: + +1. Accepts an MCP protocol request from a client. +1. Parses the MCP tool call and matches it to a tool definition. +1. Converts the call into a standard HTTP request. +1. Sends the request to the upstream Service. +1. Wraps the HTTP response in MCP format and returns it to the client. + +For `passthrough-listener` mode, the runtime proxies MCP traffic directly to the upstream MCP server without conversion. 
+ + +{% mermaid %} +sequenceDiagram + participant Client as MCP Client + participant Gateway as {{site.ai_gateway}}
(MCP Server) + participant Upstream as Upstream Service + + Client->>Gateway: MCP request (tool invocation) + activate Gateway + Gateway->>Gateway: Parse MCP payload + Gateway->>Gateway: Map to HTTP endpoint + Gateway->>Upstream: HTTP request + deactivate Gateway + activate Upstream + Upstream-->>Gateway: HTTP response + deactivate Upstream + activate Gateway + Gateway->>Gateway: Convert to MCP format + Gateway-->>Client: MCP response + deactivate Gateway +{% endmermaid %} + + +{:.info} +> Pings from MCP clients are included in the total request count for an {{site.ai_gateway}} +> instance, in addition to requests made to the MCP server itself. + +## Tools + +A [tool](#schema-aigateway-mcpserver-tools) maps an MCP tool name to an upstream HTTP endpoint. Each tool needs at minimum a description and an HTTP method. The runtime extracts the host, path, headers, and query from the route configuration, so most tool entries don't need to specify them. Override these on the tool entry only when the route doesn't match the upstream endpoint exactly. + +For richer mapping, supply [`request_body`](#schema-aigateway-mcpserver-tools-request-body), [`responses`](#schema-aigateway-mcpserver-tools-responses), and [`parameters`](#schema-aigateway-mcpserver-tools-parameters) specifications in OpenAPI JSON format. The runtime uses them to validate calls and shape upstream HTTP requests. + +Tools can also carry MCP-spec [annotations](#schema-aigateway-mcpserver-tools-annotations) that hint at tool behavior to clients (for example, whether a tool is read-only, idempotent, or destructive). Annotations don't change runtime behavior; they help clients decide whether to surface a tool, confirm before invocation, or treat it as safe to retry. + +[Per-tool ACLs](#schema-aigateway-mcpserver-tools-acls) override the MCP Server's [default tool ACLs](#schema-aigateway-mcpserver-default-tool-acls). See [ACL tool control](#acl-tool-control). 
+ +## Sessions + +`listener` and `conversion-listener` MCP Servers support managed sessions for stateful interactions. Configure session storage through `config.server.session`. The `passthrough-listener` mode doesn't use managed sessions because session state lives on the upstream MCP server. + +Two session strategies are available: + +1. **Client.** Session state is encrypted into the MCP session ID assigned to the client. Requires `secrets`, a list of encryption keys: the first entry is used for encryption, and all entries are used for decryption to support key rotation. +1. **Redis.** Session state is stored in Redis. Configure connection details and authentication in `config.server.session.redis`. + +{% include_cached /plugins/redis/redis-cloud-auth.md tier='enterprise' %} + +`session_ttl` controls how long sessions live (default 24 hours). Set `managed: false` to disable managed sessions when the upstream maintains state externally. + +Secrets used in session encryption can be referenced from a [Vault](/ai-gateway/entities/ai-vault/). + +## Server configuration + +The `config.server` block carries runtime settings that apply across all tools on the MCP Server: + + +{% table %} +columns: + - title: Field + key: field + - title: Default + key: default + - title: Description + key: description +rows: + - field: "[`forward_client_headers`](#schema-aigateway-mcpserver-config-server-forward-client-headers)" + default: "`true`" + description: Whether to forward client request headers to the upstream when calling tools. + - field: "[`tag`](#schema-aigateway-mcpserver-config-server-tag)" + default: (none) + description: A single tag used by `listener` MCP Servers to filter which `conversion-only` tools to expose. + - field: "[`timeout`](#schema-aigateway-mcpserver-config-server-timeout)" + default: 10 seconds + description: Maximum time to wait for an upstream tool call. 
+{% endtable %} + + +[`config.max_request_body_size`](#schema-aigateway-mcpserver-config-max-request-body-size) controls the maximum incoming request body size accepted by the MCP Server (default 1 MB). + +## ACL tool control + +When exposing MCP servers through {{site.ai_gateway}}, you may need granular control over which authenticated API consumers can discover and invoke specific tools. The MCP Server's ACL feature lets you define access rules at both the default level (applying to all tools) and per-tool level (for fine-grained exceptions). + +This way, consumers only interact with tools appropriate to their role, while maintaining a complete audit trail of all access attempts. Authentication is handled by an authentication Policy attached to the MCP Server (such as [Key Auth](/plugins/key-auth/) or an OIDC flow), and the resulting Consumer identity is used for ACL checks. + +{:.info} +> **ACL in `listener` mode** +> +> Listener mode does not support direct ACL configuration. Instead, it inherits ACL rules from tagged `conversion-listener` or `conversion-only` MCP Servers. +> +> To use ACLs with `listener` mode: +> 1. Configure `conversion-listener` or `conversion-only` MCP Servers with ACL rules and tags. +> 1. Configure `listener` mode to aggregate tools by matching tags. +> 1. Set `include_consumer_groups: true` on the listener. Without this setting, the listener cannot pass Consumer Group membership to the aggregated tools, and ACL rules will not evaluate correctly. +> +> See [Enforce ACLs on aggregated MCP servers](/mcp/enforce-acls-on-aggregated-mcp-servers/) for a complete example. + +### Attribute types + +Two attribute types determine what the MCP Server evaluates ACL rules against: + +1. **`consumer`** (default). Evaluates against the resolved Consumer identity. +1. **`oauth_access_token`**. Evaluates against a claim extracted from the OAuth access token. Set `access_token_claim_field` to a jq filter (for example, `.user.email` for a nested claim). 
The OAuth flow itself is supplied by the [AI MCP OAuth2 Policy](/plugins/ai-mcp-oauth2/). + +### Supported identifier types + +When `acl_attribute_type` is `consumer`, ACL rules can reference [Consumers](/gateway/entities/consumer/) and [Consumer Groups](/gateway/entities/consumer-group/) using these identifier types in `allow` and `deny` lists: + +* [`username`](/gateway/entities/consumer/#schema-consumer-username): Consumer username +* [`id`](/gateway/entities/consumer/#schema-consumer-id): Consumer UUID +* [`custom_id`](/gateway/entities/consumer/#schema-consumer-custom-id): Custom Consumer identifier +* [`consumer_groups.name`](/gateway/entities/consumer-group/#schema-consumer-group-name): Consumer Group name + +The authenticated Consumer identity is matched against these identifiers. If the [Consumer](/gateway/entities/consumer/) or any of their [Consumer Groups](/gateway/entities/consumer-group/) match an ACL entry, the rule applies. + +### How default and per-tool ACLs work + +The runtime evaluates access using a two-tier system: + + +{% table %} +columns: + - title: ACL type + key: field + - title: Description + key: description +rows: + - field: "`default_tool_acls`" + description: | + Baseline rules that apply to all tools unless overridden. + - field: "`tools[].acls`" + description: | + When configured, these rules replace the default ACL for that specific tool. The per-tool ACL doesn't inherit or merge with `default_tool_acls`. It is an all-or-nothing override. +{% endtable %} + + +{:.info} +> If a tool defines its own ACL, the runtime ignores `default_tool_acls` for that tool: +> +> - Tools with no ACL configuration inherit the default rules (both `allow` and `deny` lists). +> - Tools with an ACL must explicitly list all allowed subjects (even if they were already in `default_tool_acls`). + +### ACL evaluation logic + +Both default and per-tool ACLs use `allow` and `deny` lists. Evaluation follows this order: + +1. **Deny list configuration**. 
If a `deny` list exists and the subject matches any `deny` entry, the request is rejected (`HTTP 403 Forbidden`). +1. **Allow list configuration**. If an `allow` list exists, the subject must match at least one entry; otherwise, the request is denied (`HTTP 403 Forbidden`). +1. **No allow list configuration**. If no `allow` list exists and the subject is not in `deny`, the request is allowed. +1. **No ACL configuration**. If neither list exists, the request is allowed. + +All access attempts (allowed or denied) are written to the audit log. + +The table below summarizes the possible ACL configurations and their outcomes. + +{% table %} +columns: + - title: Condition + key: condition + - title: "Proxied to upstream service?" + key: proxy + - title: Response code + key: response +rows: + - condition: "Subject matches any `deny` rule" + proxy: No + response: HTTP 403 Forbidden + - condition: "`allow` list exists and subject is not in it" + proxy: No + response: HTTP 403 Forbidden + - condition: "Only `deny` list exists and subject is not in it" + proxy: Yes + response: 200 + - condition: "No ACL rules configured" + proxy: Yes + response: 200 +{% endtable %} + +### ACL tool control request flow + +The runtime evaluates ACLs for both tool discovery and tool invocation. These are two distinct operations with different behaviors: + +**Tool discovery (list tools)**: + +1. MCP client requests the list of available tools. +1. The authentication Policy validates the request and identifies the Consumer. +1. The runtime loads the Consumer's group memberships. +1. The runtime evaluates each tool against `default_tool_acls`. +1. The runtime returns an HTTP 200 response with only the tools the Consumer is allowed to access. +1. The runtime logs the discovery attempt. + +**Tool invocation**: + +1. MCP client invokes a specific tool. +1. The authentication Policy validates the request and identifies the Consumer. +1. The runtime loads the Consumer's group memberships. +1. 
The runtime evaluates the tool-specific ACL if it exists, or the default ACL otherwise. +1. The runtime logs the access attempt (allowed or denied). +1. The runtime returns `HTTP 403 Forbidden` if denied, or forwards the request to the upstream MCP server if allowed. + + +{% mermaid %} +sequenceDiagram + participant Client as MCP Client + participant Gateway as {{site.ai_gateway}} + participant Auth as AuthN Policy + participant ACL as MCP Server (ACL/Audit) + participant Up as Upstream MCP Server + participant Log as Audit Sink + + %% ----- List Tools ----- + rect + note over Client,Gateway: List Tools (Default ACL Scope) + Client->>Gateway: GET /tools + Gateway->>Auth: Authenticate + Auth-->>Gateway: Consumer identity + Gateway->>ACL: Evaluate scoped default ACL + ACL-->>Log: Audit entry + alt If allowed + Gateway-->>Client: Filtered tool list + else If denied + Gateway-->>Client: HTTP 403 Forbidden + end + end + + %% ----- Tool Invocation ----- + rect + note over Client,Up: Tool Invocation (Per-tool ACL) + Client->>Gateway: POST /tools/{tool} + Gateway->>Auth: Authenticate + Auth-->>Gateway: Consumer identity + Gateway->>ACL: Evaluate per-tool ACL + ACL-->>Log: Audit entry + alt If allowed + Gateway->>Up: Forward request + Up-->>Gateway: Response + Gateway-->>Client: Response + else If denied + Gateway-->>Client: HTTP 403 Forbidden + end + end +{% endmermaid %} + + +## Logging and audits + +[Logging](#schema-aigateway-mcpserver-config-logging) captures three layers of MCP traffic: per-request statistics for telemetry, request and response payloads for full visibility, and [audit entries](/ai-gateway/ai-audit-log-reference/#ai-mcp-logs) for every ACL decision. Payload logging may expose sensitive data; enable it with care. 
MCP Server analytics surface in [{{site.konnect_short_name}} Explorer and Dashboards](/ai-gateway/monitor-ai-llm-metrics/#mcp-traffic-metrics) alongside other {{site.ai_gateway}} traffic, and export through [OpenTelemetry](/ai-gateway/ai-otel-metrics/#mcp-metrics). + +## Attach Policies + +Policies are how plugin configurations apply to an MCP Server. Authentication, rate limiting, request and response transformation, and OAuth gating (through [AI MCP OAuth2](/plugins/ai-mcp-oauth2/)) attach to the MCP Server through the `policies` field. Each entry is a string that references a Policy by name or ID. Multiple Policies can attach to one MCP Server; each runs as an independent plugin instance. + +For details, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +## Scope of support + +The MCP Server runtime supports MCP operations and upstream interactions, while certain advanced features and non-HTTP protocols are not currently supported. The table below summarizes what is supported and what is outside the current scope. 
+ + +{% feature_table %} +item_title: Features +columns: + - title: Description + key: description + - title: Supported + key: supported + +features: + - title: "Protocol" + description: Handling latest streamable HTTP with HTTP and HTTPS upstreams + supported: true + - title: "OpenAPI operations" + description: Mapping MCP calls to upstream HTTP operations based on the OpenAPI schema + supported: true + - title: "JSON format" + description: Handling standard JSON request and response bodies + supported: true + - title: "Form-encoded data" + description: Handling `application/x-www-form-urlencoded` + supported: true + - title: "SNI routing" + description: Converting SNI-only routes + supported: false + - title: "Form and XML data" + description: Handling formats such as multipart/form-data or XML + supported: false + - title: "Advanced MCP features" + description: Handling structured output, active notifications on tool changes, and session sharing between instances + supported: false + - title: "Non-HTTP protocols" + description: Handling WebSocket and gRPC upstreams + supported: false + - title: "AI Guardrails" + description: Applying guardrails to MCP AI requests and responses + supported: false +{% endfeature_table %} + + +## Set up an MCP Server + +The following example creates a `conversion-listener` MCP Server that converts a flight-booking REST API into a single `searchFlights` MCP tool, restricts access to the `internal-teams` Consumer Group, and stores managed sessions in client-side encrypted form. 
+ +{% entity_example %} +type: mcp_server +data: + display_name: KongAir Flights + name: kongair-flights + type: conversion-listener + acl_attribute_type: consumer + acls: + allow: + - internal-teams + deny: [] + default_tool_acls: + allow: + - internal-teams + deny: [] + policies: [] + config: + logging: + statistics: true + payloads: false + audits: true + max_request_body_size: 1048576 + server: + forward_client_headers: true + timeout: 10000 + session: + managed: true + strategy: client + session_ttl: 86400 + client: + secrets: + - "{vault://my-vault/session-secret}" + tools: + - name: searchFlights + description: Search for available flights between two airports. + method: GET + path: /flights + annotations: + title: Search flights + read_only_hint: true + idempotent_hint: true +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-model.md b/app/_ai_gateway_entities/ai-model.md new file mode 100644 index 0000000000..039e28e240 --- /dev/null +++ b/app/_ai_gateway_entities/ai-model.md @@ -0,0 +1,419 @@ +--- +title: AI Models +content_type: reference +entities: + - ai-model +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-model/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: AI Models registered with the {{site.ai_gateway}}. 
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayModel +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: About {{site.ai_gateway}} + url: /ai-gateway/ + - text: "{{site.ai_gateway}} providers" + url: /ai-gateway/ai-providers/ + - text: Load balancing with AI Proxy Advanced + url: /ai-gateway/load-balancing/ + - text: Provider entity + url: /ai-gateway/entities/ai-provider/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: "{{site.ai_gateway}} entities" + url: /ai-gateway/entities/ + - text: Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ +faqs: + - q: What's the difference between a Model entity and a `model` field inside a plugin configuration? + a: | + A Model entity is the first-class {{site.ai_gateway}} entity you declare through the `/ai/models` API or {{site.konnect_short_name}}. + {{site.ai_gateway}} derives the underlying plugin and its `model` configuration from the entity. + You don't configure the underlying plugin directly. + + - q: Can I edit the Service, Routes, or plugins that {{site.ai_gateway}} generates from a Model? + a: | + No. Generated primitives are protected from direct modification through the standard Admin API. + Update the Model entity instead, and {{site.ai_gateway}} recreates the underlying primitives within a single transaction. + + - q: How do I configure models in on-prem deployments? + a: | + {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + For on-prem deployments, configure AI proxy behavior using {{site.base_gateway}} plugins directly (for example, the AI Proxy plugin). + See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. + + - q: What happens when I update a Model? + a: | + {{site.ai_gateway}} deletes the Model's derived primitives and recreates them from the updated entity state, all within a single database transaction. 
+ On failure, the transaction rolls back and no partial state is written. + + - q: What happens when I delete a Model? + a: | + The Model and all its derived primitives (Service, Routes, plugin instances) are deleted within a single transaction. + + - q: Can I apply the same configuration to multiple Models? + a: | + Yes, by attaching one Policy with that configuration to each Model. + Policies are not shared between entities; each instance is independent. + See [Policy entity](/ai-gateway/entities/ai-policy/). + + - q: How do I limit which consumers can reach a Model? + a: | + Set the `acls` field on the Model with allow or deny lists. + Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. + + - q: Does the Model entity store provider credentials? + a: | + No. Provider credentials live on the [Provider entity](/ai-gateway/entities/ai-provider/) and are materialized into the underlying primitives at Model creation time. + Updating a Provider propagates the credential change to all Models that reference it. + + - q: Can a client override the model name from the request body? + a: | + By default, no. The request `model` field must match the upstream model on one of the Model's targets; otherwise, the runtime returns a `400` error. + To accept a client-side alias, set `config.model.alias` on the Model and clients can send the alias value in the request `model` field instead of the upstream provider model name. + + - q: Can a client override `temperature`, `top_p`, or `top_k` from the request? + a: | + Yes. Values for `temperature`, `top_p`, and `top_k` in the request take precedence over the per-target configuration declared on `target_models[].config`. + + - q: Which algorithm does `lowest-latency` use to pick the fastest target? + a: | + Exponentially Weighted Moving Average (EWMA).
EWMA continuously updates with every response, weighting recent observations more heavily, so older latencies decay over time but still contribute. There is no fixed learning-phase window. + + - q: Does the load balancer keep probing slower targets after picking a winner? + a: | + Yes. EWMA ensures every target continues to receive a small share of traffic (typically 0.1% to 5%, depending on the latency gap). This ongoing probing lets the load balancer adapt if a previously slower target becomes faster. + +--- + +## What is a Model? + +A Model is a first-class {{site.ai_gateway}} entity that represents an AI model endpoint exposed through {{site.ai_gateway}}. + +A Model declares which capabilities it exposes (such as `chat`, `responses`, or `embeddings`), which upstream provider models it routes to, and how requests are load-balanced and logged. {{site.ai_gateway}} translates a Model into the underlying primitives that the runtime uses to serve traffic, so you don't need to assemble Services, Routes, or plugin entries by hand. + +Models can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/models +{% endtable %} + +## Configure a Model + +When you create a Model in {{site.konnect_short_name}} or via the API, the configuration steps generally follow this order: + +1. Choose a type (`model` or `api`) and declare which capabilities the Model exposes. +1. Add one or more target models, each pointing to a Provider with credentials. +1. Select a request and response format (default is `openai`). +1. If you have more than one target, configure load balancing in `config.balancer`. +1. Optionally, attach Policies to add plugin configuration and set `acls` to control access. 
+ +For a concrete example, see [Set up a Model](#set-up-a-model). + +## How it works + +When you configure a Model, you define what capabilities it exposes, which upstream providers it routes to, and how requests are load-balanced and logged. At request time, the Model mediates traffic between clients and upstream provider APIs: + +1. Translates between the request and response format chosen for the Model and the upstream provider's native format. +1. Resolves upstream connection coordinates (protocol, host, port, path, HTTP method) from the selected target and its [Provider](/ai-gateway/entities/ai-provider/), unless the target is a self-hosted model. +1. Authenticates to the upstream provider using credentials stored on the Provider entity. +1. Decorates the upstream request with per-target configuration (such as temperature or token-limit overrides) declared on `target_models[].config`. +1. Records usage statistics (tokens, cost, latency) for attached log Policies, and optionally the full request and response when payload logging is enabled. +1. Fulfills requests to self-hosted models using the supported native format transformations. + +A single Model can expose multiple upstream providers behind a consistent client-facing format, so callers don't change their request shape when the underlying Provider changes. + +## How a Model maps to runtime configuration + +When you create or update a Model, {{site.ai_gateway}} generates a fixed set of primitives: + +* One [Gateway Service](/gateway/entities/service/). +* One [Route](/gateway/entities/route/) per declared capability in the `capabilities` array. +* One [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin per generated Route. + +Provider credentials are added into the AI Proxy Advanced plugin configuration at generation time, sourced from the Provider entity that the Model's `target_models` reference. Updating the Provider propagates credential changes to every Model that uses it. 
+ +Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against the underlying Service, Routes, or plugin entries through the standard Admin API are rejected. To change anything about a Model's runtime footprint, update the Model entity. {{site.ai_gateway}} deletes and recreates the derived primitives within a single transaction. + +{:.info} +> **Why a transaction instead of an in-place update?** +> +> A Model's structure (which capabilities exist, which providers it routes to) determines how many Routes and plugin entries are needed. A delete-and-recreate cycle is the simplest way to keep the entity and its derived primitives consistent, especially when capabilities are added or removed. + +## Capabilities + +The [`capabilities`](#schema-aigateway-model-capabilities) field tells {{site.ai_gateway}} which AI workflows the Model exposes. Each capability becomes one Route on the generated Service. A Model must declare at least one capability. + +Model [`type`](#schema-aigateway-model-type) controls which capability set applies: + +* `model`: synchronous request/response workloads through generative APIs. Supported capabilities are `chat`, `embeddings`, `assistants`, `responses`, `audio-transcriptions`, `audio-translations`, `image-generation`, `image-edits`, `video-generations`, and `realtime`. +* `api`: asynchronous workloads through the files and batches APIs. Supported capabilities are `batches` and `files`. + +Not every provider supports every capability. The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. See [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for per-provider details. + +The following table describes each capability and links to the corresponding [AI Proxy plugin](/plugins/ai-proxy/) example route.
+ + +{% table %} +columns: + - title: Capability + key: capability + - title: Description + key: description + - title: Example route + key: example +rows: + - capability: "`chat`" + description: Conversational responses from a sequence of messages. + example: "[`llm/v1/chat`](/plugins/ai-proxy/examples/openai-chat-route/)" + - capability: "`embeddings`" + description: Vector representations for semantic search and similarity matching. + example: "[`llm/v1/embeddings`](/plugins/ai-proxy/examples/embeddings-route-type/)" + - capability: "`assistants`" + description: Persistent tool-using agents with metadata for debugging and evaluation. + example: "[`llm/v1/assistants`](/plugins/ai-proxy/examples/assistants-route-type/)" + - capability: "`responses`" + description: REST-based full-text responses. + example: "[`llm/v1/responses`](/plugins/ai-proxy/examples/responses-route-type/)" + - capability: "`audio-transcriptions`" + description: Speech-to-text. + example: "[`audio/v1/audio/transcriptions`](/plugins/ai-proxy/examples/audio-transcription-openai/)" + - capability: "`audio-translations`" + description: Audio translation between languages. + example: "[`audio/v1/audio/translations`](/plugins/ai-proxy/examples/audio-translation-openai/)" + - capability: "`image-generation`" + description: Generate images from text prompts. + example: "[`image/v1/images/generations`](/plugins/ai-proxy/examples/image-generation-openai/)" + - capability: "`image-edits`" + description: Modify images from text prompts. + example: "[`image/v1/images/edits`](/plugins/ai-proxy/examples/image-edits-openai/)" + - capability: "`video-generations`" + description: Generate videos from text prompts. + example: "[`video/v1/videos/generations`](/plugins/ai-proxy/examples/video-generation-openai/)" + - capability: "`realtime`" + description: Bidirectional WebSocket streaming for low-latency, interactive voice and text. 
+ example: "[`realtime/v1/realtime`](/plugins/ai-proxy-advanced/examples/realtime-route-openai/)" + - capability: "`batches`" + description: Asynchronous bulk LLM requests for long workloads. + example: "[`llm/v1/batches`](/plugins/ai-proxy/examples/batches-route-type/)" + - capability: "`files`" + description: File uploads for long documents and structured input. + example: "[`llm/v1/files`](/plugins/ai-proxy/examples/files-route-type/)" +{% endtable %} + + +## Request and response formats + +The [`formats`](#schema-aigateway-model-formats) array on a Model declares the request and response shapes the Model accepts. Each entry has a `type` that selects the format. The default `openai` format flattens upstream provider responses into the OpenAI shape, so clients can use a single request and response format across providers. + +To preserve a provider's native request and response format instead, set `formats[].type` to a non-OpenAI value. The Model passes requests upstream without conversion, while {{site.ai_gateway}} continues to provide analytics, logging, and cost calculation. + + +{% table %} +columns: + - title: Format + key: format + - title: Provider + key: provider + - title: Native capabilities + key: capabilities +rows: + - format: "`openai`" + provider: All supported providers (default) + capabilities: Translates between OpenAI request and response shapes and the upstream provider format. + - format: "`anthropic`" + provider: "[Anthropic](/ai-gateway/ai-providers/anthropic/#supported-native-llm-formats-for-anthropic)" + capabilities: Messages, batch processing. + - format: "`bedrock`" + provider: "[Amazon Bedrock](/ai-gateway/ai-providers/bedrock/#supported-native-llm-formats-for-amazon-bedrock)" + capabilities: Converse, RAG (RetrieveAndGenerate), reranking, async invocation. + - format: "`cohere`" + provider: "[Cohere](/ai-gateway/ai-providers/cohere/#supported-native-llm-formats-for-cohere)" + capabilities: Reranking. 
+ - format: "`gemini`" + provider: "[Gemini](/ai-gateway/ai-providers/gemini/#supported-native-llm-formats-for-gemini), [Vertex AI](/ai-gateway/ai-providers/vertex/#supported-native-llm-formats-for-gemini-vertex)" + capabilities: Content generation, embeddings, batches, file uploads, reranking, long-running predictions. + - format: "`huggingface`" + provider: "[Hugging Face](/ai-gateway/ai-providers/huggingface/#supported-native-llm-formats-for-hugging-face)" + capabilities: Text generation, streaming. +{% endtable %} + + +When a native format is set, only the corresponding provider is supported with its specific APIs. For format-specific behavior and limitations, see the [AI Proxy plugin reference](/plugins/ai-proxy/#supported-native-llm-formats). + +## Target models + +A Model is a virtual model: it exposes one route ([`config.route`](#schema-aigateway-model-config-route)) and one set of capabilities, and routes requests to one or more concrete upstream models declared in its [`target_models`](#schema-aigateway-model-target-models) array. Each entry represents a single upstream model instance with one URL. + +For each target, you provide the upstream model name (for example, `gpt-4o`) and reference the Provider to use by its `name`. Each target can also override settings such as `temperature`, `max_tokens`, `input_cost`, and `output_cost`. + +There's no separate Target Model entity or endpoint. Target models are managed only as nested data inside a Model, through the same Model API surface used to create, update, and delete the parent. Adding, removing, or modifying a target is an update to the Model itself. + +## Load balancing + +A Model routes to a single target by default. Add more than one target when you want redundancy, fallback between providers, or cost and latency optimization. When you have multiple targets, configure `config.balancer` to distribute requests according to a load balancing algorithm. 
+ +When a Model has more than one target, the [load balancer](#schema-aigateway-model-config-balancer) sits between the virtual model and its targets, distributing requests according to `config.balancer`. For algorithm details, selection guidance, and tuning, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). + +### Algorithms + +The [`algorithm`](#schema-aigateway-model-config-balancer-algorithm) field selects one of seven load balancing strategies for distributing requests across target models. + + +{% table %} +columns: + - title: Algorithm + key: algorithm + - title: Behavior + key: behavior +rows: + - algorithm: "[`round-robin`](/plugins/ai-proxy-advanced/examples/round-robin/)" + behavior: Weighted traffic distribution across targets. + - algorithm: "[`consistent-hashing`](/plugins/ai-proxy-advanced/examples/consistent-hashing/)" + behavior: Sticky sessions based on header values. + - algorithm: "[`least-connections`](/plugins/ai-proxy-advanced/examples/least-connections/)" + behavior: Route to backends with spare capacity. + - algorithm: "[`lowest-latency`](/plugins/ai-proxy-advanced/examples/lowest-latency/)" + behavior: Route to the fastest-responding model. + - algorithm: "[`lowest-usage`](/plugins/ai-proxy-advanced/examples/lowest-usage/)" + behavior: Route based on token counts or cost. + - algorithm: "[`semantic`](/plugins/ai-proxy-advanced/examples/semantic/)" + behavior: Route based on prompt-to-model similarity. + - algorithm: "[`priority`](/plugins/ai-proxy-advanced/examples/priority/)" + behavior: Tiered failover across model groups. +{% endtable %} + + +### Retry and fallback + +The load balancer supports configurable retries, timeouts, and failover to different targets when one is unavailable. Fallback works across targets with any supported format, so you can mix providers freely (for example, OpenAI and Mistral). 
For configuration details, see [Retry and fallback configuration](/ai-gateway/load-balancing/#retry-and-fallback). + +{:.info} +> Client errors don't trigger failover. To fail over on additional error types, set +> [`failover_criteria`](#schema-aigateway-model-config-balancer-failover-criteria) to include HTTP codes +> like `http_429` or `http_502`, and `non_idempotent` for POST requests. + +### Health check and circuit breaker + +The load balancer includes a circuit breaker that improves reliability under sustained failures. When a target reaches the failure threshold set by [`max_fails`](#schema-aigateway-model-config-balancer-max-fails), the load balancer stops routing requests to it until the [`fail_timeout`](#schema-aigateway-model-config-balancer-fail-timeout) period elapses. For behavior examples and tuning, see [Circuit breaker](/ai-gateway/load-balancing/#health-check-and-circuit-breaker). + +### Vector store + +A vector store holds numerical representations (embeddings) of requests and responses so the runtime can match new requests against stored vectors. It powers the [`semantic`](#schema-aigateway-model-config-balancer-algorithm) algorithm and any similarity-matching workflow on the Model. Configure storage through [`config.balancer.vectordb`](#schema-aigateway-model-config-balancer-vectordb) by selecting a `strategy`: + +* `redis`: connects to Redis with Vector Similarity Search (VSS), AWS MemoryDB for Redis, or Valkey. {{site.ai_gateway}} auto-detects Valkey from the server name field and uses the Valkey-specific driver. +* `pgvector`: connects to PostgreSQL with the pgvector extension. + +For deeper background on vector storage and similarity matching, see [Embedding-based similarity matching](/ai-gateway/semantic-similarity/). + +### Embeddings + +An embedding model converts request and response text into vector representations for the vector store. 
Set [`config.balancer.embeddings`](#schema-aigateway-model-config-balancer-embeddings) to reference a Provider and an embedding model name. Supported provider types are `azure`, `bedrock`, `gemini`, and `huggingface`. The same embedding model also powers the `lowest-usage` algorithm when usage is calculated against semantic content. + +## Templating + +The Model resolves runtime values from request data using placeholder substitution. This lets you select the target model dynamically per request, route to per-deployment Azure endpoints, or fan out to multiple providers from a single Model. + +Substitution applies to the [`name`](#schema-aigateway-model-target-models-name) of each target model and to any per-target [`config`](#schema-aigateway-model-target-models-config) option. Three placeholders are available: + +* `$(headers.header_name)`: the value of a request header. +* `$(uri_captures.path_parameter_name)`: the value of a captured URI path parameter. +* `$(query_params.query_parameter_name)`: the value of a query string parameter. + +For end-to-end examples, see [dynamic model selection](/plugins/ai-proxy/examples/sdk-dynamic-model-selection/), [Azure deployment routing](/plugins/ai-proxy/examples/sdk-azure-deployment/), and [proxying multiple models in one Azure instance](/plugins/ai-proxy/examples/sdk-multiple-providers/) on the AI Proxy plugin page. + +## Access control + +A Model's `acls` field controls which identities are allowed to reach the Model. The field accepts `allow` and `deny` lists. Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. Access is enforced at the Service level of the generated primitives. + +For per-request authentication and identity, configure the appropriate authentication plugin globally or as a Policy on the Model. + +## Attach Policies + +Policies are how plugin configurations apply to a Model. 
A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. + +A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. On-prem also supports the nested endpoint `/ai/models/{modelId}/policies`, which creates and attaches a Policy in one call. + +You can attach multiple Policies to a single Model. Each Policy has an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. + +Not every plugin type is valid as a Model Policy. + +Policies created through the nested on-prem endpoint (`POST /ai/models/{modelId}/policies`) are deleted when the Model is deleted. Policies created independently (for example, at `/v1/ai-gateways/{aiGatewayId}/policies` or `/ai/policies`) are not deleted when the Model is deleted; only the Model's reference is removed. + +For further information, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. + +### Plugin priority and Policy execution order + +A Policy attached to a Model creates one plugin entry on the Service of the Model's derived primitives. That plugin runs at the [priority](/gateway/entities/plugin/#plugin-priority) of its underlying plugin type, which determines when it executes relative to other plugins on the request. + +The AI Proxy Advanced plugin runs at priority `770` and parses the request body to resolve the model name. Any Policy whose underlying plugin type has a priority higher than `770` runs before that resolution. Authentication plugin types (such as OpenID Connect) fall into this category. 
They still gate access correctly because routing to the Model's generated Service already occurred, but model-level identity details (provider and target model) are not available yet. + +For Policies whose runtime behavior depends on the resolved Model identity, attach plugin types that run at priority `770` or lower, or use [dynamic plugin ordering](/gateway/entities/plugin/) to push their execution later. + +## Set up a Model + +The following example creates an OpenAI Model that exposes both `chat` and `responses` capabilities, routed through a single OpenAI Provider, with token usage logging enabled. + +{% entity_example %} +type: model +data: + display_name: GPT-4o Production + name: gpt-4o-production + type: model + enabled: true + capabilities: + - chat + - responses + formats: + - type: openai + acls: + allow: + - internal-teams + deny: [] + policies: [] + target_models: + - name: gpt-4o + provider: + name: my-openai-account + config: + temperature: 0.7 + max_tokens: 4096 + input_cost: 0.0000025 + output_cost: 0.000010 + config: + logging: + statistics: true + payloads: false + response_streaming: allow + max_request_body_size: 1048576 + model: + name_header: true + balancer: + algorithm: round-robin + retries: 3 + connect_timeout: 60000 + read_timeout: 60000 + write_timeout: 60000 +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-policy.md b/app/_ai_gateway_entities/ai-policy.md new file mode 100644 index 0000000000..0d33f55848 --- /dev/null +++ b/app/_ai_gateway_entities/ai-policy.md @@ -0,0 +1,139 @@ +--- +title: AI Policies +content_type: reference +entities: + - ai-policy +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-policy/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: "Policies for {{site.ai_gateway}}." 
+schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayPolicy +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Agent entity + url: /ai-gateway/entities/ai-agent/ + - text: MCP Server entity + url: /ai-gateway/entities/ai-mcp-server/ + - text: Plugin entity + url: /gateway/entities/plugin/ +faqs: + - q: Are Policies shared across multiple entities? + a: | + No. Each Policy is an independent instance. To apply the same plugin + configuration to two Models, create two Policies with matching `config`, + one per Model. + + - q: How is a Policy different from a plugin? + a: | + A Policy is a plugin instance configured through the {{site.ai_gateway}} entity surface + instead of the classic `/plugins` endpoint. The runtime effect is the same: a plugin attached + at the appropriate scope. {{site.ai_gateway}} manages the Policy's lifecycle alongside the + entity it's attached to. + + - q: Can a Policy be scoped to a Consumer or Consumer Group? + a: | + Yes. Add the Policy's `name` or `id` to the Consumer's or Consumer Group's `policies` array. + The plugin runs when the Consumer is identified during a request, or when a member of the + Consumer Group is identified. + + - q: What plugin types can a Policy use? + a: | + Set the plugin name in the Policy's `type` field and provide the plugin's configuration + in the `config` field. Examples include `ai-sanitizer`, `ai-prompt-guard`, + `ai-prompt-decorator`, `ai-rate-limiting-advanced`, and `openid-connect`. The supported set + isn't enumerated on this page; refer to the {{site.ai_gateway}} plugin reference for the full list. + + - q: What happens to a Policy when its parent entity is deleted? + a: | + Standalone Policies referenced from parent entities through a `policies` array are independent + and aren't deleted when a referencing parent is deleted.
The reference is simply removed. +--- + +## What is a Policy? + +A Policy is an {{site.ai_gateway}} entity that represents an action, taken by a plugin, that can be attached to an {{site.ai_gateway}} entity. + +Each Policy declares a `type` (which is a plugin name, for example `ai-sanitizer` or `ai-rate-limiting-advanced`) and a `config` block whose contents follow that plugin's own schema. {{site.ai_gateway}} attaches the configured plugin at the scope you select: globally, or to a specific Model, Agent, MCP Server, Consumer, or Consumer Group. + +For the set of plugin types you can use as a Policy `type`, see the [AI plugin reference](/plugins/?category=ai). + +Policies are not shared. Each Policy is one plugin instance. To apply the same configuration to two parent entities, create two Policies. + +Policies are managed through the {{site.ai_gateway}} entity surface: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/policies +{% endtable %} + +## Policy scopes + +A Policy is scoped by where it's referenced from. Each Policy is an independent plugin instance attached at exactly one scope. To apply the same configuration in multiple places, create one Policy per place. + +The available scopes are: + +* **Global**: a Policy that no parent entity references runs for every {{site.ai_gateway}} route on the data plane. Non-AI traffic on the same data plane isn't affected. +* **Model**: referenced from the `policies` array on a [Model entity](/ai-gateway/entities/ai-model/). The plugin runs at the Service of the Model's derived primitives. +* **Agent**: referenced from the `policies` array on an [Agent entity](/ai-gateway/entities/ai-agent/). The plugin runs at the Service of the Agent's derived primitives. +* **MCP Server**: referenced from the `policies` array on an [MCP Server entity](/ai-gateway/entities/ai-mcp-server/).
The plugin runs at the Service of the MCP Server's derived primitives. +* **Consumer**: referenced from the `policies` array on a [Consumer entity](/ai-gateway/entities/ai-consumer/). The plugin runs when the Consumer is identified during a request. +* **Consumer Group**: referenced from the `policies` array on a [Consumer Group entity](/ai-gateway/entities/ai-consumer-group/). The plugin runs when a member of the Consumer Group is identified during a request. + +### Creating Policies + +All Policies are created through a single endpoint at `/v1/ai-gateways/{aiGatewayId}/policies`. Scope is set entirely through the reference-array mechanism above: add the Policy's `name` or `id` to the parent entity's `policies` array, or omit the reference for global scope. + +## Lifecycle + +Creating a Policy creates exactly one plugin entry in the underlying runtime. Updating a Policy updates that plugin entry. Deleting a Policy deletes that plugin entry. All scopes support standard CRUD operations through the matching path. + +The `config` field is passed through to the plugin without translation. + +{:.info} +> **Plugin config schemas live with the plugin docs** +> +> {{site.ai_gateway}} does not define plugin configuration schemas under the Policy entity. +> For each plugin you intend to use as a Policy `type`, look up that plugin's reference page for its `config` shape. + +## Set up a global Policy + +The following example creates a global PII sanitizer Policy that runs for every {{site.ai_gateway}} route. 
+ +{% entity_example %} +type: policy +data: + display_name: PII Sanitizer - Global + name: pii-sanitizer-global + type: ai-sanitizer + enabled: true + config: + anonymize: + - phone + - creditcard + stop_on_error: true +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-provider.md b/app/_ai_gateway_entities/ai-provider.md new file mode 100644 index 0000000000..584e639fae --- /dev/null +++ b/app/_ai_gateway_entities/ai-provider.md @@ -0,0 +1,153 @@ +--- +title: AI Providers +content_type: reference +entities: + - ai-provider +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-provider/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: AI provider credentials and configuration used by {{site.ai_gateway}}. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayProvider +works_on: + - konnect +tools: + - deck + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: "{{site.ai_gateway}} providers" + url: /ai-gateway/ai-providers/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ +faqs: + - q: What happens when I update a Provider's credentials? + a: | + {{site.ai_gateway}} propagates the credential change to every Model that references the + Provider (by `name` or `id`). The next request through any of those Models uses the updated + credentials. + + - q: How does a Model reference a Provider? + a: | + Set `target_models[].provider` on the Model to the Provider's `name` or `id`. + + - q: Do Providers generate any runtime primitives on their own? + a: | + No. A Provider entity is a write-time template. Credentials and configuration only enter + the runtime when a Model references the Provider; at that point, the Provider's values are + materialized into the underlying primitives generated for the Model. 
+ + - q: How do I configure providers in on-prem deployments? + a: | + {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + For on-prem deployments, configure provider credentials and endpoints using {{site.base_gateway}} plugins directly (for example, the AI Proxy plugin). + See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. +--- + +## What is a Provider? + +A Provider is a first-class {{site.ai_gateway}} entity that represents an upstream LLM service connection and its credentials, endpoint configuration, and provider-type-specific options. Each Provider has a `type` that selects the upstream LLM service. See the schema below for supported values, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. + +Models reference a Provider through `target_models[].provider` to route their `target_models` to that upstream. The reference can use either the Provider `name` or `id`. {{site.ai_gateway}} materializes the Provider's credentials into the underlying primitives of every Model that references it. Updating a Provider propagates credential changes to all referencing Models. + +### Relationship to Models + +A Provider stores how to reach and authenticate to an upstream LLM service. A [Model](/ai-gateway/entities/ai-model/) decides which upstream provider model to call and how requests are load-balanced, formatted, and logged. The relationship is many-to-many at the target level: a single Provider can back many Models (for example, an `openai` Provider used by both a chat Model and an embeddings Model), and a single Model can route across multiple Providers through its `target_models` array (for example, a Model with one OpenAI target and one Anthropic target for fallback). + +Providers don't expose model endpoints on their own. They become routable only through a Model that references them. 
+ +Providers can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/providers +{% endtable %} + +## Supported providers + +{{site.ai_gateway}} supports the following upstream providers. The Provider's [`type`](#schema-aigateway-provider-type) field selects one of these connections. Per-provider pages document supported capabilities, configuration requirements, and provider-specific limitations. + +{% html_tag type="div" css_classes="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3" %} +{% icon_card icon="openai.svg" title="OpenAI" cta_url="/ai-gateway/ai-providers/openai/" %} +{% icon_card icon="azure.svg" title="Azure OpenAI" cta_url="/ai-gateway/ai-providers/azure/" %} +{% icon_card icon="bedrock.svg" title="Amazon Bedrock" cta_url="/ai-gateway/ai-providers/bedrock/" %} +{% icon_card icon="anthropic.svg" title="Anthropic" cta_url="/ai-gateway/ai-providers/anthropic/" %} +{% icon_card icon="gemini.svg" title="Gemini" cta_url="/ai-gateway/ai-providers/gemini/" %} +{% icon_card icon="vertex.svg" title="Vertex AI" cta_url="/ai-gateway/ai-providers/vertex/" %} +{% icon_card icon="cohere.svg" title="Cohere" cta_url="/ai-gateway/ai-providers/cohere/" %} +{% icon_card icon="mistral.svg" title="Mistral" cta_url="/ai-gateway/ai-providers/mistral/" %} +{% icon_card icon="huggingface.svg" title="Hugging Face" cta_url="/ai-gateway/ai-providers/huggingface/" %} +{% icon_card icon="metaai.svg" title="Llama" cta_url="/ai-gateway/ai-providers/llama/" %} +{% icon_card icon="xai.svg" title="xAI" cta_url="/ai-gateway/ai-providers/xai/" %} +{% icon_card icon="dashscope.svg" title="Alibaba Cloud DashScope" cta_url="/ai-gateway/ai-providers/dashscope/" %} +{% icon_card icon="cerebras.svg" title="Cerebras" 
cta_url="/ai-gateway/ai-providers/cerebras/" %} +{% icon_card icon="deepseek.svg" title="DeepSeek" cta_url="/ai-gateway/ai-providers/deepseek/" %} +{% icon_card icon="ollama.svg" title="Ollama" cta_url="/ai-gateway/ai-providers/ollama/" %} +{% icon_card icon="databricks.svg" title="Databricks" cta_url="/ai-gateway/ai-providers/databricks/" %} +{% icon_card icon="vllm.svg" title="vLLM" cta_url="/ai-gateway/ai-providers/vllm/" %} +{% endhtml_tag %} + +## Authentication + +The `config.auth` object declares how {{site.ai_gateway}} authenticates to the upstream provider. The shape of `auth` depends on the Provider's `type`: + +* **`basic`**: header- or query-parameter-based auth. Used by most provider types. +* **`aws`**: IAM access-key and assume-role auth. Used by `bedrock`. +* **`azure`**: Microsoft Entra ID or managed-identity auth. Used by `azure`. +* **`gcp`**: Google service-account auth. Used by `gemini`. + +`bedrock`, `azure`, and `gemini` can also fall back to `basic` auth. See the schema below for field-level details, and the per-provider pages under [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for provider-specific guidance. + +{:.warning} +> Don't commit credential values to source control. Use a secret-management system to inject +> auth values at deployment time, and treat any value checked into a configuration file as +> compromised. + +## Provider references + +Models reference a Provider through the `target_models[].provider` field. The same reference shape is used elsewhere in the schema (such as the embeddings model under a Model's load balancer config). Provider references in {{site.ai_gateway}} entities accept either the Provider `name` or `id`. + +If references use `name`, the `name` field acts as a stable human-readable handle. Renaming a Provider (changing `name`) breaks any Model references that point at the old name. + +## Lifecycle + +Creating a Provider stores the entity but doesn't generate any runtime primitives. 
Provider credentials enter the runtime only when a Model references the Provider. At that point, the credentials are materialized into the underlying primitives of the Model. + +Updating a Provider re-materializes credentials into every Model that references it. The change takes effect on the next request through any referencing Model. + +## Set up a Provider + +The following example creates an OpenAI Provider that authenticates with a single bearer-token header. A Model can then route to this Provider by setting `target_models[].provider` to `my-openai-account` (or the Provider `id`). + +{% entity_example %} +type: provider +data: + display_name: OpenAI Production + name: my-openai-account + type: openai + config: + auth: + type: basic + headers: + - name: Authorization + value: Bearer +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_ai_gateway_entities/ai-vault.md b/app/_ai_gateway_entities/ai-vault.md new file mode 100644 index 0000000000..2f15006b56 --- /dev/null +++ b/app/_ai_gateway_entities/ai-vault.md @@ -0,0 +1,106 @@ +--- +title: AI Vaults +content_type: reference +entities: + - ai-vault +products: + - ai-gateway +min_version: + ai-gateway: '2.0.0' +permalink: /ai-gateway/entities/ai-vault/ +breadcrumbs: + - /ai-gateway/ + - /ai-gateway/entities/ +description: Vaults for storing and referencing secrets used by {{site.ai_gateway}} entities. +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayVault +works_on: + - konnect +tools: + - deck + - admin-api + - konnect-api +related_resources: + - text: "About {{site.ai_gateway}}" + url: /ai-gateway/ + - text: Provider entity + url: /ai-gateway/entities/ai-provider/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ + - text: "{{site.base_gateway}} Vault entity" + url: /gateway/entities/vault/ +faqs: + - q: How is an {{site.ai_gateway}} Vault different from a {{site.base_gateway}} Vault? + a: | + The runtime entity is the same secret-management abstraction. 
The {{site.ai_gateway}} surface + manages Vaults through the AI entity convention (`display_name`, `name`, `description`, + `labels`) and exposes them at the `/v1/ai-gateways/{aiGatewayId}/vaults` API alongside the other AI entities. + + - q: Which secret backends are supported? + a: | + The `type` field selects the backend: `konnect`, `env`, `aws`, `gcp`, `azure`, `conjur`, or `hcv`. + Each type carries its own `config` shape. HashiCorp Vault (`hcv`) further selects an + `auth_method` from `token`, `cert`, `jwt`, `approle`, `kubernetes`, `gcp_iam`, `gcp_gce`, + `aws_ec2`, `aws_iam`, or `azure`. + + - q: How are Vault secrets referenced from other {{site.ai_gateway}} entities? + a: | + Sensitive fields on Provider, Model, MCP Server, and other entities are annotated as + referenceable. Set those fields to a vault reference string (for example, a `{vault://...}` + placeholder) instead of a literal value. The Vault `name` is the lookup key. + + - q: What does `name` control? + a: | + `name` is a user-defined unique identifier and the stable handle used to look up the Vault + configuration when other entities reference secrets. Renaming a Vault breaks any reference + pointing at the old value. +--- + +## What is a Vault? + +A Vault is a first-class {{site.ai_gateway}} entity that registers a secret-management backend so that other entities (Providers, Models, MCP Servers) can reference secrets instead of embedding values directly. + +A Vault entity stores the connection configuration and credentials needed to reach the backend. {{site.ai_gateway}} resolves vault references against the registered Vaults at request time.
+ +Vaults can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: + +{% table %} +columns: + - title: Control Plane + key: cp + - title: Endpoint + key: endpoint +rows: + - cp: "{{site.konnect_short_name}} {{site.ai_gateway}} API" + endpoint: /v1/ai-gateways/{aiGatewayId}/vaults +{% endtable %} + +## Backends + +Each Vault selects one of the supported secret backends: {{site.konnect_short_name}} Config Store, environment variables, AWS Secrets Manager, Google Secret Manager, Azure Key Vault, CyberArk Conjur, or HashiCorp Vault. The connection details vary per backend; the {{site.konnect_short_name}} UI surfaces the relevant fields based on the backend you choose. + +HashiCorp Vault additionally supports several authentication methods (token, AppRole, JWT, Kubernetes, AWS, GCP, Azure, and others). See the [{{site.base_gateway}} Vault entity](/gateway/entities/vault/) for backend-specific guidance that applies to both deployment modes. + +## Caching + +Cloud-backed vault types (`aws`, `gcp`, `azure`, `conjur`, `hcv`) cache resolved secrets so that {{site.ai_gateway}} doesn't hit the backend on every reference. Cache duration, negative-lookup caching, and how long expired secrets stay in use during backend outages are all tunable. The `env` type doesn't cache because environment-variable lookups don't hit the network. + +## Set up a Vault + +The following example registers an environment-variable vault that resolves references against process environment variables prefixed with `KONG_`. + +{% entity_example %} +type: vault +data: + display_name: Production Env Vault + name: prod-env-vault + description: Vault for production secrets sourced from environment variables. 
+ type: env + config: + prefix: KONG_ +{% endentity_example %} + +## Schema + +{% entity_schema %} diff --git a/app/_api/konnect/ai-gateway/_index.md b/app/_api/konnect/ai-gateway/_index.md new file mode 100644 index 0000000000..a04c2cee46 --- /dev/null +++ b/app/_api/konnect/ai-gateway/_index.md @@ -0,0 +1,3 @@ +--- +konnect_product_id: 38df0a35-37de-48fa-ac9d-60595d26eddf +--- \ No newline at end of file diff --git a/app/_assets/javascripts/apps/EntitySchema.vue b/app/_assets/javascripts/apps/EntitySchema.vue index d82db15813..9c3839efb2 100644 --- a/app/_assets/javascripts/apps/EntitySchema.vue +++ b/app/_assets/javascripts/apps/EntitySchema.vue @@ -15,6 +15,7 @@ diff --git a/app/_data/entity_examples/config.yml b/app/_data/entity_examples/config.yml index ae36a3af09..ab68f2b003 100644 --- a/app/_data/entity_examples/config.yml +++ b/app/_data/entity_examples/config.yml @@ -32,7 +32,9 @@ formats: admin-api: label: 'Admin API' base_url: 'http://localhost:8001' + ai_gateway_base_url: 'http://localhost:8001' endpoints: + # core entities consumer: '/consumers/' consumer_group: '/consumer_groups/' route: '/routes/' @@ -53,6 +55,11 @@ formats: keyring: '/keyring/' event_hook: '/event-hooks/' partial: '/partials/' + ai_endpoints: + # AI entities (/ai/* on on-prem AI Gateway) + consumer: '/ai-consumers/' + consumer_group: '/ai-consumer-groups/' + vault: '/ai-vaults/' plugin_endpoints: consumer: '/consumers/{consumer}/plugins/' consumer_group: '/consumer_groups/{consumer_group}/plugins/' @@ -61,11 +68,24 @@ formats: global: '/plugins/' variables: <<: *variables + ai_gateway: + placeholder: 'AIGatewayId' + description: 'The `id` of the AI Gateway.' + ai_model: + placeholder: 'aiModelId' + description: 'The `id` of the AI Model.' + ai_agent: + placeholder: 'aiAgentId' + description: 'The `id` of the AI Agent.' + ai_mcp_server: + placeholder: 'aiMCPServerId' + description: 'The `id` of the AI MCP Server.' 
konnect-api: label: 'Konnect API' base_url: 'https://{region}.api.konghq.com/v2/control-planes/{control_plane}/core-entities' event_gateway_base_url: 'https://{region}.api.konghq.com/v1/event-gateways/{event_gateway}' + ai_gateway_base_url: 'https://{region}.api.konghq.com/v1/ai-gateways/{ai_gateway}' endpoints: consumer: '/consumers/' consumer_group: '/consumer_groups/' @@ -85,6 +105,15 @@ formats: listener: '/listeners' schema_registry: '/schema-registries' static_key: '/static-keys' + ai_endpoints: + model: '/models' + policy: '/policies' + agent: '/agents' + mcp_server: '/mcp-servers' + provider: '/providers' + consumer: '/consumers/' + consumer_group: '/consumer-groups/' + vault: '/vaults/' plugin_endpoints: consumer: '/consumers/{consumer}/plugins/' consumer_group: '/consumer_groups/{consumer_group}/plugins/' @@ -127,7 +156,11 @@ formats: event_gateway_listener: placeholder: 'eventGatewayListenerId' description: The `id` of the Event Gateway Listener. - + ai_gateway_variables: + <<: *konnect_variables + ai_gateway: + placeholder: 'AIGatewayId' + description: 'The `id` of the AI Gateway.' 
kic: label: 'KIC' @@ -140,6 +173,13 @@ formats: ui: label: 'UI' entities: + - ai-provider + - ai-model + - ai-agent + - ai-mcp-server + - ai-policy + - ai-consumer + - ai-consumer-group - admin - ca_certificate - certificate @@ -176,4 +216,4 @@ phases: produce: label: 'Produce Phase' cluster: - label: 'Cluster Phase' \ No newline at end of file + label: 'Cluster Phase' diff --git a/app/_data/konnect_oas_data.json b/app/_data/konnect_oas_data.json index 0d492c6990..586dd8a9aa 100644 --- a/app/_data/konnect_oas_data.json +++ b/app/_data/konnect_oas_data.json @@ -1,4 +1,25 @@ [ + { + "id": "38df0a35-37de-48fa-ac9d-60595d26eddf", + "title": "New AI Gateway", + "latestVersion": { + "name": "v2", + "id": "987bb874-f9f9-471e-9ae3-51897cbd2ccd" + }, + "description": "New AI Gateway API.", + "documentCount": 0, + "versionCount": 1, + "versions": [ + { + "id": "987bb874-f9f9-471e-9ae3-51897cbd2ccd", + "created_at": "2024-02-21T17:28:17.757Z", + "updated_at": "2024-10-17T19:13:18.223Z", + "name": "v2", + "deprecated": false, + "registration_configs": [] + } + ] + }, { "id": "ccb264be-1963-49a4-b6e8-bc7c98a6e4c2", "title": "Application Auth Strategies", diff --git a/app/_data/products/ai-gateway.yml b/app/_data/products/ai-gateway.yml index e40c9b2a8b..fc1a92a474 100644 --- a/app/_data/products/ai-gateway.yml +++ b/app/_data/products/ai-gateway.yml @@ -1,2 +1,8 @@ name: AI Gateway -icon: /_assets/icons/products/ai-gateway.svg \ No newline at end of file +icon: /_assets/icons/products/ai-gateway.svg + +releases: + - release: "2.0" + version: "2.0.0" + name: "v2" + latest: true \ No newline at end of file diff --git a/app/_includes/components/entity_example/format/admin-api.md b/app/_includes/components/entity_example/format/admin-api.md index 1cd81c3cfe..570496cf7a 100644 --- a/app/_includes/components/entity_example/format/admin-api.md +++ b/app/_includes/components/entity_example/format/admin-api.md @@ -1,9 +1,13 @@ {% if include.render_context %} {% case 
include.presenter.entity_type %} {% when 'consumer' %} -To create a Consumer, call the [Admin API's `/consumers` endpoint](/api/gateway/admin-ee/#/operations/create-consumer). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Consumer, call the [Admin API's `/ai-consumers` endpoint](/api/gateway/admin-ee/#/operations/create-ai-consumer). {% else %} +To create a Consumer, call the [Admin API's `/consumers` endpoint](/api/gateway/admin-ee/#/operations/create-consumer). {% endif %} {% when 'consumer_group' %} -To create a Consumer Group, call the [Admin API's `/consumer_groups` endpoint](/api/gateway/admin-ee/#/operations/create-consumer_group). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Consumer Group, call the [Admin API's `/ai-consumer-groups` endpoint](/api/gateway/admin-ee/#/operations/create-ai-consumer-group).{% else %} +To create a Consumer Group, call the [Admin API's `/consumer_groups` endpoint](/api/gateway/admin-ee/#/operations/create-consumer_group).{% endif %} {% when 'route' %} To create a Route, call the [Admin API’s `/routes` endpoint](/api/gateway/admin-ee/#/operations/create-route). @@ -30,7 +34,9 @@ To create a CA Certificate, call the [Admin API's `/ca_certificates` endpoint](/ {% when 'certificate' %} To create a Certificate, call the [Admin API's `/certificates` endpoint](/api/gateway/admin-ee/#/operations/create-certificate). {% when 'vault' %} -To create a Vault entity, call the [Admin API's `/vaults` endpoint](/api/gateway/admin-ee/#/operations/create-vault). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Vault entity, call the [Admin API's `/ai-vaults` endpoint](/api/gateway/admin-ee/#/operations/create-ai-vault). {% else %} +To create a Vault entity, call the [Admin API's `/vaults` endpoint](/api/gateway/admin-ee/#/operations/create-vault). 
{% endif %} {% when 'partial' %} To create a Partial, call the [Admin API's `/partials` endpoint](/api/gateway/admin-ee/#/operations/create-partial). {% when 'key' %} diff --git a/app/_includes/components/entity_example/format/deck.md b/app/_includes/components/entity_example/format/deck.md index c7e00fb335..1f1b026082 100644 --- a/app/_includes/components/entity_example/format/deck.md +++ b/app/_includes/components/entity_example/format/deck.md @@ -1,7 +1,7 @@ {% if include.render_context %} {% case include.presenter.entity_type %} -{% when 'consumer' %} -The following creates a new Consumer called **{{ include.presenter.data['username'] }}**: +{% when 'consumer' %}{% assign name = include.presenter.data['name'] | default: include.presenter.data['username'] %} +The following creates a new Consumer called **{{ name }}**: {% when 'consumer_group' %} The following creates a new Consumer Group called **{{ include.presenter.data['name'] }}**: {% when 'route' %} diff --git a/app/_includes/components/entity_example/format/konnect-api.md b/app/_includes/components/entity_example/format/konnect-api.md index da568f8207..5cdbf5530b 100644 --- a/app/_includes/components/entity_example/format/konnect-api.md +++ b/app/_includes/components/entity_example/format/konnect-api.md @@ -1,8 +1,12 @@ {% case include.presenter.entity_type %} {% when 'consumer' %} -To create a Consumer, call the Konnect [control plane config API's `/consumers` endpoint](/api/konnect/control-planes-config/#/operations/create-consumer). 
+{% if include.presenter.product == 'ai-gateway' -%} +To create a Consumer, call the Konnect [{{site.ai_gateway}} API's `/consumers` endpoint](/api/konnect/ai-gateway/#/operations/create-ai-gateway-consumer).{% else %} +To create a Consumer, call the Konnect [control plane config API's `/consumers` endpoint](/api/konnect/control-planes-config/#/operations/create-consumer).{% endif %} {% when 'consumer_group' %} -To create a Consumer Group, call the Konnect [control plane config API's `/consumer_groups` endpoint](/api/konnect/control-planes-config/#/operations/create-consumer_group). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Consumer Group, call the Konnect [{{site.ai_gateway}} API's `/consumer-groups` endpoint](/api/konnect/ai-gateway/#/operations/create-ai-consumer-group).{% else %} +To create a Consumer Group, call the Konnect [control plane config API's `/consumer_groups` endpoint](/api/konnect/control-planes-config/#/operations/create-consumer_group).{% endif %} {% when 'route' %} To create a Route, call the Konnect [control plane config API's `/routes` endpoint](/api/konnect/control-planes-config/#/operations/create-route). {% when 'service' %} @@ -18,7 +22,9 @@ To create a CA Certificate, call the Konnect [control plane config API's `/ca-ce {% when 'certificate' %} To create a Certificate, call the Konnect [control plane config API's `/certificates` endpoint](/api/konnect/control-planes-config/#/operations/create-certificate). {% when 'vault' %} -To create a Vault entity, call the Konnect [control plane config API's `/vaults` endpoint](/api/konnect/control-planes-config/#/operations/create-vault). +{% if include.presenter.product == 'ai-gateway' -%} +To create a Vault entity, call the Konnect [{{site.ai_gateway}} API's `/vaults` endpoint](/api/konnect/ai-gateway/#/operations/create-ai-gateway-vault). 
{% else %} +To create a Vault entity, call the Konnect [control plane config API's `/vaults` endpoint](/api/konnect/control-planes-config/#/operations/create-vault). {% endif %} {% when 'key' %} To create a Key, call the Konnect [control plane config API's `/keys` endpoint](/api/konnect/control-planes-config/#/operations/create-key). {% when 'key-set' %} diff --git a/app/_includes/components/entity_example/format/ui_ai.md b/app/_includes/components/entity_example/format/ui_ai.md new file mode 100644 index 0000000000..ab70cb72fc --- /dev/null +++ b/app/_includes/components/entity_example/format/ui_ai.md @@ -0,0 +1,83 @@ +{% if page.layout == 'gateway_entity' %} +{% case include.presenter.entity_type %} +{% when 'provider' %} +The following creates a new AI Provider. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Providers**. +1. Click **New Provider**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select a provider (for example: `{{ include.presenter.data['type'] }}`). +1. Configure authentication and connection settings for the selected provider type. +1. Click **Create**. +{% when 'policy' %} +The following creates a new AI Policy. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Policies**. +1. Click **New Policy**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select a policy **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Configure the policy `config` fields. +1. 
Click **Create**. +{% when 'consumer' %} +The following creates a new AI Consumer. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Consumers**. +1. Click **New Consumer**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select an authentication **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Configure credentials and optional Consumer Group or Policy references. +1. Click **Create**. +{% when 'consumer_group' %} +The following creates a new AI Consumer Group. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Credentials**. +1. Select the **Groups** tab. +1. Click **New Group**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Optionally add policy references for group-level enforcement. +1. Click **Create**. +{% when 'model' %} +The following creates a new AI Model. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Models**. +1. Click **New Model**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Configure at least one target model and select the Provider reference. +1. Optionally add policies, ACLs, labels, and fallback/load-balancing settings. +1. Click **Create**. 
+{% when 'agent' %} +The following creates a new AI Agent. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **Agents**. +1. Click **New Agent**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Select an Agent **Type** (for example: `{{ include.presenter.data['type'] }}`). +1. Enter the upstream Agent **URL** (for example: `{{ include.presenter.data['config']['url'] }}`). +1. Optionally configure logging, max payload size, ACLs, and Policy references. +1. Click **Create**. +{% when 'mcp_server' %} +The following creates a new AI MCP Server. Suggested values are shown in backticks: + +1. In {{site.konnect_short_name}}, navigate to [{{site.ai_gateway_name}}](https://cloud.konghq.com/ai-gateway/) in the sidebar. +1. Select an {{site.ai_gateway}}. +1. Navigate to **MCP Servers**. +1. Click **New MCP Server**. +1. Enter a **Display Name** (for example: `{{ include.presenter.data['display_name'] }}`) and **Name** (for example: `{{ include.presenter.data['name'] }}`). +1. Configure endpoint/auth settings and optional policies. +1. Click **Create**. +{% else %} +UI instructions are not yet available for this {{site.ai_gateway}} entity type. +{% endcase %} +{% endif %} diff --git a/app/_landing_pages/ai-gateway/entities.yaml b/app/_landing_pages/ai-gateway/entities.yaml new file mode 100644 index 0000000000..313199bbb6 --- /dev/null +++ b/app/_landing_pages/ai-gateway/entities.yaml @@ -0,0 +1,109 @@ +metadata: + title: "{{site.ai_gateway}} entities" + content_type: landing_page + description: This page lists the entities that make up {{site.ai_gateway}}. 
+ breadcrumbs: + - /ai-gateway/ + products: + - ai-gateway + works_on: + - on-prem + - konnect + +rows: + - header: + type: h1 + text: "{{site.ai_gateway}} entities" + sub_text: "Entities are the components and objects that make up {{site.ai_gateway}}." + + - header: + type: h2 + text: "Core entities" + column_count: 3 + columns: + - blocks: + - type: card + config: + title: "{{site.ai_gateway}}" + description: The top-level entity that owns Models, Providers, Policies, Agents, MCP Servers, and other AI-specific entities. + cta: + text: "{{site.ai_gateway}} entity" + url: /ai-gateway/entities/ai-gateway/ + - blocks: + - type: card + config: + title: "{{site.ai_gateway}} Provider" + description: Stores upstream provider credentials and connection configuration. Providers are reusable and are not model endpoints. + cta: + text: AI Provider entity + url: /ai-gateway/entities/ai-provider/ + - blocks: + - type: card + config: + title: Model + description: Defines a model endpoint and capability configuration used for model selection and policy targeting. + cta: + text: Model entity + url: /ai-gateway/entities/ai-model/ + - blocks: + - type: card + config: + title: AI Agent + description: An A2A or HTTP agent exposed through the A2A proxy flow. Independent of Model. + cta: + text: AI Agent entity + url: /ai-gateway/entities/ai-agent/ + - blocks: + - type: card + config: + title: AI MCP Server + description: An MCP server in passthrough, listener, or conversion-listener mode. Mode is immutable after creation. + cta: + text: AI MCP Server entity + url: /ai-gateway/entities/ai-mcp-server/ + - blocks: + - type: card + config: + title: AI Policy + description: An AI Gateway plugin instance scoped globally or to a specific AI entity. Policy instances are independent. + cta: + text: AI Policy entity + url: /ai-gateway/entities/ai-policy/ + - blocks: + - type: card + config: + title: AI Consumer + description: A thin wrapper around the existing Consumer entity. 
+ cta: + text: AI Consumer entity + url: /ai-gateway/entities/ai-consumer/ + - blocks: + - type: card + config: + title: AI Consumer Group + description: A thin wrapper around the existing Consumer Group entity. + cta: + text: AI Consumer Group entity + url: /ai-gateway/entities/ai-consumer-group/ + + - header: + type: h2 + text: "Security" + column_count: 3 + columns: + - blocks: + - type: card + config: + title: AI Vault + description: Store and reference secrets used by AI Gateway entities and plugins. + cta: + text: AI Vault entity + url: /ai-gateway/entities/ai-vault/ + - blocks: + - type: card + config: + title: AI Data Plane Certificate + description: Public client certificates that authorize data planes to establish mTLS connections to an AI Gateway. + cta: + text: AI Data Plane Certificate entity + url: /ai-gateway/entities/ai-data-plane-certificate/ diff --git a/app/_plugins/drops/entity_example/presenters/admin-api.rb b/app/_plugins/drops/entity_example/presenters/admin-api.rb index 9eebea6126..4c950ea1ba 100644 --- a/app/_plugins/drops/entity_example/presenters/admin-api.rb +++ b/app/_plugins/drops/entity_example/presenters/admin-api.rb @@ -42,14 +42,40 @@ def data_validate_on_prem config: { url:, headers:, body: data, method: 'POST', status_code: 201 } }) end + def product + @product ||= @example_drop.product + end + private def build_url [ - formats['admin-api']['base_url'], - formats['admin-api']['endpoints'][entity_type] + base_url, + endpoint ].join end + + def base_url + @base_url ||= case @example_drop.product + when 'gateway' + formats['admin-api']['base_url'] + when 'ai-gateway' + formats['admin-api']['ai_gateway_base_url'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" + end + end + + def endpoint + @endpoint ||= case @example_drop.product + when 'gateway' + formats['admin-api']['endpoints'][entity_type] + when 'ai-gateway' + formats['admin-api']['ai_endpoints'][entity_type] + else + raise ArgumentError, 
"Unsupported product: #{@example_drop.product}" + end + end end class Plugin < Base @@ -72,7 +98,7 @@ def missing_variables def build_url [ - formats['admin-api']['base_url'], + base_url, formats['admin-api']['plugin_endpoints'][@example_drop.target.key] ].join end diff --git a/app/_plugins/drops/entity_example/presenters/konnect-api.rb b/app/_plugins/drops/entity_example/presenters/konnect-api.rb index a890099167..0efa5a4f3f 100644 --- a/app/_plugins/drops/entity_example/presenters/konnect-api.rb +++ b/app/_plugins/drops/entity_example/presenters/konnect-api.rb @@ -44,25 +44,46 @@ def product def default_variables @default_variables ||= - if @example_drop.product == 'gateway' + case @example_drop.product + when 'gateway' formats['konnect-api']['variables'] - else + when 'event-gateway' formats['konnect-api']['event_gateway_variables'] + when 'ai-gateway' + formats['konnect-api']['ai_gateway_variables'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" end end def build_url [ base_url, - formats['konnect-api']['endpoints'][entity_type] + endpoint ].join end def base_url - @base_url ||= if @example_drop.product == 'gateway' + @base_url ||= case @example_drop.product + when 'gateway' formats['konnect-api']['base_url'] - else + when 'event-gateway' formats['konnect-api']['event_gateway_base_url'] + when 'ai-gateway' + formats['konnect-api']['ai_gateway_base_url'] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" + end + end + + def endpoint + @endpoint ||= case @example_drop.product + when 'gateway', 'event-gateway' + formats['konnect-api']['endpoints'][entity_type] + when 'ai-gateway' + formats['konnect-api']['ai_endpoints'][entity_type] + else + raise ArgumentError, "Unsupported product: #{@example_drop.product}" end end end diff --git a/app/_plugins/drops/entity_example/presenters/ui.rb b/app/_plugins/drops/entity_example/presenters/ui.rb index 84b80506a3..62851b692f 100644 --- 
a/app/_plugins/drops/entity_example/presenters/ui.rb +++ b/app/_plugins/drops/entity_example/presenters/ui.rb @@ -13,7 +13,11 @@ def data end def template_file - '/components/entity_example/format/ui.md' + if @example_drop.product == 'ai-gateway' + '/components/entity_example/format/ui_ai.md' + else + '/components/entity_example/format/ui.md' + end end end diff --git a/app/_plugins/drops/entity_schema.rb b/app/_plugins/drops/entity_schema.rb index fd37919bc2..62b1585efd 100644 --- a/app/_plugins/drops/entity_schema.rb +++ b/app/_plugins/drops/entity_schema.rb @@ -57,20 +57,12 @@ def api_file @api_file ||= [ File.expand_path('../', @site.source), 'api-specs', - *product_path, + @schema.fetch('api'), release_path, 'openapi.yaml' ].join('/') end - def product_path - if @release.ee_version - %w[gateway admin-ee] - else - %w[konnect event-gateway] - end - end - def release_path if @release.ee_version @release.number diff --git a/jekyll.yml b/jekyll.yml index a8cedcfaaa..4b7ab29734 100644 --- a/jekyll.yml +++ b/jekyll.yml @@ -34,6 +34,8 @@ include: # Collections collections: + ai_gateway_entities: + output: true gateway_entities: output: true how-tos: @@ -54,6 +56,16 @@ defaults: permalink: "/how-to/:path/" breadcrumbs: - "/how-to/" + - scope: + path: "_ai_gateway_entities" + type: "ai_gateway_entities" + values: + layout: "gateway_entity" + permalink: "/ai-gateway/entities/:path/" + products: + - ai-gateway + breadcrumbs: + - "/ai-gateway/" - scope: path: "_gateway_entities" type: "gateway_entities" diff --git a/vite.config.ts b/vite.config.ts index 3e7741a319..a73582a2b1 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -63,12 +63,16 @@ export default ({ command, mode }) => { server: { cors: { origin: 'http://localhost:8888' }, proxy: { - '^/api': { + '/vite-dev/api': { changeOrigin: true, target: portalApiUrl, configure: (proxy, options) => { mutateCookieAttributes(proxy) setHostHeader(proxy) + }, + rewrite: (path) => { + return path + .replace(/^\/vite-dev\/api/, 
'/api/'); } } } From 6958441d6a50bee4722dc9ab680c870acab59e1c Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 1 Jun 2026 11:11:21 +0100 Subject: [PATCH 25/42] add links to new entities --- app/_includes/plugins/ai-proxy/providers/providers.md | 7 +++++-- app/_landing_pages/ai-gateway/ai-providers.yaml | 2 -- app/ai-gateway/ai-providers/kimi.md | 2 +- app/ai-gateway/ai-providers/vercel.md | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/app/_includes/plugins/ai-proxy/providers/providers.md b/app/_includes/plugins/ai-proxy/providers/providers.md index af231eafc9..3238a6806a 100644 --- a/app/_includes/plugins/ai-proxy/providers/providers.md +++ b/app/_includes/plugins/ai-proxy/providers/providers.md @@ -1,6 +1,9 @@ {%- assign provider = include.providers.providers | where: "name", include.provider_name | first -%} {% if provider %} -You can proxy requests to {{ provider.name }} AI models through {{site.ai_gateway}} using the [AI Proxy](/plugins/ai-proxy/) and [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugins. This reference documents all supported AI capabilities, configuration requirements, and provider-specific details needed for proper integration. +You can proxy requests to {{ provider.name }} AI models through {{site.ai_gateway}} by creating [providers](/ai-gateway/entities/provider/) and [models](/ai-gateway/entities/model/). This reference documents all supported AI capabilities, configuration requirements, and provider-specific details needed for proper integration. + +{:.info} +> Model provider support uses the [AI Proxy](/plugins/ai-proxy/) and [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugins behind the scenes. In some deployment modes you may need to configure these explicitly. 
## Upstream paths @@ -109,7 +112,7 @@ rows: ## Supported capabilities -The following tables show the AI capabilities supported by {{ provider.name }} provider when used with the [AI Proxy](/plugins/ai-proxy/) or the [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. +The following tables show the AI capabilities supported by the {{ provider.name }} provider. {:.info} > Set the plugin's [`route_type`](/plugins/ai-proxy/reference/#schema--config-route-type) based on the capability you want to use. See the tables below for supported route types. diff --git a/app/_landing_pages/ai-gateway/ai-providers.yaml b/app/_landing_pages/ai-gateway/ai-providers.yaml index f4830f7df1..26aaef26b6 100644 --- a/app/_landing_pages/ai-gateway/ai-providers.yaml +++ b/app/_landing_pages/ai-gateway/ai-providers.yaml @@ -186,8 +186,6 @@ rows: - type: reference_list config: pages: - - /plugins/ai-proxy/ - - /plugins/ai-proxy-advanced/ - /ai-gateway/load-balancing/ - /ai-gateway/resource-sizing-guidelines-ai/ - /how-to/?tags=ai diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index 0130533d70..e787ddaa50 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -57,7 +57,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/provider/). You can then access supported [models](/ai-gateway/entities/model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: diff --git a/app/ai-gateway/ai-providers/vercel.md b/app/ai-gateway/ai-providers/vercel.md index fb5afc1071..e4e93d6fff 100644 --- a/app/ai-gateway/ai-providers/vercel.md +++ b/app/ai-gateway/ai-providers/vercel.md @@ -55,9 +55,9 @@ how_to_list: {% include plugins/ai-proxy/providers/providers.md providers=site.data.plugins.ai-proxy provider_name="Vercel" %} -## Configure {{ provider.name }} with AI Proxy +## Configure a {{ provider.name }} provider -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/provider/). You can then access supported [models](/ai-gateway/entities/model/) from {{ provider.name }}. Note that, {{ site.vercel }} hosts [models](https://vercel.com/ai-gateway/models) from other providers so in this example we use `openai/gpt-5.5`. 
From 3f39fa1e05719094c6429e1ad512fc06e151b7e9 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 1 Jun 2026 11:24:24 +0100 Subject: [PATCH 26/42] fix links --- app/_includes/plugins/ai-proxy/providers/providers.md | 2 +- app/_landing_pages/ai-gateway/ai-providers.yaml | 2 +- app/ai-gateway/ai-providers/kimi.md | 2 +- app/ai-gateway/ai-providers/vercel.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/_includes/plugins/ai-proxy/providers/providers.md b/app/_includes/plugins/ai-proxy/providers/providers.md index 3238a6806a..8abd77b9db 100644 --- a/app/_includes/plugins/ai-proxy/providers/providers.md +++ b/app/_includes/plugins/ai-proxy/providers/providers.md @@ -1,6 +1,6 @@ {%- assign provider = include.providers.providers | where: "name", include.provider_name | first -%} {% if provider %} -You can proxy requests to {{ provider.name }} AI models through {{site.ai_gateway}} by creating [providers](/ai-gateway/entities/provider/) and [models](/ai-gateway/entities/model/). This reference documents all supported AI capabilities, configuration requirements, and provider-specific details needed for proper integration. +You can proxy requests to {{ provider.name }} AI models through {{site.ai_gateway}} by creating [providers](/ai-gateway/entities/ai-provider/) and [models](/ai-gateway/entities/ai-model/). This reference documents all supported AI capabilities, configuration requirements, and provider-specific details needed for proper integration. {:.info} > Model provider support uses the [AI Proxy](/plugins/ai-proxy/) and [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugins behind the scenes. In some deployment modes you may need to configure these explicitly. 
diff --git a/app/_landing_pages/ai-gateway/ai-providers.yaml b/app/_landing_pages/ai-gateway/ai-providers.yaml index 26aaef26b6..5bbdd029d8 100644 --- a/app/_landing_pages/ai-gateway/ai-providers.yaml +++ b/app/_landing_pages/ai-gateway/ai-providers.yaml @@ -23,7 +23,7 @@ rows: blocks: - type: text text: | - The core of [{{site.ai_gateway}}](/ai-gateway/) is the ability to serve AI [models](/ai-gateway/entities/model/) from various [providers](/ai-gateway/entities/provider/) via a provider-agnostic API. This normalized API layer affords developers and organizations multiple benefits: + The core of [{{site.ai_gateway}}](/ai-gateway/) is the ability to serve AI [models](/ai-gateway/entities/ai-model/) from various [providers](/ai-gateway/entities/ai-provider/) via a provider-agnostic API. This normalized API layer affords developers and organizations multiple benefits: - type: unordered_list items: diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index e787ddaa50..65f14e4560 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -57,7 +57,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/provider/). You can then access supported [models](/ai-gateway/entities/model/) from {{ provider.name }}. +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: diff --git a/app/ai-gateway/ai-providers/vercel.md b/app/ai-gateway/ai-providers/vercel.md index e4e93d6fff..36f13930e3 100644 --- a/app/ai-gateway/ai-providers/vercel.md +++ b/app/ai-gateway/ai-providers/vercel.md @@ -57,7 +57,7 @@ how_to_list: ## Configure a {{ provider.name }} provider -To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/provider/). You can then access supported [models](/ai-gateway/entities/model/) from {{ provider.name }}. +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. Note that, {{ site.vercel }} hosts [models](https://vercel.com/ai-gateway/models) from other providers so in this example we use `openai/gpt-5.5`. From 79b608e4ba6111e8b4461774c49875d0f2e373ad Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 1 Jun 2026 13:33:47 +0100 Subject: [PATCH 27/42] move kimi icon to right location --- app/{_assets/icons/products => assets/icons}/kimi.svg | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename app/{_assets/icons/products => assets/icons}/kimi.svg (100%) diff --git a/app/_assets/icons/products/kimi.svg b/app/assets/icons/kimi.svg similarity index 100% rename from app/_assets/icons/products/kimi.svg rename to app/assets/icons/kimi.svg From 9ee99501a8ef002c7b9cfdbc81f5ea2996358f70 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 1 Jun 2026 14:41:01 +0100 Subject: [PATCH 28/42] add generic ai proxy example --- .../set-up-a-model-with-ai-proxy.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md diff --git a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md new file mode 100644 index 0000000000..9d772b7471 --- /dev/null +++ 
b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md @@ -0,0 +1,93 @@ +--- +title: Set up a model with AI Proxy +permalink: /how-to/set-up-a-model-with-ai-proxy +content_type: how_to +related_resources: + - text: "{{site.ai_gateway}}" + url: /ai-gateway/ + - text: AI Proxy + url: /plugins/ai-proxy/ + +description: Configure the AI Proxy plugin to create a chat route. + +products: + - gateway + - ai-gateway + +works_on: + - on-prem + - konnect + +min_version: + gateway: '3.14' + +plugins: + - ai-proxy + +entities: + - service + - route + - plugin + +tags: + - ai + - openai + - deepseek + +tldr: + q: How do I use the AI Proxy plugin? + a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with a provider, model, and API key. + +tools: + - deck + +prereqs: + entities: + services: + - example-service + routes: + - example-route + +cleanup: + inline: + - title: Clean up Konnect environment + include_content: cleanup/platform/konnect + icon_url: /assets/icons/gateway.svg + - title: Destroy the {{site.base_gateway}} container + include_content: cleanup/products/gateway + icon_url: /assets/icons/gateway.svg +--- + +## Configure the plugin + +To set up AI Proxy specify a provider and a compatible model and set the appropriate authentication header and optionally an upstream URL. + +Additionally, you will need an API key from the upstream API provider. + +In this minimal example, we'll use the OpenAI provider and the `gpt-5.5` model: + +{% entity_examples %} +entities: + plugins: + - name: ai-proxy + config: + route_type: llm/v1/chat + auth: + header_name: Authorization + header_value: Bearer ${api_key} + model: + provider: openai + name: gpt-5.5 + options: + max_tokens: 512 + temperature: 1.0 +variables: + api_key: + value: $API_KEY +{% endentity_examples %} + +Further information can be found in the [AI proxy configuration reference](/plugins/ai-proxy-advanced/reference/). 
+ +## Validate + +{% include how-tos/steps/ai-proxy-validate.md %} From 55a2236233a0fd6ac3e4ddb2263e73bb017c9a17 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 1 Jun 2026 14:45:51 +0100 Subject: [PATCH 29/42] one topic for basic and advanced --- app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md index 9d772b7471..0af6e40339 100644 --- a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md +++ b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md @@ -18,9 +18,6 @@ works_on: - on-prem - konnect -min_version: - gateway: '3.14' - plugins: - ai-proxy @@ -86,7 +83,7 @@ variables: value: $API_KEY {% endentity_examples %} -Further information can be found in the [AI proxy configuration reference](/plugins/ai-proxy-advanced/reference/). +Further information can be found in the [AI Proxy configuration reference](/plugins/ai-proxy/reference/) or [AI Proxy Advanced configuration reference](/plugins/ai-proxy-advanced/reference/). ## Validate From d9166f0874f2ddb94a40eeb85143d16d57399f42 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 1 Jun 2026 16:22:55 +0100 Subject: [PATCH 30/42] fix links --- app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md index 0af6e40339..aeac5b39bd 100644 --- a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md +++ b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md @@ -57,7 +57,7 @@ cleanup: ## Configure the plugin -To set up AI Proxy specify a provider and a compatible model and set the appropriate authentication header and optionally an upstream URL. 
+To set up AI Proxy, specify a [provider](/ai-gateway/ai-providers/) and a compatible model, then set the appropriate authentication header and, optionally, an upstream URL. Additionally, you will need an API key from the upstream API provider. From 9c08b5d3dbdfdcc401f3fc0ca0481935bbe82561 Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 1 Jun 2026 16:37:40 +0100 Subject: [PATCH 31/42] add new standard links --- app/ai-gateway/ai-providers/anthropic.md | 7 ++++--- app/ai-gateway/ai-providers/azure.md | 7 ++++--- app/ai-gateway/ai-providers/bedrock.md | 7 ++++--- app/ai-gateway/ai-providers/cerebras.md | 7 ++++--- app/ai-gateway/ai-providers/cohere.md | 7 ++++--- app/ai-gateway/ai-providers/dashscope.md | 7 ++++--- app/ai-gateway/ai-providers/databricks.md | 5 +++-- app/ai-gateway/ai-providers/deepseek.md | 5 +++-- app/ai-gateway/ai-providers/gemini.md | 7 ++++--- app/ai-gateway/ai-providers/huggingface.md | 7 ++++--- app/ai-gateway/ai-providers/llama.md | 7 ++++--- app/ai-gateway/ai-providers/mistral.md | 7 ++++--- app/ai-gateway/ai-providers/ollama.md | 7 ++++--- app/ai-gateway/ai-providers/openai.md | 7 ++++--- app/ai-gateway/ai-providers/vertex.md | 7 ++++--- app/ai-gateway/ai-providers/vllm.md | 5 +++-- app/ai-gateway/ai-providers/xai.md | 7 ++++--- 17 files changed, 65 insertions(+), 48 deletions(-) diff --git a/app/ai-gateway/ai-providers/anthropic.md b/app/ai-gateway/ai-providers/anthropic.md index 464171b9c9..ebdd8faadb 100644 --- a/app/ai-gateway/ai-providers/anthropic.md +++ b/app/ai-gateway/ai-providers/anthropic.md @@ -61,7 +61,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -85,7 +85,8 @@ data: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/azure.md b/app/ai-gateway/ai-providers/azure.md index a3781fe31c..8c8d94321a 100644 --- a/app/ai-gateway/ai-providers/azure.md +++ b/app/ai-gateway/ai-providers/azure.md @@ -63,7 +63,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -93,7 +93,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/bedrock.md b/app/ai-gateway/ai-providers/bedrock.md index 3e5a6ab1f5..5cc8afdb3f 100644 --- a/app/ai-gateway/ai-providers/bedrock.md +++ b/app/ai-gateway/ai-providers/bedrock.md @@ -77,7 +77,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -109,7 +109,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/cerebras.md b/app/ai-gateway/ai-providers/cerebras.md index 5d1dc38996..308c6bb305 100644 --- a/app/ai-gateway/ai-providers/cerebras.md +++ b/app/ai-gateway/ai-providers/cerebras.md @@ -58,7 +58,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -86,7 +86,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/cohere.md b/app/ai-gateway/ai-providers/cohere.md index b9232593b8..77feadb52c 100644 --- a/app/ai-gateway/ai-providers/cohere.md +++ b/app/ai-gateway/ai-providers/cohere.md @@ -66,7 +66,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -94,7 +94,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/dashscope.md b/app/ai-gateway/ai-providers/dashscope.md index e76d2f1f8f..f182f2de49 100644 --- a/app/ai-gateway/ai-providers/dashscope.md +++ b/app/ai-gateway/ai-providers/dashscope.md @@ -59,7 +59,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -87,7 +87,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/databricks.md b/app/ai-gateway/ai-providers/databricks.md index 1b829edd36..20b9ede3cc 100644 --- a/app/ai-gateway/ai-providers/databricks.md +++ b/app/ai-gateway/ai-providers/databricks.md @@ -86,7 +86,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/deepseek.md b/app/ai-gateway/ai-providers/deepseek.md index 8687f2367c..bd2de45c5c 100644 --- a/app/ai-gateway/ai-providers/deepseek.md +++ b/app/ai-gateway/ai-providers/deepseek.md @@ -81,7 +81,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI 
proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} diff --git a/app/ai-gateway/ai-providers/gemini.md b/app/ai-gateway/ai-providers/gemini.md index 1be9c766ab..ba965ef7ee 100644 --- a/app/ai-gateway/ai-providers/gemini.md +++ b/app/ai-gateway/ai-providers/gemini.md @@ -74,7 +74,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. Here's a minimal configuration for chat completions: @@ -100,7 +100,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/huggingface.md b/app/ai-gateway/ai-providers/huggingface.md index cf74ddbd56..5c3bd766a8 100644 --- a/app/ai-gateway/ai-providers/huggingface.md +++ b/app/ai-gateway/ai-providers/huggingface.md @@ -61,7 +61,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). 
You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. Here's a minimal configuration for chat completions: @@ -86,7 +86,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/llama.md b/app/ai-gateway/ai-providers/llama.md index 8e9005becf..a3525e565d 100644 --- a/app/ai-gateway/ai-providers/llama.md +++ b/app/ai-gateway/ai-providers/llama.md @@ -59,7 +59,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -79,7 +79,8 @@ data: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/mistral.md b/app/ai-gateway/ai-providers/mistral.md index e994fab335..cde7562607 100644 --- a/app/ai-gateway/ai-providers/mistral.md +++ b/app/ai-gateway/ai-providers/mistral.md @@ -59,7 +59,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -87,7 +87,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/ollama.md b/app/ai-gateway/ai-providers/ollama.md index df163fa8b9..04eded734b 100644 --- a/app/ai-gateway/ai-providers/ollama.md +++ b/app/ai-gateway/ai-providers/ollama.md @@ -57,7 +57,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. +To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin. 
Here's a minimal configuration for chat completions: @@ -76,7 +76,8 @@ data: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/openai.md b/app/ai-gateway/ai-providers/openai.md index 662e3f2035..9888891242 100644 --- a/app/ai-gateway/ai-providers/openai.md +++ b/app/ai-gateway/ai-providers/openai.md @@ -59,7 +59,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -86,8 +86,9 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI Proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} diff --git a/app/ai-gateway/ai-providers/vertex.md b/app/ai-gateway/ai-providers/vertex.md index 0c93e20c5e..6e4cf17729 100644 --- a/app/ai-gateway/ai-providers/vertex.md +++ b/app/ai-gateway/ai-providers/vertex.md @@ -60,7 +60,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -94,8 +94,9 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI Proxy](/how-to/set-up-a-model-with-ai-proxy) ## Authentication with GCP IAM diff --git a/app/ai-gateway/ai-providers/vllm.md b/app/ai-gateway/ai-providers/vllm.md index acdc15a2f2..25b7cb6ea6 100644 --- a/app/ai-gateway/ai-providers/vllm.md +++ b/app/ai-gateway/ai-providers/vllm.md @@ -73,5 +73,6 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) \ No newline at end of file +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI Proxy](/how-to/set-up-a-model-with-ai-proxy) \ No newline at end of file diff --git a/app/ai-gateway/ai-providers/xai.md b/app/ai-gateway/ai-providers/xai.md index 81f75922bd..1c8378238a 100644 --- a/app/ai-gateway/ai-providers/xai.md +++ b/app/ai-gateway/ai-providers/xai.md @@ -61,7 +61,7 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure the [AI Proxy](/plugins/ai-proxy/) or [AI Proxy Advanced](/plugins/ai-proxy-advanced/). +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
Here's a minimal configuration for chat completions: @@ -89,7 +89,8 @@ variables: {:.success} > For more configuration options and examples, see: -> - [AI Proxy examples](/plugins/ai-proxy/examples/) -> - [AI Proxy Advanced examples](/plugins/ai-proxy-advanced/examples/) +> - [Set up a provider](/ai-gateway/entities/ai-provider/#set-up-a-provider) +> - [Set up a model](/ai-gateway/entities/ai-model/#set-up-a-model) +> - [How to set up a model with AI Proxy](/how-to/set-up-a-model-with-ai-proxy) {% include plugins/ai-proxy/providers/how-tos.md %} \ No newline at end of file From 781caeaa85da1e8e455027d93aa12b78dc225e3b Mon Sep 17 00:00:00 2001 From: jbaross Date: Mon, 1 Jun 2026 16:59:49 +0100 Subject: [PATCH 32/42] remove old provider how tos --- .../set-up-a-model-with-ai-proxy.md | 4 +- ...set-up-ai-proxy-advanced-with-anthropic.md | 97 ------------- ...t-up-ai-proxy-advanced-with-aws-bedrock.md | 114 --------------- .../set-up-ai-proxy-advanced-with-cerebras.md | 107 -------------- .../set-up-ai-proxy-advanced-with-cohere.md | 111 --------------- ...set-up-ai-proxy-advanced-with-dashscope.md | 101 -------------- ...et-up-ai-proxy-advanced-with-databricks.md | 96 ------------- .../set-up-ai-proxy-advanced-with-deepseek.md | 95 ------------- .../set-up-ai-proxy-advanced-with-gemini.md | 130 ------------------ ...t-up-ai-proxy-advanced-with-huggingface.md | 100 -------------- ...t-up-ai-proxy-advanced-with-ollama-qwen.md | 89 ------------ .../set-up-ai-proxy-advanced-with-ollama.md | 90 ------------ .../set-up-ai-proxy-advanced-with-openai.md | 93 ------------- ...set-up-ai-proxy-advanced-with-vertex-ai.md | 108 --------------- .../set-up-ai-proxy-with-anthropic.md | 93 ------------- .../set-up-ai-proxy-with-aws-bedrock.md | 114 --------------- .../set-up-ai-proxy-with-cerebras.md | 106 -------------- .../ai-gateway/set-up-ai-proxy-with-cohere.md | 110 --------------- .../set-up-ai-proxy-with-dashscope.md | 100 -------------- .../set-up-ai-proxy-with-databricks.md 
| 95 ------------- .../set-up-ai-proxy-with-deepseek.md | 94 ------------- .../ai-gateway/set-up-ai-proxy-with-gemini.md | 128 ----------------- .../set-up-ai-proxy-with-huggingface.md | 99 ------------- .../set-up-ai-proxy-with-ollama-qwen.md | 88 ------------ .../ai-gateway/set-up-ai-proxy-with-ollama.md | 89 ------------ .../ai-gateway/set-up-ai-proxy-with-openai.md | 92 ------------- .../set-up-ai-proxy-with-vertex-ai.md | 106 -------------- app/ai-gateway/ai-providers/anthropic.md | 1 + app/ai-gateway/ai-providers/azure.md | 1 + app/ai-gateway/ai-providers/bedrock.md | 1 + app/ai-gateway/ai-providers/cerebras.md | 1 + app/ai-gateway/ai-providers/cohere.md | 1 + app/ai-gateway/ai-providers/dashscope.md | 1 + app/ai-gateway/ai-providers/databricks.md | 1 + app/ai-gateway/ai-providers/deepseek.md | 1 + app/ai-gateway/ai-providers/gemini.md | 1 + app/ai-gateway/ai-providers/huggingface.md | 1 + app/ai-gateway/ai-providers/kimi.md | 1 + app/ai-gateway/ai-providers/llama.md | 1 + app/ai-gateway/ai-providers/mistral.md | 1 + app/ai-gateway/ai-providers/ollama.md | 1 + app/ai-gateway/ai-providers/openai.md | 1 + app/ai-gateway/ai-providers/vercel.md | 1 + app/ai-gateway/ai-providers/vertex.md | 1 + app/ai-gateway/ai-providers/xai.md | 1 + 45 files changed, 19 insertions(+), 2648 deletions(-) delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-anthropic.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-aws-bedrock.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-cerebras.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-cohere.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-dashscope.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-databricks.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-deepseek.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-gemini.md delete 
mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-huggingface.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-ollama-qwen.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-ollama.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-openai.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vertex-ai.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-anthropic.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-aws-bedrock.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-cerebras.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-cohere.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-dashscope.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-databricks.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-deepseek.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-gemini.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-huggingface.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-ollama-qwen.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-ollama.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-openai.md delete mode 100644 app/_how-tos/ai-gateway/set-up-ai-proxy-with-vertex-ai.md diff --git a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md index aeac5b39bd..8637bebf23 100644 --- a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md +++ b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md @@ -28,9 +28,7 @@ entities: tags: - ai - - openai - - deepseek - + - ai-proxy tldr: q: How do I use the AI Proxy plugin? a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with a provider, model, and API key. 
diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-anthropic.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-anthropic.md deleted file mode 100644 index d0ea83cd94..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-anthropic.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Anthropic in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-anthropic/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Anthropic. - -products: - - gateway - - ai-gateway - - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.8' - - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - anthropic - -tldr: - q: How do I use the AI Proxy Advanced plugin with Anthropic? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin, configure it with the Anthropic provider, then add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Anthropic - include_content: prereqs/anthropic - icon_url: /assets/icons/anthropic.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg - - ---- - -## Configure the plugin - -To set up AI Proxy Advanced with {{ site.anthropic }}, we need to specify the [model](https://docs.anthropic.com/en/docs/about-claude/models#model-names) and [{{ site.anthropic }} API version](https://docs.anthropic.com/en/api/versioning#version-history) to use. 
- -In this example, we'll use the {{ site.claude }} `claude-sonnet-4-5` model and version 2023-06-01 of the API: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: x-api-key - header_value: ${anthropic_api_key} - model: - provider: anthropic - name: claude-sonnet-4-5 - options: - anthropic_version: "2023-06-01" - max_tokens: 1024 -variables: - anthropic_api_key: - value: $ANTHROPIC_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-aws-bedrock.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-aws-bedrock.md deleted file mode 100644 index c78065b69c..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-aws-bedrock.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: Set up AI Proxy Advanced with AWS Bedrock in {{site.base_gateway}}. -permalink: /how-to/set-up-ai-proxy-advanced-with-aws-bedrock/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using AWS Bedrock. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - aws-bedrock - -tldr: - q: How do I use the AI Proxy Advanced plugin with AWS Bedrock? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the AWS Bedrock provider and add the model and your AWS credentials. 
- -tools: - - deck - -prereqs: - inline: - - title: AWS credentials and Bedrock model access - content: | - Before you begin, you must have AWS credentials with Bedrock permissions: - - - **AWS Access Key ID**: Your AWS access key - - **AWS Secret Access Key**: Your AWS secret key - - **Region**: AWS region where Bedrock is available (for example, `us-east-1`) - - 1. Enable the chat model in the [AWS Bedrock console](https://console.aws.amazon.com/bedrock/) under **Model Access**. Navigate to **Bedrock** > **Model access** and request access to `meta.llama3-70b-instruct-v1:0`. - - 1. Export the required values as environment variables: - - ```sh - export DECK_AWS_ACCESS_KEY_ID="" - export DECK_AWS_SECRET_ACCESS_KEY="" - ``` - icon_url: /assets/icons/aws.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy Advanced with AWS Bedrock, specify the model and set the authenticate using AWS credentials. 
- -In this example, we'll use the Meta Llama 3 70B Instruct model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - allow_override: false - aws_access_key_id: ${aws_access_key_id} - aws_secret_access_key: ${aws_secret_access_key} - model: - provider: bedrock - name: meta.llama3-70b-instruct-v1:0 - options: - bedrock: - aws_region: us-east-1 -variables: - aws_access_key_id: - value: $AWS_ACCESS_KEY_ID - aws_secret_access_key: - value: $AWS_SECRET_ACCESS_KEY -formats: - - deck -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-cerebras.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-cerebras.md deleted file mode 100644 index a5a1006c03..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-cerebras.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Cerebras in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-cerebras/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Cerebras . - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - cerebras - -tldr: - q: How do I use the AI Proxy Advanced plugin with Cerebras? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the Cerebras provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Cerebras - content: | - This tutorial uses Cerebras: - 1. [Create a Cerebras account](https://chat.cerebras.ai). - 1. Get an API key. 
- 1. Create a decK variable with the API key: - - ```sh - export DECK_CEREBRAS_API_KEY='YOUR CEREBRAS API KEY' - ``` - icon_url: /assets/icons/cerebras.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg - ---- - -## Configure the plugin - -To set up AI Proxy Advanced with {{ site.cerebras }}, we need to specify the model to use. - -In this example, we'll use the gpt-oss-120b model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${cerebras_api_key} - model: - provider: cerebras - name: gpt-oss-120b - options: - max_tokens: 512 - temperature: 1.0 -variables: - cerebras_api_key: - value: $CEREBRAS_API_KEY - description: The API key to use to connect to Cerebras. -formats: - - deck -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-cohere.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-cohere.md deleted file mode 100644 index 3bc5b5eb89..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-cohere.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Cohere in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-cohere/ - -content_type: how_to - -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Cohere. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - cohere - -tldr: - q: How do I use the AI Proxy Advanced plugin with Cohere? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the Cohere provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Cohere - include_content: prereqs/cohere - icon_url: /assets/icons/cohere.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy Advanced with {{ site.cohere }}, configure API key authentication and specify the {{ site.cohere }} model to use. - -In this example, we'll use the {{ site.cohere }} command model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${cohere_api_key} - model: - provider: cohere - name: command-a-03-2025 - options: - max_tokens: 512 - temperature: 1.0 -variables: - cohere_api_key: - value: $COHERE_API_KEY -{% endentity_examples %} - - -## Validate -To validate, send a request to the Route: - -{% validation request-check %} -url: /anything -status_code: 200 -method: POST -headers: - - 'Accept: application/json' - - 'Content-Type: application/json' -body: - messages: - - role: "system" - content: "You are a mathematician." - - role: "user" - content: "What is 1+1?" 
-{% endvalidation %} \ No newline at end of file diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-dashscope.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-dashscope.md deleted file mode 100644 index 503bf46bc2..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-dashscope.md +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Set up AI Proxy Advanced with DashScope (Alibaba Cloud) in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-dashscope/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using DashScope (Alibaba Cloud). - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - dashscope - -tldr: - q: How do I use the AI Proxy Advanced plugin with DashScope (Alibaba Cloud)? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the DashScope (Alibaba Cloud) provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: DashScope - icon_url: /assets/icons/dashscope.svg - content: | - You need an active DashScope account with API access. 
Sign up at the [Alibaba Cloud DashScope platform](https://dashscope.aliyuncs.com/), obtain your API key from the API-KEY interface, and export it to your environment: - ```sh - export DECK_DASHSCOPE_API_KEY='YOUR DASHSCOPE API KEY' - ``` - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg - -automated_tests: false ---- - -## Configure the plugin - -To set up AI Proxy Advanced with DashScope (Alibaba Cloud), specify the model and set the appropriate authentication header. - -In this example, we'll use the Qwen Plus model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${key} - model: - provider: dashscope - name: qwen-plus - options: - dashscope: - international: true - max_tokens: 512 - temperature: 1.0 -variables: - key: - value: $DASHSCOPE_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-databricks.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-databricks.md deleted file mode 100644 index 49988d5935..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-databricks.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Databricks -permalink: /how-to/set-up-ai-proxy-advanced-with-databricks/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Databricks - -products: - - gateway - - 
ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.14' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - databricks - -tldr: - q: How do I use the AI Proxy Advanced plugin with Databricks? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the Databricks provider, and the GPT OSS 20B model. - -tools: - - deck - -prereqs: - inline: - - title: Databricks - include_content: prereqs/databricks - icon_url: /assets/icons/databricks.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -Configure the plugin with your Databricks workspace ID and the databricks-gpt-oss-20b model. 
- - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${key} - model: - provider: databricks - name: databricks-gpt-oss-20b - options: - databricks: - workspace_instance_id: ${workspace} - -variables: - key: - value: "$DATABRICKS_TOKEN" - workspace: - value: "$DATABRICKS_WORKSPACE_INSTANCE_ID" -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-deepseek.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-deepseek.md deleted file mode 100644 index c484af71cf..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-deepseek.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: Set up AI Proxy Advanced with DeepSeek in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-deepseek/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using DeepSeek. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.14' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - openai - - deepseek - -tldr: - q: How do I use the AI Proxy Advanced plugin with DeepSeek? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the OpenAI provider, a DeepSeek model, and your DeepSeek API key. 
- -tools: - - deck - -prereqs: - inline: - - title: DeepSeek - include_content: prereqs/deepseek - icon_url: /assets/icons/deepseek.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy Advanced with {{ site.deepseek }}, use the `openai` provider, specify the [model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. - -In this example, we'll use the `deepseek-chat` model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${api_key} - model: - provider: openai - name: deepseek-chat - options: - upstream_url: https://api.deepseek.com/chat/completions - max_tokens: 512 - temperature: 1.0 -variables: - api_key: - value: $DEEPSEEK_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-gemini.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-gemini.md deleted file mode 100644 index d5bb797f27..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-gemini.md +++ /dev/null @@ -1,130 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Gemini in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-gemini/ - -content_type: how_to - -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Gemini. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - gemini - -tldr: - q: How do I use the AI Proxy Advanced plugin with Gemini? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the Gemini provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Gemini - content: | - - Before you begin, you must get the Gemini API key from Google Cloud: - - 1. Go to the Google Cloud Console. - 1. Select or create a project. - 1. Navigate to APIs & Services. - 1. In the APIs & Services sidebar, click Library. - 1. Search for “Generative Language API”. - 1. Click Gemini API. - 1. Click Enable. - 1. Navigate back to APIs & Services. - 1. In the APIs & Services sidebar, clickCredentials. - 1. From the Create Credentials dropdown menu, select API Key. - 1. Copy the generated API key. - 1. Export the API key as an environment variable: - - ```sh - export DECK_GEMINI_API_KEY="YOUR-GEMINI-API-KEY" - ``` - icon_url: /assets/icons/gcp.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy Advanced with {{ site.gemini }}, configure API key authentication and specify the {{ site.gemini }} model to use. 
- -In this example, we use the `gemini-2.5-flash` model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - model: - provider: gemini - name: gemini-2.5-flash - auth: - param_name: key - param_value: ${gemini_api_key} - param_location: query - route_type: llm/v1/chat -variables: - gemini_api_key: - value: $GEMINI_API_KEY - description: The API key to use to connect to {{ site.gemini }}. -{% endentity_examples %} - - -## Validate -To validate, send a request to the Route: - -{% validation request-check %} -url: /anything -status_code: 200 -method: POST -headers: - - 'Accept: application/json' - - 'Content-Type: application/json' -body: - messages: - - role: "system" - content: "You are a mathematician." - - role: "user" - content: "What is 1+1?" -{% endvalidation %} - diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-huggingface.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-huggingface.md deleted file mode 100644 index b56c5bbb4e..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-huggingface.md +++ /dev/null @@ -1,100 +0,0 @@ ---- -title: Set up AI Proxy Advanced with HuggingFace in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-huggingface/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using HuggingFace. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - huggingface - -tldr: - q: How do I use the AI Proxy Advanced plugin with HuggingFace? 
- a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the HuggingFace provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: HuggingFace - content: | - You need an active HuggingFace account with API access. Sign up at [HuggingFace](https://huggingface.co/) and obtain your API token from the [Access Tokens page](https://huggingface.co/settings/tokens). Ensure you have access to the HuggingFace Inference API, and export your token to your environment: - ```sh - export DECK_HUGGINGFACE_TOKEN='YOUR HUGGINGFACE API TOKEN' - ``` - icon_url: /assets/icons/huggingface.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg - ---- - -## Configure the plugin - -To set up AI Proxy Advanced with HuggingFace, we need to specify the model to use. - -In this example, we'll use the Qwen3-4B-Instruct-2507 model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${huggingface_token} - model: - provider: huggingface - name: Qwen/Qwen3-4B-Instruct-2507 -variables: - huggingface_token: - value: $HUGGINGFACE_TOKEN - description: The token to use to connect to Hugging Face. 
-formats: - - deck -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-ollama-qwen.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-ollama-qwen.md deleted file mode 100644 index 9bb2eb8f49..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-ollama-qwen.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Ollama and a Qwen model -permalink: /how-to/set-up-ai-proxy-advanced-with-ollama-qwen/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using the Ollama provider with a Qwen model. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.14' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - ollama - -tldr: - q: How do I use the AI Proxy Advanced plugin with Ollama and a Qwen model? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the Ollama provider and the qwen3 model. 
- -tools: - - deck - -prereqs: - inline: - - title: Ollama - include_content: prereqs/ollama-qwen - icon_url: /assets/icons/ollama.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -Set up the AI Proxy Advanced plugin to route chat requests to {{ site.ollama }}’s Qwen 3 model by configuring the model options, including the `upstream_url` pointing to your local {{ site.ollama }} instance: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - model: - provider: ollama - name: qwen3 - options: - upstream_url: ${ollama_upstream_url} -variables: - ollama_upstream_url: - value: $OLLAMA_UPSTREAM_URL -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-ollama.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-ollama.md deleted file mode 100644 index a1767aae7e..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-ollama.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Ollama -permalink: /how-to/set-up-ai-proxy-advanced-with-ollama/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Ollama. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - llama - -tldr: - q: How do I use the AI Proxy Advanced plugin with Ollama? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the Ollama provider, and the Llama2 model. - -tools: - - deck - -prereqs: - inline: - - title: Ollama - include_content: prereqs/ollama - icon_url: /assets/icons/ollama.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -Set up the AI Proxy Advanced plugin to route chat requests to {{ site.ollama }}’s Llama2 model by configuring the model options, including the ollama format and the upstream_url pointing to your local {{ site.ollama }} instance. 
- - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - model: - provider: llama2 - name: llama2 - options: - llama2_format: ollama - upstream_url: ${ollama_upstream_url} -variables: - ollama_upstream_url: - value: $OLLAMA_UPSTREAM_URL -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-openai.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-openai.md deleted file mode 100644 index e8aca051ec..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-openai.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Set up AI Proxy Advanced with OpenAI in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-openai/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using OpenAI. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - openai - -tldr: - q: How do I use the AI Proxy Advanced plugin with OpenAI? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the OpenAI provider, the gpt-4o model, and your OpenAI API key. 
- -tools: - - deck - -prereqs: - inline: - - title: OpenAI - include_content: prereqs/openai - icon_url: /assets/icons/openai.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy Advanced with OpenAI, specify the [model](https://platform.openai.com/docs/models) and set the appropriate authentication header. - -In this example, we'll use the GPT-4o model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${openai_api_key} - model: - provider: openai - name: gpt-4o - options: - max_tokens: 512 - temperature: 1.0 -variables: - openai_api_key: - value: $OPENAI_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vertex-ai.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vertex-ai.md deleted file mode 100644 index 9a6e490b8b..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-advanced-with-vertex-ai.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -title: Set up AI Proxy Advanced with Vertex AI in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-advanced-with-vertex-ai/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Vertex AI. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - vertex-ai - -tldr: - q: How do I use the AI Proxy Advanced plugin with Vertex AI? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the Vertex AI provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Vertex AI - include_content: prereqs/vertex-ai - icon_url: /assets/icons/gcp.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy Advanced with Vertex AI, specify the model and set the appropriate authentication header. 
- -In this example, we'll use the {{ site.gemini }} 2.0 Flash Exp model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy-advanced - config: - targets: - - route_type: llm/v1/chat - model: - provider: gemini - name: gemini-2.0-flash-exp - options: - gemini: - api_endpoint: ${gcp_api_endpoint} - project_id: ${gcp_project_id} - location_id: ${gcp_location_id} - auth: - gcp_use_service_account: true - gcp_service_account_json: ${gcp_service_account_json} -variables: - gcp_project_id: - value: $GCP_PROJECT_ID - gcp_location_id: - value: $GCP_LOCATION_ID - gcp_service_account_json: - value: $GCP_SERVICE_ACCOUNT_JSON - literal_block: true - gcp_api_endpoint: - value: $GCP_API_ENDPOINT -formats: - - deck -{% endentity_examples %} - - - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-anthropic.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-anthropic.md deleted file mode 100644 index bf3450d63b..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-anthropic.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: Set up AI Proxy with Anthropic in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-anthropic/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using Anthropic. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - anthropic - -tldr: - q: How do I use the AI Proxy plugin with Anthropic? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the Anthropic provider and add the model and your API key. 
- -tools: - - deck - -prereqs: - inline: - - title: Anthropic - include_content: prereqs/anthropic - icon_url: /assets/icons/anthropic.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg - ---- - -## Configure the plugin - -To set up AI Proxy with {{ site.anthropic }} we need to specify the [model](https://docs.anthropic.com/en/docs/about-claude/models#model-names) and [{{ site.anthropic }} API version](https://docs.anthropic.com/en/api/versioning#version-history) to use. - -In this example, we'll use the {{ site.claude }} `claude-sonnet-4-5` model and version 2023-06-01 of the API: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: x-api-key - header_value: ${anthropic_api_key} - model: - provider: anthropic - name: claude-sonnet-4-5 - options: - anthropic_version: "2023-06-01" - max_tokens: 1024 -variables: - anthropic_api_key: - value: $ANTHROPIC_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-aws-bedrock.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-aws-bedrock.md deleted file mode 100644 index 863ef609a5..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-aws-bedrock.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: Set up AI Proxy with AWS Bedrock in {{site.base_gateway}}. -permalink: /how-to/set-up-ai-proxy-with-aws-bedrock/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using AWS Bedrock. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - aws-bedrock - -tldr: - q: How do I use the AI Proxy plugin with AWS Bedrock? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the AWS Bedrock provider and add the model and your AWS credentials. - -tools: - - deck - -prereqs: - inline: - - title: AWS credentials and Bedrock model access - content: | - Before you begin, you must have AWS credentials with Bedrock permissions: - - - **AWS Access Key ID**: Your AWS access key - - **AWS Secret Access Key**: Your AWS secret key - - **Region**: AWS region where Bedrock is available (for example, `us-east-1`) - - 1. Enable the chat model in the [AWS Bedrock console](https://console.aws.amazon.com/bedrock/) under **Model Access**. Navigate to **Bedrock** > **Model access** and request access to `meta.llama3-70b-instruct-v1:0`. - - 1. Export the required values as environment variables: - - ```sh - export DECK_AWS_ACCESS_KEY_ID="" - export DECK_AWS_SECRET_ACCESS_KEY="" - ``` - icon_url: /assets/icons/aws.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy with AWS Bedrock, specify the model and set the authenticate using AWS credentials. 
- -In this example, we'll use the Meta Llama 3 70B Instruct model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - allow_override: false - aws_access_key_id: ${aws_access_key_id} - aws_secret_access_key: ${aws_secret_access_key} - model: - provider: bedrock - name: meta.llama3-70b-instruct-v1:0 - options: - bedrock: - aws_region: us-east-1 -variables: - aws_access_key_id: - value: $AWS_ACCESS_KEY_ID - aws_secret_access_key: - value: $AWS_SECRET_ACCESS_KEY -formats: - - deck -{% endentity_examples %} - - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-cerebras.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-cerebras.md deleted file mode 100644 index 1f31872392..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-cerebras.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: Set up AI Proxy with Cerebras in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-cerebras/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using Cerebras . - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - cerebras - -tldr: - q: How do I use the AI Proxy Advanced plugin with Cerebras? - a: Create a Gateway Service and a Route, then enable the AI Proxy Advanced plugin and configure it with the Cerebras provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Cerebras - content: | - This tutorial uses Cerebras: - 1. [Create a Cerebras account](https://chat.cerebras.ai). - 1. Get an API key. - 1. 
Create a decK variable with the API key: - - ```sh - export DECK_CEREBRAS_API_KEY='YOUR CEREBRAS API KEY' - ``` - icon_url: /assets/icons/cerebras.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg - ---- - -## Configure the plugin - -To set up AI Proxy with {{ site.cerebras }}, we need to specify the model to use. - -In this example, we'll use the gpt-oss-120b model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${cerebras_api_key} - model: - provider: cerebras - name: gpt-oss-120b - options: - max_tokens: 512 - temperature: 1.0 -variables: - cerebras_api_key: - value: $CEREBRAS_API_KEY - description: The API key to use to connect to Cerebras. -formats: - - deck -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-cohere.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-cohere.md deleted file mode 100644 index 93dd96ae67..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-cohere.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -title: Set up AI Proxy with Cohere in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-cohere/ - -content_type: how_to - -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using Cohere. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - cohere - -tldr: - q: How do I use the AI Proxy plugin with Cohere? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the Cohere provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Cohere - include_content: prereqs/cohere - icon_url: /assets/icons/cohere.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy with {{ site.cohere }}, configure API key authentication and specify the {{ site.cohere }} model to use. - -In this example, we'll use the {{ site.cohere }} command model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${cohere_api_key} - model: - provider: cohere - name: command-a-03-2025 - options: - max_tokens: 512 - temperature: 1.0 -variables: - cohere_api_key: - value: $COHERE_API_KEY -{% endentity_examples %} - - -## Validate -To validate, send a request to the Route: - -{% validation request-check %} -url: /anything -status_code: 200 -method: POST -headers: - - 'Accept: application/json' - - 'Content-Type: application/json' -body: - messages: - - role: "system" - content: "You are a mathematician." - - role: "user" - content: "What is 1+1?" 
-{% endvalidation %} \ No newline at end of file diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-dashscope.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-dashscope.md deleted file mode 100644 index fa741c43d3..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-dashscope.md +++ /dev/null @@ -1,100 +0,0 @@ ---- -title: Set up AI Proxy with DashScope (Alibaba Cloud) in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-dashscope/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using DashScope (Alibaba Cloud). - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - dashscope - -tldr: - q: How do I use the AI Proxy plugin with DashScope (Alibaba Cloud)? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the DashScope (Alibaba Cloud) provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: DashScope - icon_url: /assets/icons/dashscope.svg - content: | - You need an active DashScope account with API access. 
Sign up at the [Alibaba Cloud DashScope platform](https://dashscope.aliyuncs.com/), obtain your API key from the API-KEY interface, and export it to your environment: - ```sh - export DECK_DASHSCOPE_API_KEY='YOUR DASHSCOPE API KEY' - ``` - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg - -automated_tests: false ---- - -## Configure the plugin - -To set up AI Proxy with DashScope (Alibaba Cloud), specify the model and set the appropriate authentication header. - -In this example, we'll use the Qwen Plus model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${key} - model: - provider: dashscope - name: qwen-plus - options: - dashscope: - international: true - max_tokens: 512 - temperature: 1.0 -variables: - key: - value: $DASHSCOPE_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-databricks.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-databricks.md deleted file mode 100644 index bc4442ee68..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-databricks.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: Set up AI Proxy with Databricks -permalink: /how-to/set-up-ai-proxy-with-databricks/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using Databricks - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.14' - -plugins: - - 
ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - databricks - -tldr: - q: How do I use the AI Proxy plugin with Databricks? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the Databricks provider, and the GPT OSS 20B model. - -tools: - - deck - -prereqs: - inline: - - title: Databricks - include_content: prereqs/databricks - icon_url: /assets/icons/databricks.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -Configure the plugin with your Databricks workspace ID and the databricks-gpt-oss-20b model. - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${key} - model: - provider: databricks - name: databricks-gpt-oss-20b - options: - databricks: - workspace_instance_id: ${workspace} - -variables: - key: - value: "$DATABRICKS_TOKEN" - workspace: - value: "$DATABRICKS_WORKSPACE_INSTANCE_ID" -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-deepseek.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-deepseek.md deleted file mode 100644 index d6a953353c..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-deepseek.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Set up AI Proxy with DeepSeek in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-deepseek/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy 
plugin to create a chat route using DeepSeek. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.14' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - openai - - deepseek - -tldr: - q: How do I use the AI Proxy plugin with DeepSeek? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the OpenAI provider, a DeepSeek model, and your DeepSeek API key. - -tools: - - deck - -prereqs: - inline: - - title: DeepSeek - include_content: prereqs/deepseek - icon_url: /assets/icons/deepseek.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy with {{ site.deepseek }}, use the `openai` provider, specify the [model](https://api-docs.deepseek.com/quick_start/pricing) and set the appropriate authentication header and upstream URL. 
- -In this example, we'll use the `deepseek-chat` model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${api_key} - model: - provider: openai - name: deepseek-chat - options: - upstream_url: https://api.deepseek.com/chat/completions - max_tokens: 512 - temperature: 1.0 -variables: - api_key: - value: $DEEPSEEK_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-gemini.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-gemini.md deleted file mode 100644 index 767b108e23..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-gemini.md +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Set up AI Proxy with Gemini in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-gemini/ - -content_type: how_to - -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using Gemini. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - gemini - -tldr: - q: How do I use the AI Proxy plugin with Gemini? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the Gemini provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Gemini - content: | - - Before you begin, you must get the Gemini API key from Google Cloud: - - 1. Go to the Google Cloud Console. - 1. Select or create a project. - 1. Navigate to APIs & Services. - 1. In the APIs & Services sidebar, click Library. - 1. Search for “Generative Language API”. - 1. Click Gemini API. - 1. Click Enable. - 1. Navigate back to APIs & Services. - 1. 
In the APIs & Services sidebar, clickCredentials. - 1. From the Create Credentials dropdown menu, select API Key. - 1. Copy the generated API key. - 1. Export the API key as an environment variable: - - ```sh - export DECK_GEMINI_API_KEY="YOUR-GEMINI-API-KEY" - ``` - icon_url: /assets/icons/gcp.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy with {{ site.gemini }}, configure API key authentication and specify the {{ site.gemini }} model to use. - -In this example, we use the gemini-2.5-flash model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - param_name: key - param_value: ${gemini_api_key} - param_location: query - model: - provider: gemini - name: gemini-2.5-flash -variables: - gemini_api_key: - value: $GEMINI_API_KEY - description: The API key to use to connect to Gemini. -{% endentity_examples %} - - -## Validate -To validate, send a request to the Route: - -{% validation request-check %} -url: /anything -status_code: 200 -method: POST -headers: - - 'Accept: application/json' - - 'Content-Type: application/json' -body: - messages: - - role: "system" - content: "You are a mathematician." - - role: "user" - content: "What is 1+1?" 
-{% endvalidation %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-huggingface.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-huggingface.md deleted file mode 100644 index fbbef01cbf..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-huggingface.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: Set up AI Proxy with HuggingFace in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-huggingface/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using HuggingFace. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - huggingface - -tldr: - q: How do I use the AI Proxy plugin with HuggingFace? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the HuggingFace provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: HuggingFace - content: | - You need an active HuggingFace account with API access. Sign up at [HuggingFace](https://huggingface.co/) and obtain your API token from the [Access Tokens page](https://huggingface.co/settings/tokens). 
Ensure you have access to the HuggingFace Inference API, and export your token to your environment: - ```sh - export DECK_HUGGINGFACE_TOKEN='YOUR HUGGINGFACE API TOKEN' - ``` - icon_url: /assets/icons/huggingface.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg - ---- - -## Configure the plugin - -To set up AI Proxy with HuggingFace, we need to specify the model to use. - -In this example, we'll use the Qwen3-4B-Instruct-2507 model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${huggingface_token} - model: - provider: huggingface - name: Qwen/Qwen3-4B-Instruct-2507 -variables: - huggingface_token: - value: $HUGGINGFACE_TOKEN - description: The token to use to connect to Hugging Face. -formats: - - deck -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-ollama-qwen.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-ollama-qwen.md deleted file mode 100644 index c75b53d800..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-ollama-qwen.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: Set up AI Proxy with Ollama and a Qwen model -permalink: /how-to/set-up-ai-proxy-with-ollama-qwen/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using the Ollama provider with a Qwen model. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.14' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - ollama - -tldr: - q: How do I use the AI Proxy plugin with Ollama and a Qwen model? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the Ollama provider and the Qwen 3 model. - -tools: - - deck - -prereqs: - inline: - - title: Ollama - include_content: prereqs/ollama-qwen - icon_url: /assets/icons/ollama.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -Set up the AI Proxy plugin to route chat requests to {{ site.ollama }}’s Qwen 3 model by configuring the model options, including the `upstream_url` pointing to your local {{ site.ollama }} instance: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - model: - provider: ollama - name: qwen3 - options: - upstream_url: ${ollama_upstream_url} -variables: - ollama_upstream_url: - value: $OLLAMA_UPSTREAM_URL -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-ollama.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-ollama.md deleted file mode 100644 index b26a1b700b..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-ollama.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -title: Set up AI Proxy with Ollama -permalink: /how-to/set-up-ai-proxy-with-ollama/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy Advanced - 
url: /plugins/ai-proxy/ - -description: Configure the AI Proxy Advanced plugin to create a chat route using Ollama. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy-advanced - -entities: - - service - - route - - plugin - -tags: - - ai - - llama - -tldr: - q: How do I use the AI Proxy plugin with Ollama? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the Ollama provider, and the llama2 model. - -tools: - - deck - -prereqs: - inline: - - title: Ollama - include_content: prereqs/ollama - icon_url: /assets/icons/ollama.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -Set up the AI Proxy plugin to route chat requests to {{ site.ollama }}’s Llama2 model by configuring the model options, including the ollama format and the `upstream_url` pointing to your local {{ site.ollama }} instance. 
- - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - model: - provider: llama2 - name: llama2 - options: - llama2_format: ollama - upstream_url: ${ollama_upstream_url} -variables: - ollama_upstream_url: - value: $OLLAMA_UPSTREAM_URL -{% endentity_examples %} - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-openai.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-openai.md deleted file mode 100644 index 5d62972ae5..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-openai.md +++ /dev/null @@ -1,92 +0,0 @@ ---- -title: Set up AI Proxy with OpenAI in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-openai/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using OpenAI. - -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - openai - -tldr: - q: How do I use the AI Proxy plugin with OpenAI? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the OpenAI provider and add the model and your API key. 
- -tools: - - deck - -prereqs: - inline: - - title: OpenAI - include_content: prereqs/openai - icon_url: /assets/icons/openai.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy with OpenAI, specify the [model](https://platform.openai.com/docs/models) and set the appropriate authentication header. - -In this example, we'll use the gpt-4o model: - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - auth: - header_name: Authorization - header_value: Bearer ${openai_api_key} - model: - provider: openai - name: gpt-4o - options: - max_tokens: 512 - temperature: 1.0 -variables: - openai_api_key: - value: $OPENAI_API_KEY -{% endentity_examples %} - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vertex-ai.md b/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vertex-ai.md deleted file mode 100644 index 0ad5fce36f..0000000000 --- a/app/_how-tos/ai-gateway/set-up-ai-proxy-with-vertex-ai.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: Set up AI Proxy with Vertex AI in {{site.base_gateway}} -permalink: /how-to/set-up-ai-proxy-with-vertex-ai/ -content_type: how_to -related_resources: - - text: "{{site.ai_gateway}}" - url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ - -description: Configure the AI Proxy plugin to create a chat route using Vertex AI. 
- -products: - - gateway - - ai-gateway - -works_on: - - on-prem - - konnect - -min_version: - gateway: '3.6' - -plugins: - - ai-proxy - -entities: - - service - - route - - plugin - -tags: - - ai - - vertex-ai - -tldr: - q: How do I use the AI Proxy plugin with Vertex AI? - a: Create a Gateway Service and a Route, then enable the AI Proxy plugin and configure it with the Vertex AI provider and add the model and your API key. - -tools: - - deck - -prereqs: - inline: - - title: Vertex AI - include_content: prereqs/vertex-ai - icon_url: /assets/icons/gcp.svg - entities: - services: - - example-service - routes: - - example-route - -cleanup: - inline: - - title: Clean up Konnect environment - include_content: cleanup/platform/konnect - icon_url: /assets/icons/gateway.svg - - title: Destroy the {{site.base_gateway}} container - include_content: cleanup/products/gateway - icon_url: /assets/icons/gateway.svg ---- - -## Configure the plugin - -To set up AI Proxy with Vertex AI, specify the model and set the appropriate authentication header. 
- -In this example, we'll use the {{ site.gemini }} 2.0 Flash Exp model: - - -{% entity_examples %} -entities: - plugins: - - name: ai-proxy - config: - route_type: llm/v1/chat - model: - provider: gemini - name: gemini-2.0-flash-exp - options: - gemini: - api_endpoint: ${gcp_api_endpoint} - project_id: ${gcp_project_id} - location_id: ${gcp_location_id} - auth: - gcp_use_service_account: true - gcp_service_account_json: ${gcp_service_account_json} -variables: - gcp_project_id: - value: $GCP_PROJECT_ID - gcp_location_id: - value: $GCP_LOCATION_ID - gcp_service_account_json: - value: $GCP_SERVICE_ACCOUNT_JSON - literal_block: true - gcp_api_endpoint: - value: $GCP_API_ENDPOINT -formats: - - deck -{% endentity_examples %} - - - -## Validate - -{% include how-tos/steps/ai-proxy-validate.md %} diff --git a/app/ai-gateway/ai-providers/anthropic.md b/app/ai-gateway/ai-providers/anthropic.md index ebdd8faadb..104e33d9f0 100644 --- a/app/ai-gateway/ai-providers/anthropic.md +++ b/app/ai-gateway/ai-providers/anthropic.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - anthropic + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/azure.md b/app/ai-gateway/ai-providers/azure.md index 8c8d94321a..324af89588 100644 --- a/app/ai-gateway/ai-providers/azure.md +++ b/app/ai-gateway/ai-providers/azure.md @@ -55,6 +55,7 @@ how_to_list: - ai-gateway tags: - azure + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/bedrock.md b/app/ai-gateway/ai-providers/bedrock.md index 5cc8afdb3f..8181e1f811 100644 --- a/app/ai-gateway/ai-providers/bedrock.md +++ b/app/ai-gateway/ai-providers/bedrock.md @@ -65,6 +65,7 @@ how_to_list: - ai-gateway tags: - bedrock + - ai-proxy description: true view_more: false diff --git a/app/ai-gateway/ai-providers/cerebras.md b/app/ai-gateway/ai-providers/cerebras.md index 308c6bb305..a9068af281 100644 --- a/app/ai-gateway/ai-providers/cerebras.md +++ 
b/app/ai-gateway/ai-providers/cerebras.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - cerebras + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/cohere.md b/app/ai-gateway/ai-providers/cohere.md index 77feadb52c..50091344ab 100644 --- a/app/ai-gateway/ai-providers/cohere.md +++ b/app/ai-gateway/ai-providers/cohere.md @@ -55,6 +55,7 @@ how_to_list: - ai-gateway tags: - cohere + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/dashscope.md b/app/ai-gateway/ai-providers/dashscope.md index f182f2de49..6da20c0d26 100644 --- a/app/ai-gateway/ai-providers/dashscope.md +++ b/app/ai-gateway/ai-providers/dashscope.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - dashscope + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/databricks.md b/app/ai-gateway/ai-providers/databricks.md index 20b9ede3cc..822545ac3c 100644 --- a/app/ai-gateway/ai-providers/databricks.md +++ b/app/ai-gateway/ai-providers/databricks.md @@ -48,6 +48,7 @@ how_to_list: - ai-gateway tags: - databricks + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/deepseek.md b/app/ai-gateway/ai-providers/deepseek.md index bd2de45c5c..e8dcf857da 100644 --- a/app/ai-gateway/ai-providers/deepseek.md +++ b/app/ai-gateway/ai-providers/deepseek.md @@ -48,6 +48,7 @@ how_to_list: - ai-gateway tags: - deepseek + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/gemini.md b/app/ai-gateway/ai-providers/gemini.md index ba965ef7ee..2bdd659d0b 100644 --- a/app/ai-gateway/ai-providers/gemini.md +++ b/app/ai-gateway/ai-providers/gemini.md @@ -64,6 +64,7 @@ how_to_list: - ai-gateway tags: - gemini + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/huggingface.md b/app/ai-gateway/ai-providers/huggingface.md index 5c3bd766a8..4af5418547 100644 --- 
a/app/ai-gateway/ai-providers/huggingface.md +++ b/app/ai-gateway/ai-providers/huggingface.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - huggingface + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index 65f14e4560..835d732a56 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -48,6 +48,7 @@ how_to_list: - ai-gateway tags: - kimi + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/llama.md b/app/ai-gateway/ai-providers/llama.md index a3525e565d..f87bace7c7 100644 --- a/app/ai-gateway/ai-providers/llama.md +++ b/app/ai-gateway/ai-providers/llama.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - llama + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/mistral.md b/app/ai-gateway/ai-providers/mistral.md index cde7562607..e25114b749 100644 --- a/app/ai-gateway/ai-providers/mistral.md +++ b/app/ai-gateway/ai-providers/mistral.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - mistral + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/ollama.md b/app/ai-gateway/ai-providers/ollama.md index 04eded734b..a707d131bb 100644 --- a/app/ai-gateway/ai-providers/ollama.md +++ b/app/ai-gateway/ai-providers/ollama.md @@ -48,6 +48,7 @@ how_to_list: - ai-gateway tags: - ollama + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/openai.md b/app/ai-gateway/ai-providers/openai.md index 9888891242..80c5ca53d7 100644 --- a/app/ai-gateway/ai-providers/openai.md +++ b/app/ai-gateway/ai-providers/openai.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - openai + - ai-proxy description: true view_more: false diff --git a/app/ai-gateway/ai-providers/vercel.md b/app/ai-gateway/ai-providers/vercel.md index 36f13930e3..4fbebef665 100644 --- a/app/ai-gateway/ai-providers/vercel.md 
+++ b/app/ai-gateway/ai-providers/vercel.md @@ -48,6 +48,7 @@ how_to_list: - ai-gateway tags: - vercel + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/vertex.md b/app/ai-gateway/ai-providers/vertex.md index 6e4cf17729..d2e5808134 100644 --- a/app/ai-gateway/ai-providers/vertex.md +++ b/app/ai-gateway/ai-providers/vertex.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - vertex-ai + - ai-proxy description: true view_more: false --- diff --git a/app/ai-gateway/ai-providers/xai.md b/app/ai-gateway/ai-providers/xai.md index 1c8378238a..fd9cfeddb9 100644 --- a/app/ai-gateway/ai-providers/xai.md +++ b/app/ai-gateway/ai-providers/xai.md @@ -50,6 +50,7 @@ how_to_list: - ai-gateway tags: - xai + - ai-proxy description: true view_more: false --- From f6002de6a9f1b19501ff6744f11ed81de701a9a7 Mon Sep 17 00:00:00 2001 From: jbaross Date: Tue, 2 Jun 2026 15:29:14 +0100 Subject: [PATCH 33/42] test new entity examples --- app/ai-gateway/ai-providers/anthropic.md | 64 ++++++++++++++++++++ app/ai-gateway/ai-providers/kimi.md | 76 ++++++++++++++++++------ 2 files changed, 122 insertions(+), 18 deletions(-) diff --git a/app/ai-gateway/ai-providers/anthropic.md b/app/ai-gateway/ai-providers/anthropic.md index 104e33d9f0..32e19b9e3c 100644 --- a/app/ai-gateway/ai-providers/anthropic.md +++ b/app/ai-gateway/ai-providers/anthropic.md @@ -62,6 +62,70 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/) as follows: + +{% entity_example %} +type: provider +data: + display_name: Anthropic + name: my-anthropic-account + type: anthropic + config: + auth: + type: basic + headers: + - name: Authorization + value: Bearer +{% endentity_example %} + +You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }} as follows: + +{% entity_example %} +type: model +data: + display_name: Claude 
Sonnet + name: claude-sonnet-production + type: model + enabled: true + capabilities: + - chat + - responses + formats: + - type: anthropic + acls: + allow: + - internal-teams + deny: [] + policies: [] + target_models: + - name: claude-sonnet-4-6 + provider: + name: my-anthropic-account + config: + temperature: 0.7 + max_tokens: 4096 + input_cost: 0.0000025 + output_cost: 0.000010 + config: + logging: + statistics: true + payloads: false + response_streaming: allow + max_request_body_size: 1048576 + model: + name_header: true + balancer: + algorithm: round-robin + retries: 3 + connect_timeout: 60000 + read_timeout: 60000 + write_timeout: 60000 +{% endentity_example %} + + +--- + + To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. Here's a minimal configuration for chat completions: diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index 835d732a56..43d81e411c 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -9,8 +9,13 @@ breadcrumbs: permalink: /ai-gateway/ai-providers/kimi/ +min_version: + ai-gateway: '2.0.0' +schema: + api: konnect/ai-gateway + path: /schemas/AIGatewayModel + works_on: - - on-prem - konnect products: @@ -31,9 +36,6 @@ plugins: - ai-proxy-advanced - ai-proxy -min_version: - gateway: '2.0.0' - related_resources: - text: "{{site.ai_gateway}}" url: /ai-gateway/ @@ -58,24 +60,62 @@ how_to_list: ## Configure {{ provider.name }} with AI Proxy -To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/). You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }}. 
- -Here's a minimal configuration for chat completions: +To use {{ provider.name }} with {{site.ai_gateway}}, configure a new [provider](/ai-gateway/entities/ai-provider/) as follows: {% entity_example %} -type: plugin +type: provider data: - name: ai-proxy + display_name: Kimi AI + name: my-kimi-account + type: kimi config: - route_type: llm/v1/chat auth: - header_name: Authorization - header_value: Bearer ${key} - model: - provider: kimi - name: kimi-k2.6 + type: basic + headers: + - name: Authorization + value: Bearer +{% endentity_example %} + +You can then access supported [models](/ai-gateway/entities/ai-model/) from {{ provider.name }} as follows: -variables: - key: - value: "$MOONSHOT_API_KEY" +{% entity_example %} +type: model +data: + display_name: Kimi K2.6 Production + name: kimi-k2.6-production + type: model + enabled: true + capabilities: + - chat + - responses + formats: + - type: openai + acls: + allow: + - internal-teams + deny: [] + policies: [] + target_models: + - name: kimi-k2.6 + provider: + name: my-kimi-account + config: + temperature: 0.7 + max_tokens: 4096 + input_cost: 0.0000025 + output_cost: 0.000010 + config: + logging: + statistics: true + payloads: false + response_streaming: allow + max_request_body_size: 1048576 + model: + name_header: true + balancer: + algorithm: round-robin + retries: 3 + connect_timeout: 60000 + read_timeout: 60000 + write_timeout: 60000 {% endentity_example %} From 81c33f154a6d3588a4960b6fd6eebc542f156338 Mon Sep 17 00:00:00 2001 From: jbaross Date: Tue, 2 Jun 2026 16:00:26 +0100 Subject: [PATCH 34/42] test new entity examples --- app/ai-gateway/ai-providers/kimi.md | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/app/ai-gateway/ai-providers/kimi.md b/app/ai-gateway/ai-providers/kimi.md index 43d81e411c..373cd26577 100644 --- a/app/ai-gateway/ai-providers/kimi.md +++ b/app/ai-gateway/ai-providers/kimi.md @@ -11,31 +11,25 @@ permalink: /ai-gateway/ai-providers/kimi/ 
min_version: ai-gateway: '2.0.0' + schema: api: konnect/ai-gateway path: /schemas/AIGatewayModel works_on: - - konnect + - konnect + +tools: + - deck + - konnect-api products: - gateway - ai-gateway -tools: - - admin-api - - konnect-api - - deck - - kic - - terraform - tags: - ai -plugins: - - ai-proxy-advanced - - ai-proxy - related_resources: - text: "{{site.ai_gateway}}" url: /ai-gateway/ From ee5814bbc25118e699d5567fae8dc81ab3a7db66 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 3 Jun 2026 06:34:16 +0200 Subject: [PATCH 35/42] update agent and mcp server entities --- app/_ai_gateway_entities/ai-agent.md | 6 ++- app/_ai_gateway_entities/ai-mcp-server.md | 46 ++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/app/_ai_gateway_entities/ai-agent.md b/app/_ai_gateway_entities/ai-agent.md index 9ffd7b9cb8..0348b626ad 100644 --- a/app/_ai_gateway_entities/ai-agent.md +++ b/app/_ai_gateway_entities/ai-agent.md @@ -107,6 +107,10 @@ When an Agent has type `a2a`, proxied traffic is processed in four phases: Non-A2A traffic, and traffic to `http` Agents, is proxied without these steps. +## Routing configuration + +Beyond the `url` field, Agents can define HTTP routing rules through `config.route`. This allows you to match requests by method, path, host, and other HTTP patterns. Use `route` when you need fine-grained control over which traffic reaches the Agent. If only a URL is needed, the `url` field is simpler. 
+ {% mermaid %} sequenceDiagram @@ -301,7 +305,7 @@ data: logging: statistics: true payloads: false - max_payload_size: 524288 + max_payload_size: 1048576 {% endentity_example %} ## Schema diff --git a/app/_ai_gateway_entities/ai-mcp-server.md b/app/_ai_gateway_entities/ai-mcp-server.md index 6257e9156c..3d9de073c0 100644 --- a/app/_ai_gateway_entities/ai-mcp-server.md +++ b/app/_ai_gateway_entities/ai-mcp-server.md @@ -39,13 +39,14 @@ faqs: The MCP runtime behind an MCP Server entity speaks MCP protocol version `2025-06-18`. Upstream MCP servers may run `2025-06-18` or `2025-11-25`. Versions from 2024 are not supported. - - q: What's the difference between the four server types? + - q: What's the difference between the server types? a: | `passthrough-listener` proxies MCP traffic to an upstream MCP server without converting tools. `conversion-listener` converts a RESTful API into MCP tools and accepts MCP requests on the same Route. `conversion-only` defines a tool library that other MCP Servers reference by tag but doesn't accept incoming MCP traffic itself. `listener` aggregates tools from one or more - `conversion-only` MCP Servers into a single MCP endpoint. + `conversion-only` MCP Servers into a single MCP endpoint. `upstream-server` registers a real + MCP server into an aggregation pool, dynamically fetching its tools for a `listener` to aggregate. - q: Can the same Consumer's identity gate access to specific tools? a: | @@ -134,7 +135,7 @@ rows: ## Server modes -The `type` field selects one of four modes. Each mode determines how the runtime handles MCP requests and whether it converts RESTful APIs into MCP tools. +The `type` field selects one of five modes. Each mode determines how the runtime handles MCP requests and whether it converts RESTful APIs into MCP tools. 
{% table %} @@ -174,13 +175,48 @@ rows: - mode: "`listener`" description: | Similar to `conversion-listener`, but instead of defining its own tools, it binds tools - from one or more `conversion-only` MCP Servers through `config.server.tag`. + from one or more `conversion-only` or `upstream-server` MCP Servers through `config.server.tag`. usecase: | - A single MCP endpoint that aggregates tools from multiple `conversion-only` MCP Servers. + A single MCP endpoint that aggregates tools from multiple `conversion-only` or `upstream-server` MCP Servers. Typical in multi-service or multi-team environments that expose a unified MCP interface. + - mode: "`upstream-server`" + description: | + Registers a real MCP server into an aggregation pool. Dynamically fetches the upstream's + tool list and caches it. Works together with a `listener` MCP Server that uses shared tags + to aggregate tools. Supports optional OAuth2 authentication to fetch tool lists from the upstream. + usecase: | + Expose an existing upstream MCP server's tools alongside others through a single `listener` + endpoint. The listener aggregates all tagged upstreams, so adding a new upstream is just + deploying a new `upstream-server` with matching tags. {% endtable %} +## Tool aggregation with upstream-server + +When using `listener` with `upstream-server` MCP Servers, the runtime aggregates tools from all upstreams that share the listener's tag. This pattern centralizes tool discovery and management for agents while keeping upstream services decoupled. + +### How aggregation works + +1. **Tags connect upstreams to listeners**: Set `config.server.tag` on the listener (e.g., `my-tools`). Set the same tag on every `upstream-server` MCP Server you want included. Any upstream with matching tags gets pulled into the aggregation. + +2. **Tool discovery**: When an MCP client calls `tools/list`, the listener fetches tool lists from every tagged upstream. 
If an upstream requires authentication, configure `config.server.tools_list_auth` with OAuth2 credentials so the listener can fetch its tools. + +3. **Tool caching**: Each `upstream-server` caches its tool list for the duration specified by `config.tools_cache_ttl_seconds`. Set to `0` to fetch fresh on every client request. + +4. **Tool name disambiguation**: If two upstreams expose tools with the same name, the listener prepends the service name to avoid collisions (e.g., `weather-service/get-forecast`). Disable this with `config.server.preserve_upstream_tool_names: true` if you're sure names won't collide. + +5. **Tool invocation**: When a client calls a tool, the listener routes the request to whichever upstream registered it. From the client's perspective, it's one call to one URL. + +### Upstream authentication + +By default, the listener connects to upstreams without credentials. If an upstream MCP server requires authentication: + +- Set `config.server.tools_list_auth` on the `upstream-server` plugin with OAuth2 client-credentials configuration +- Kong fetches a token from your identity provider when first needed, caches it, and refreshes it when it expires +- The token is used only when fetching the upstream's tool list; it's separate from agent authentication + +This allows different upstreams to use different credentials, managed centrally by Kong. 
+ ## How MCP traffic flows For `conversion-listener`, `conversion-only`, and `listener` modes, the runtime converts MCP requests into HTTP calls and wraps the responses back in MCP format: From 083b8350bf3b3152cb7de83f53a2d1e689de9202 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 3 Jun 2026 06:52:52 +0200 Subject: [PATCH 36/42] update mcp server --- app/_ai_gateway_entities/ai-mcp-server.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/_ai_gateway_entities/ai-mcp-server.md b/app/_ai_gateway_entities/ai-mcp-server.md index 3d9de073c0..6df3018546 100644 --- a/app/_ai_gateway_entities/ai-mcp-server.md +++ b/app/_ai_gateway_entities/ai-mcp-server.md @@ -214,8 +214,11 @@ By default, the listener connects to upstreams without credentials. If an upstre - Set `config.server.tools_list_auth` on the `upstream-server` plugin with OAuth2 client-credentials configuration - Kong fetches a token from your identity provider when first needed, caches it, and refreshes it when it expires - The token is used only when fetching the upstream's tool list; it's separate from agent authentication +- Different upstreams can use different credentials, managed centrally by Kong -This allows different upstreams to use different credentials, managed centrally by Kong. +### Header forwarding + +When the listener routes tool calls to an upstream, it can forward request headers from the original MCP client. Set `config.server.forward_client_headers: true` on the `listener` or `upstream-server` to pass through headers like authentication or context information. This allows upstreams to see the client's original request context. 
## How MCP traffic flows From 8e1ceae77cd576b0345c73d1d0c907475c077c6f Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Wed, 3 Jun 2026 10:16:47 +0200 Subject: [PATCH 37/42] Remove on-prem mentions --- app/_ai_gateway_entities/ai-gateway.md | 6 +++--- app/_ai_gateway_entities/ai-model.md | 6 +++--- app/_ai_gateway_entities/ai-vault.md | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/_ai_gateway_entities/ai-gateway.md b/app/_ai_gateway_entities/ai-gateway.md index ae0e57d47d..b2238888cb 100644 --- a/app/_ai_gateway_entities/ai-gateway.md +++ b/app/_ai_gateway_entities/ai-gateway.md @@ -60,9 +60,9 @@ faqs: - q: Is the {{site.ai_gateway}} entity available on-prem? a: | - No. The {{site.ai_gateway}} entity is a {{site.konnect_short_name}} concept. On-prem deployments - manage the same child entities (Models, Providers, Policies, and so on) directly through - the Admin API, without a parent `ai-gateways/{id}` container. + No. {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + For on-prem deployments, configure AI proxy behavior using {{site.base_gateway}} plugins directly (for example, the AI Proxy plugin). + See the [{{site.base_gateway}} plugin catalog](/plugins/) for available AI-related plugins. --- ## What is an {{site.ai_gateway}}? diff --git a/app/_ai_gateway_entities/ai-model.md b/app/_ai_gateway_entities/ai-model.md index 039e28e240..043c141304 100644 --- a/app/_ai_gateway_entities/ai-model.md +++ b/app/_ai_gateway_entities/ai-model.md @@ -38,7 +38,7 @@ related_resources: faqs: - q: What's the difference between a Model entity and a `model` field inside a plugin configuration? a: | - A Model entity is the first-class {{site.ai_gateway}} entity you declare through the `/ai/models` API or {{site.konnect_short_name}}. + A Model entity is the first-class {{site.ai_gateway}} entity you declare through the {{site.konnect_short_name}} API, UI, or decK. 
{{site.ai_gateway}} derives the underlying plugin and its `model` configuration from the entity. You don't configure the underlying plugin directly. @@ -350,13 +350,13 @@ For per-request authentication and identity, configure the appropriate authentic Policies are how plugin configurations apply to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. -A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. On-prem also supports the nested endpoint `/ai/models/{modelId}/policies`, which creates and attaches a Policy in one call. +A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. You can attach multiple Policies to a single Model. Each Policy has an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. Not every plugin type is valid as a Model Policy. -Policies created through the nested on-prem endpoint (`POST /ai/models/{modelId}/policies`) are deleted when the Model is deleted. Policies created independently (for example, at `/v1/ai-gateways/{aiGatewayId}/policies` or `/ai/policies`) are not deleted when the Model is deleted; only the Model's reference is removed. +Policies attached to a Model are not deleted when the Model is deleted; only the Model's reference is removed. For further information, see the [Policy entity](/ai-gateway/entities/ai-policy/) reference. 
diff --git a/app/_ai_gateway_entities/ai-vault.md b/app/_ai_gateway_entities/ai-vault.md index 2f15006b56..04169c1946 100644 --- a/app/_ai_gateway_entities/ai-vault.md +++ b/app/_ai_gateway_entities/ai-vault.md @@ -35,7 +35,7 @@ faqs: a: | The runtime entity is the same secret-management abstraction. The {{site.ai_gateway}} surface manages Vaults through the AI entity convention (`display_name`, `name`, `description`, - `labels`) and exposes them at the `/ai/vaults` API alongside the other AI entities. + `labels`) and exposes them through the {{site.konnect_short_name}} API alongside the other AI entities. - q: Which secret backends are supported? a: | From 306e12092d2e66efaaac005bea61c4ca71ad661c Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 11 Jun 2026 07:59:47 +0200 Subject: [PATCH 38/42] feat(ai-gateway): Update load balancing capabilities documentation for AI Gateway 2.0 (#5308) --- app/_ai_gateway_entities/ai-model.md | 90 ++++++++---------- app/_data/entity_examples/config.yml | 5 + app/ai-gateway/load-balancing.md | 135 ++++++++++++++++----------- 3 files changed, 125 insertions(+), 105 deletions(-) diff --git a/app/_ai_gateway_entities/ai-model.md b/app/_ai_gateway_entities/ai-model.md index 043c141304..ccdf4d1dff 100644 --- a/app/_ai_gateway_entities/ai-model.md +++ b/app/_ai_gateway_entities/ai-model.md @@ -18,14 +18,13 @@ schema: works_on: - konnect tools: - - deck - konnect-api related_resources: - text: About {{site.ai_gateway}} url: /ai-gateway/ - text: "{{site.ai_gateway}} providers" url: /ai-gateway/ai-providers/ - - text: Load balancing with AI Proxy Advanced + - text: Load balancing url: /ai-gateway/load-balancing/ - text: Provider entity url: /ai-gateway/entities/ai-provider/ @@ -36,22 +35,22 @@ related_resources: - text: Consumer Group entity url: /ai-gateway/entities/ai-consumer-group/ faqs: - - q: What's the difference between a Model entity and a `model` field inside a plugin configuration? 
+ - q: What's the difference between a Model entity and the `model` field in a Policy configuration? a: | - A Model entity is the first-class {{site.ai_gateway}} entity you declare through the {{site.konnect_short_name}} API, UI, or decK. - {{site.ai_gateway}} derives the underlying plugin and its `model` configuration from the entity. - You don't configure the underlying plugin directly. + A Model entity is the first-class {{site.ai_gateway}} entity you declare through the {{site.konnect_short_name}} API and UI. + It defines routing, capabilities, and load balancing. A Policy is a reusable configuration that adds behavior (like caching or guardrails) to a Model. + You declare both separately and attach Policies to Models. - - q: Can I edit the Service, Routes, or plugins that {{site.ai_gateway}} generates from a Model? + - q: Can I edit the Service or Routes that {{site.ai_gateway}} generates from a Model? a: | No. Generated primitives are protected from direct modification through the standard Admin API. Update the Model entity instead, and {{site.ai_gateway}} recreates the underlying primitives within a single transaction. - - q: How do I configure models in on-prem deployments? - a: | - {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. - For on-prem deployments, configure AI proxy behavior using {{site.base_gateway}} plugins directly (for example, the AI Proxy plugin). - See the [{{site.base_gateway}} plugin catalog](/gateway/plugins/) for available AI-related plugins. + # - q: How do I configure models in on-prem deployments? + # a: | + # {{site.ai_gateway}} entities are available only in {{site.konnect_short_name}}. + # For on-prem deployments, configure AI proxy behavior using {{site.base_gateway}} directly through its plugin interface. + # See the [{{site.base_gateway}} documentation](/gateway/) for available AI-related capabilities. - q: What happens when I update a Model? 
a: | @@ -60,7 +59,7 @@ faqs: - q: What happens when I delete a Model? a: | - The Model and all its derived primitives (Service, Routes, plugin instances) are deleted within a single transaction. + The Model and all its derived primitives (Service, Routes) are deleted within a single transaction. - q: Can I apply the same configuration to multiple Models? a: | @@ -81,7 +80,7 @@ faqs: - q: Can a client override the model name from the request body? a: | By default, no. The request `model` field must match the upstream model on one of the Model's targets, otherwise the runtime returns a `400` error. - To accept a client-side alias, set `config.model.alias` on the Model and clients can send the alias value in the request `model` field instead of the upstream provider model name. + To accept a client-side alias, set [`config.target_models[].model.alias`](/ai-gateway/entities/ai-model/#schema-aigateway-model-target-models-model-alias) on each target. Clients can then send the alias value in the request `model` field instead of the upstream provider model name. See [Request routing by model alias](/ai-gateway/load-balancing/#request-routing-by-model-alias) for details and examples. - q: Can a client override `temperature`, `top_p`, or `top_k` from the request? a: | @@ -101,9 +100,9 @@ faqs: A Model is a first-class {{site.ai_gateway}} entity that represents an AI model endpoint exposed through {{site.ai_gateway}}. -A Model declares which capabilities it exposes (such as `chat`, `responses`, or `embeddings`), which upstream provider models it routes to, and how requests are load-balanced and logged. {{site.ai_gateway}} translates a Model into the underlying primitives that the runtime uses to serve traffic, so you don't need to assemble Services, Routes, or plugin entries by hand. +A Model declares which capabilities it exposes (such as `chat`, `responses`, or `embeddings`), which upstream provider models it routes to, and how requests are load-balanced and logged. 
{{site.ai_gateway}} translates a Model into the underlying primitives that the runtime uses to serve traffic, so you don't need to assemble Services or Routes by hand. -Models can be created and managed through the {{site.konnect_short_name}} UI, the {{site.ai_gateway}} API, or decK: +Models can be created and managed through the {{site.konnect_short_name}} UI or the {{site.ai_gateway}} API: {% table %} columns: @@ -124,7 +123,7 @@ When you create a Model in {{site.konnect_short_name}} or via the API, the confi 1. Add one or more target models, each pointing to a Provider with credentials. 1. Select a request and response format (default is `openai`). 1. If you have more than one target, configure load balancing in `config.balancer`. -1. Optionally, attach Policies to add plugin configuration and set `acls` to control access. +1. Optionally, attach Policies to add capabilities and set `acls` to control access. For a concrete example, see [Set up a Model](#set-up-a-model). @@ -147,16 +146,15 @@ When you create or update a Model, {{site.ai_gateway}} generates a fixed set of * One [Gateway Service](/gateway/entities/service/). * One [Route](/gateway/entities/route/) per declared capability in the `capabilities` array. -* One [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin per generated Route. -Provider credentials are added into the AI Proxy Advanced plugin configuration at generation time, sourced from the Provider entity that the Model's `target_models` reference. Updating the Provider propagates credential changes to every Model that uses it. +Provider credentials are added into the generated runtime configuration at generation time, sourced from the Provider entity that the Model's `target_models` reference. Updating the Provider propagates credential changes to every Model that uses it. -Generated primitives are protected. 
Direct PUT, PATCH, or DELETE calls against the underlying Service, Routes, or plugin entries through the standard Admin API are rejected. To change anything about a Model's runtime footprint, update the Model entity. {{site.ai_gateway}} deletes and recreates the derived primitives within a single transaction. +Generated primitives are protected. Direct PUT, PATCH, or DELETE calls against the underlying Service or Routes through the standard Admin API are rejected. To change anything about a Model's runtime footprint, update the Model entity. {{site.ai_gateway}} deletes and recreates the derived primitives within a single transaction. {:.info} > **Why a transaction instead of an in-place update?** > -> A Model's structure (which capabilities exist, which providers it routes to) determines how many Routes and plugin entries are needed. A delete-and-recreate cycle is the simplest way to keep the entity and its derived primitives consistent, especially when capabilities are added or removed. +> A Model's structure (which capabilities exist, which providers it routes to) determines how many Routes are needed. A delete-and-recreate cycle is the simplest way to keep the entity and its derived primitives consistent, especially when capabilities are added or removed. ## Capabilities @@ -169,7 +167,7 @@ Model [`type`](#schema-aigateway-model-type) controls which capability set appli Not every provider supports every capability. The set of capabilities you can declare on a Model depends on what the provider in `target_models` exposes. See [{{site.ai_gateway}} providers](/ai-gateway/ai-providers/) for per-provider details. -The following table maps each capability to an OpenAI API reference and the corresponding [AI Proxy plugin](/plugins/ai-proxy/) example. +The following table maps each capability to an OpenAI API reference. For load balancing configuration details, see [Load balancing](/ai-gateway/load-balancing/). 
{% table %} @@ -178,45 +176,31 @@ columns: key: capability - title: Description key: description - - title: Example route - key: example rows: - capability: "`chat`" description: Conversational responses from a sequence of messages. - example: "[`llm/v1/chat`](/plugins/ai-proxy/examples/openai-chat-route/)" - capability: "`embeddings`" description: Vector representations for semantic search and similarity matching. - example: "[`llm/v1/embeddings`](/plugins/ai-proxy/examples/embeddings-route-type/)" - capability: "`assistants`" description: Persistent tool-using agents with metadata for debugging and evaluation. - example: "[`llm/v1/assistants`](/plugins/ai-proxy/examples/assistants-route-type/)" - capability: "`responses`" description: REST-based full-text responses. - example: "[`llm/v1/responses`](/plugins/ai-proxy/examples/responses-route-type/)" - capability: "`audio-transcriptions`" description: Speech-to-text. - example: "[`audio/v1/audio/transcriptions`](/plugins/ai-proxy/examples/audio-transcription-openai/)" - capability: "`audio-translations`" description: Audio translation between languages. - example: "[`audio/v1/audio/translations`](/plugins/ai-proxy/examples/audio-translation-openai/)" - capability: "`image-generation`" description: Generate images from text prompts. - example: "[`image/v1/images/generations`](/plugins/ai-proxy/examples/image-generation-openai/)" - capability: "`image-edits`" description: Modify images from text prompts. - example: "[`image/v1/images/edits`](/plugins/ai-proxy/examples/image-edits-openai/)" - capability: "`video-generations`" description: Generate videos from text prompts. - example: "[`video/v1/videos/generations`](/plugins/ai-proxy/examples/video-generation-openai/)" - capability: "`realtime`" description: Bidirectional WebSocket streaming for low-latency, interactive voice and text. 
- example: "[`realtime/v1/realtime`](/plugins/ai-proxy-advanced/examples/realtime-route-openai/)" - capability: "`batches`" description: Asynchronous bulk LLM requests for long workloads. - example: "[`llm/v1/batches`](/plugins/ai-proxy/examples/batches-route-type/)" - capability: "`files`" description: File uploads for long documents and structured input. - example: "[`llm/v1/files`](/plugins/ai-proxy/examples/files-route-type/)" {% endtable %} @@ -257,7 +241,7 @@ rows: {% endtable %} -When a native format is set, only the corresponding provider is supported with its specific APIs. For format-specific behavior and limitations, see the [AI Proxy plugin reference](/plugins/ai-proxy/#supported-native-llm-formats). +When a native format is set, only the corresponding provider is supported with its specific APIs. ## Target models @@ -271,7 +255,7 @@ There's no separate Target Model entity or endpoint. Target models are managed o A Model routes to a single target by default. Add more than one target when you want redundancy, fallback between providers, or cost and latency optimization. When you have multiple targets, configure `config.balancer` to distribute requests according to a load balancing algorithm. -When a Model has more than one target, the [load balancer](#schema-aigateway-model-config-balancer) sits between the virtual model and its targets, distributing requests according to `config.balancer`. For algorithm details, selection guidance, and tuning, see [Load balancing with AI Proxy Advanced](/ai-gateway/load-balancing/). +When a Model has more than one target, the [load balancer](#schema-aigateway-model-config-balancer) sits between the virtual model and its targets, distributing requests according to `config.balancer`. For algorithm details, selection guidance, and tuning, see [Load balancing](/ai-gateway/load-balancing/). 
### Algorithms @@ -285,19 +269,19 @@ columns: - title: Behavior key: behavior rows: - - algorithm: "[`round-robin`](/plugins/ai-proxy-advanced/examples/round-robin/)" + - algorithm: "`round-robin`" behavior: Weighted traffic distribution across targets. - - algorithm: "[`consistent-hashing`](/plugins/ai-proxy-advanced/examples/consistent-hashing/)" + - algorithm: "`consistent-hashing`" behavior: Sticky sessions based on header values. - - algorithm: "[`least-connections`](/plugins/ai-proxy-advanced/examples/least-connections/)" + - algorithm: "`least-connections`" behavior: Route to backends with spare capacity. - - algorithm: "[`lowest-latency`](/plugins/ai-proxy-advanced/examples/lowest-latency/)" + - algorithm: "`lowest-latency`" behavior: Route to the fastest-responding model. - - algorithm: "[`lowest-usage`](/plugins/ai-proxy-advanced/examples/lowest-usage/)" + - algorithm: "`lowest-usage`" behavior: Route based on token counts or cost. - - algorithm: "[`semantic`](/plugins/ai-proxy-advanced/examples/semantic/)" + - algorithm: "`semantic`" behavior: Route based on prompt-to-model similarity. - - algorithm: "[`priority`](/plugins/ai-proxy-advanced/examples/priority/)" + - algorithm: "`priority`" behavior: Tiered failover across model groups. {% endtable %} @@ -338,23 +322,23 @@ Substitution applies to the [`name`](#schema-aigateway-model-target-models-name) * `$(uri_captures.path_parameter_name)`: the value of a captured URI path parameter. * `$(query_params.query_parameter_name)`: the value of a query string parameter. -For end-to-end examples, see [dynamic model selection](/plugins/ai-proxy/examples/sdk-dynamic-model-selection/), [Azure deployment routing](/plugins/ai-proxy/examples/sdk-azure-deployment/), and [proxying multiple models in one Azure instance](/plugins/ai-proxy/examples/sdk-multiple-providers/) on the AI Proxy plugin page. +For examples of using templating, consult the {{site.ai_gateway}} documentation and API reference. 
## Access control A Model's `acls` field controls which identities are allowed to reach the Model. The field accepts `allow` and `deny` lists. Each entry is a string that references a Consumer, Consumer Group, or Authenticated Group by name. Access is enforced at the Service level of the generated primitives. -For per-request authentication and identity, configure the appropriate authentication plugin globally or as a Policy on the Model. +For per-request authentication and identity, configure the appropriate authentication Policy globally or attach it to the Model. ## Attach Policies -Policies are how plugin configurations apply to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. +Policies apply configuration and behavior to a Model. A Policy attached to a Model runs at the Service level of the Model's generated primitives, so it applies to every request routed through any of the Model's capabilities. A Model declares the Policies it uses through its `policies` field. Each entry is a string that references a Policy by name or ID. {{site.konnect_short_name}} resolves these references against Policies created at `/v1/ai-gateways/{aiGatewayId}/policies`. -You can attach multiple Policies to a single Model. Each Policy has an independent plugin instance, so attaching the same plugin type twice with different configurations creates two separate plugin entries. +You can attach multiple Policies to a single Model. Each Policy is applied independently, so attaching the same Policy type twice with different configurations creates two separate instances. -Not every plugin type is valid as a Model Policy. +Not every Policy type is valid as a Model attachment. Policies attached to a Model are not deleted when the Model is deleted; only the Model's reference is removed. 
@@ -362,11 +346,11 @@ For further information, see the [Policy entity](/ai-gateway/entities/ai-policy/ ### Plugin priority and Policy execution order -A Policy attached to a Model creates one plugin entry on the Service of the Model's derived primitives. That plugin runs at the [priority](/gateway/entities/plugin/#plugin-priority) of its underlying plugin type, which determines when it executes relative to other plugins on the request. +A Policy attached to a Model runs on the Service of the Model's derived primitives. That Policy runs at the [priority](/gateway/entities/plugin/#plugin-priority) determined by its type, which affects when it executes relative to other Policies on the request. -The AI Proxy Advanced plugin runs at priority `770` and parses the request body to resolve the model name. Any Policy whose underlying plugin type has a priority higher than `770` runs before that resolution. Authentication plugin types (such as OpenID Connect) fall into this category. They still gate access correctly because routing to the Model's generated Service already occurred, but model-level identity details (provider and target model) are not available yet. +Model routing executes at a specific point in the request pipeline. Policies have different priorities that determine when they run. Higher-priority Policy types may run before Model routing is resolved. Authentication Policies (such as OpenID Connect) fall into this category. They gate access correctly because routing to the Model's generated Service has already occurred, but model-level identity details (provider and target model) are not available until after Model resolution. -For Policies whose runtime behavior depends on the resolved Model identity, attach plugin types that run at priority `770` or lower, or use [dynamic plugin ordering](/gateway/entities/plugin/) to push their execution later. 
+For Policies whose behavior depends on the resolved Model identity, use Policy types that run at or after Model resolution, or use [dynamic plugin ordering](/gateway/entities/plugin/#dynamic-plugin-ordering) to adjust execution order as needed. ## Set up a Model diff --git a/app/_data/entity_examples/config.yml b/app/_data/entity_examples/config.yml index 3c3c610eff..87e156e9e0 100644 --- a/app/_data/entity_examples/config.yml +++ b/app/_data/entity_examples/config.yml @@ -60,6 +60,7 @@ formats: # core entities consumer: '/consumers/' consumer_group: '/consumer_groups/' + model: '/models/' route: '/routes/' service: '/services/' target: '/upstreams/{upstream}/targets/' @@ -89,6 +90,10 @@ formats: route: '/routes/{route}/plugins/' service: '/services/{service}/plugins/' global: '/plugins/' + ai_policy_endpoints: + ai_model: '/models/{ai_model}/policies/' + ai_agent: '/agents/{ai_agent}/policies/' + ai_mcp_server: '/mcp-servers/{ai_mcp_server}/policies/' variables: <<: *variables ai_gateway: diff --git a/app/ai-gateway/load-balancing.md b/app/ai-gateway/load-balancing.md index ec5cc3baea..04e4473aab 100644 --- a/app/ai-gateway/load-balancing.md +++ b/app/ai-gateway/load-balancing.md @@ -1,46 +1,54 @@ --- -title: "Load balancing with AI Proxy Advanced" +title: "Load balancing with {{site.ai_gateway_name}}" layout: reference content_type: reference -description: This guide provides an overview of load balancing and retry and fallback strategies in the AI Proxy Advanced plugin. +description: "This guide provides an overview of load balancing and retry and fallback strategies in {{site.ai_gateway}}." 
breadcrumbs: - /ai-gateway/ works_on: - - on-prem - konnect products: - gateway - ai-gateway +tools: + - admin-api + - konnect-api + tags: - ai - load-balancing - - ai-proxy - -plugins: - - ai-proxy-advanced min_version: - gateway: '3.10' + ai-gateway: '2.0.0' related_resources: - text: "{{site.ai_gateway}}" url: /ai-gateway/ - - text: AI Proxy Advanced - url: /plugins/ai-proxy-advanced/ + - text: Model entity + url: /ai-gateway/entities/ai-model/ --- {{site.ai_gateway}} provides load balancing capabilities to distribute requests across multiple LLM models. You can use these features to improve fault tolerance, optimize resource utilization, and balance traffic across your AI systems. -The [AI Proxy Advanced](/plugins/ai-proxy-advanced/) plugin supports several load balancing algorithms similar to those used for Kong upstreams, extended for AI model routing. You configure load balancing through the [Upstream entity](/gateway/entities/upstream/), which lets you control how requests are routed to various AI providers and models. +In {{site.ai_gateway}} 2.0.0 and later, load balancing is configured on the [Model entity](/ai-gateway/entities/ai-model/) through `config.balancer` and `target_models`. + + ### Load balancing algorithms {{site.ai_gateway}} supports multiple load balancing strategies for distributing traffic across AI models. Each algorithm addresses different goals: balancing load, improving cache-hit ratios, reducing latency, or providing [failover reliability](#retry-and-fallback). -The following table describes the available algorithms and considerations for selecting one. +The following table describes the available algorithms for [Model entities](/ai-gateway/entities/ai-model/) and considerations for selecting one. 
{% table %} @@ -52,54 +60,54 @@ columns: - title: Considerations key: considerations rows: - - algorithm: "[Round-robin (weighted)](/plugins/ai-proxy-advanced/examples/round-robin/)" + - algorithm: "Round-robin (weighted)" description: | Distributes requests across models based on their assigned weights. For example, if models `gpt-4`, `gpt-4o-mini`, and `gpt-3` have weights of `70`, `25`, and `5`, they receive approximately 70%, 25%, and 5% of traffic respectively. Requests are distributed proportionally, independent of usage or latency metrics. considerations: | * Traffic is routed proportionally based on weights. * Requests follow a circular sequence adjusted by weight. * Does not account for cache-hit ratios, latency, or current load. - - algorithm: "[Consistent-hashing](/plugins/ai-proxy-advanced/examples/consistent-hashing/)" + - algorithm: "Consistent-hashing" description: | - Routes requests based on a hash of a configurable header value. Requests with the same header value are routed to the same model, enabling sticky sessions for maintaining context across user interactions. The [`hash_on_header`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-hash-on-header) setting defines the header to hash. The default is `X-Kong-LLM-Request-ID`. + Routes requests based on a hash of a configurable header value. Requests with the same header value are routed to the same model, enabling sticky sessions for maintaining context across user interactions. The [`hash_on_header`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-hash-on-header) setting defines the header to hash. The default is `X-Kong-LLM-Request-ID`. considerations: | * Effective with consistent keys like user IDs. * Requires diverse hash inputs for balanced distribution. * Useful for session persistence and cache-hit optimization. 
- - algorithm: "[Least-connections](/plugins/ai-proxy-advanced/examples/least-connections/)" + - algorithm: "Least-connections" description: | - {% new_in 3.13 %} Tracks the number of in-flight requests for each backend and routes new requests to the backend with the highest spare capacity. The [`weight`](/plugins/ai-proxy-advanced/reference/#schema--config-targets-weight) parameter is used to calculate connection capacity. + Tracks the number of in-flight requests for each backend and routes new requests to the backend with the highest spare capacity. The [`weight`](/ai-gateway/entities/ai-model/#schema-aigateway-model-target-models-weight) parameter is used to calculate connection capacity. considerations: | * Dynamically adapts to backend response times. * Routes away from slower backends as they accumulate open connections. * Does not account for cache-hit ratios. - - algorithm: "[Lowest-usage](/plugins/ai-proxy-advanced/examples/lowest-usage/)" + - algorithm: "Lowest-usage" description: | - Routes requests to models with the lowest measured resource usage. The [`tokens_count_strategy`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-tokens-count-strategy) parameter defines how usage is measured: prompt token counts, response token counts, or cost {% new_in 3.10 %}. + Routes requests to models with the lowest measured resource usage. The [`tokens_count_strategy`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-tokens-count-strategy) parameter defines how usage is measured: prompt token counts, response token counts, or cost. considerations: | * Balances load based on actual consumption metrics. * Useful for cost optimization and avoiding overloading individual models. - - algorithm: "[Lowest-latency](/plugins/ai-proxy-advanced/examples/lowest-latency/)" + - algorithm: "Lowest-latency" description: | - Routes requests to the model with the lowest observed latency. 
The [`latency_strategy`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-latency-strategy) parameter defines how latency is measured. The default (`tpot`) uses time-per-output-token. The `e2e` option uses end-to-end response time. + Routes requests to the model with the lowest observed latency. The [`latency_strategy`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-latency-strategy) parameter defines how latency is measured. The default (`tpot`) uses time-per-output-token. The `e2e` option uses end-to-end response time.

The algorithm uses peak EWMA (Exponentially Weighted Moving Average) to track latency from TCP connect through body response. Metrics decay over time. considerations: | * Prioritizes models with the fastest response times. * Suited for latency-sensitive applications. * Less suitable for long-lived connections like WebSockets. - - algorithm: "[Semantic](/plugins/ai-proxy-advanced/examples/semantic/)" + - algorithm: "Semantic" description: | Routes requests based on semantic similarity between the prompt and model descriptions. Embeddings are generated using a specified model (for example, `text-embedding-3-small`), and similarity is calculated using vector search.

- {% new_in 3.13 %} Multiple targets can share [identical descriptions](/plugins/ai-proxy-advanced/examples/semantic-with-fallback/). When they do, the balancer performs round-robin fallback among them if the primary target fails. Weights affect fallback order. + Multiple targets can share identical descriptions. When they do, the balancer performs round-robin fallback among them if the primary target fails. Weights affect fallback order. considerations: | * Requires a vector database (for example, Redis) for similarity matching. * The `distance_metric` and `threshold` settings control matching sensitivity. * Best for routing prompts to domain-specialized models. - - algorithm: "[Priority](/plugins/ai-proxy-advanced/examples/priority/)" + - algorithm: "Priority" description: | - {% new_in 3.10 %} Routes requests to models based on assigned priority groups. The balancer always selects from the highest-priority group first. If all targets in that group are unavailable, it falls back to the next group. Within each group, the [`weight`](/plugins/ai-proxy-advanced/reference/#schema--config-targets-weight) parameter controls traffic distribution. + Routes requests to models based on assigned priority groups. The balancer always selects from the highest-priority group first. If all targets in that group are unavailable, it falls back to the next group. Within each group, the [`weight`](/ai-gateway/entities/ai-model/#schema-aigateway-model-target-models-weight) parameter controls traffic distribution. considerations: | * Higher-priority groups receive all traffic until they fail. * Lower-priority groups serve as fallback only. @@ -107,9 +115,17 @@ rows: {% endtable %} +For examples of each algorithm, see [Algorithm examples](/ai-gateway/entities/ai-model/#algorithm-examples) in the [Model entity](/ai-gateway/entities/ai-model/) reference. + +### Request routing by model alias + +Model aliases allow clients to send an alias instead of the actual model name in the request. 
This decouples the external model identifier from the internal provider model, enabling flexible routing without changing client code. + +Each target in a Model entity can have an optional [`model.alias`](/ai-gateway/entities/ai-model/#schema-aigateway-model-target-models-model-alias) field. When a client sends `"model": "alias-value"` in the request body, {{site.ai_gateway}} routes to the matching target. This feature works independently of load balancing algorithms — the alias determines which target (or set of targets) handles the request, and the configured load balancing algorithm selects the final backend within that set. + ### Retry and fallback -The load balancer includes built-in support for **retries** and **fallbacks**. When a request fails, the balancer can automatically retry the same target or redirect the request to a different upstream target. +The load balancer includes built-in support for **retries** and **fallbacks**. When a request fails, the balancer can automatically retry the same target or redirect the request to a different target model. #### How retry and fallback works @@ -143,7 +159,7 @@ flowchart LR #### Retry and fallback configuration -{{site.ai_gateway}} load balancer supports fine-grained control over failover behavior. Use [`failover_criteria`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-failover-criteria) to define when a request should retry on the next upstream target. By default, retries occur on `error` and `timeout`. An `error` means a failure occurred while connecting to the server, forwarding the request, or reading the response header. A `timeout` indicates that any of those stages exceeded the allowed time. +The {{site.ai_gateway}} load balancer supports fine-grained control over failover behavior on the [Model entity](/ai-gateway/entities/ai-model/). 
Use [`failover_criteria`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-failover-criteria) to define when a request should retry on the next target model. By default, retries occur on `error` and `timeout`. An `error` means a failure occurred while connecting to the server, forwarding the request, or reading the response header. A `timeout` indicates that any of those stages exceeded the allowed time. You can add more criteria to adjust retry behavior as needed: @@ -155,23 +171,23 @@ columns: - title: Description key: description rows: - - setting: "[`retries`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-retries)" + - setting: "[`retries`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-retries)" description: | Defines how many times to retry a failed request before reporting failure to the client. Increase for better resilience to transient errors; decrease if you need lower latency and faster failure. - - setting: "[`failover_criteria`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-failover-criteria)" + - setting: "[`failover_criteria`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-failover-criteria)" description: | Specifies which types of failures (e.g., `http_429`, `http_500`) should trigger a failover to a different target. Customize based on your tolerance for specific errors and desired failover behavior. - - setting: "[`connect_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-connect-timeout)" + - setting: "[`connect_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-connect-timeout)" description: | Sets the maximum time allowed to establish a TCP connection with a target. Lower it for faster detection of unreachable servers; raise it if some servers may respond slowly under load. 
- - setting: "[`read_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-read-timeout)" + - setting: "[`read_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-read-timeout)" description: | Defines the maximum time to wait for a server response after sending a request. Lower it for real-time applications needing quick responses; increase it for long-running operations. - - setting: "[`write_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-write-timeout)" + - setting: "[`write_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-write-timeout)" description: | Sets the maximum time allowed to send the request payload to the server. Increase if large request bodies are common; keep short for small, fast payloads. @@ -180,7 +196,7 @@ rows: #### Retry and fallback scenarios -You can customize {{site.ai_gateway}} load balancer to fit different application needs, such as minimizing latency, enabling sticky sessions, or optimizing for cost. The table below maps common scenarios to key configuration options that control load balancing behavior: +You can customize the {{site.ai_gateway}} load balancer to fit different application needs, such as minimizing latency, enabling sticky sessions, or optimizing for cost. 
The table below maps common scenarios to key configuration options that control load balancing behavior: {% table %} @@ -193,36 +209,51 @@ columns: key: description rows: - scenario: "Requests must not hang longer than 3 seconds" - action: "Adjust [`connect_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-vectordb-redis-connect-timeout), [`read_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-vectordb-redis-read-timeout), [`write_timeout`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-write-timeout)" + action: "Adjust [`connect_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-connect-timeout), [`read_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-read-timeout), [`write_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-write-timeout)" description: | - Shorten these timeouts to quickly fail if a server is slow or unresponsive, ensuring faster error handling and responsiveness. + Shorten these timeouts to quickly fail if a target model is slow or unresponsive, ensuring faster error handling and responsiveness. - scenario: "Prioritize the lowest-latency target" - action: "Set [`latency_strategy`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-latency-strategy) to `e2e`" + action: "Set [`latency_strategy`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-latency-strategy) to `e2e`" description: | Optimize routing based on full end-to-end response time, selecting the target that minimizes total latency. 
- scenario: "Need predictable fallback for the same user" - action: "Use [`hash_on_header`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-hash-on-header)" + action: "Use [`hash_on_header`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-hash-on-header)" description: | - Ensure that the same user consistently routes to the same target, enabling sticky sessions and reliable fallback behavior. + Ensure that the same user consistently routes to the same target model, enabling sticky sessions and reliable fallback behavior. - scenario: "Models have different costs" - action: "Set [`tokens_count_strategy`](/plugins/ai-proxy-advanced/reference/#schema--config-balancer-tokens-count-strategy) to `cost`" + action: "Set [`tokens_count_strategy`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-tokens-count-strategy) to `cost`" description: | - Route requests intelligently by considering cost, balancing model performance with budget optimization. + Route requests by considering cost, balancing model performance with budget targets. {% endtable %} -#### Version compatibility for fallbacks +### Health check and circuit breaker -{:.info} -> **{{site.base_gateway}} version compatibility for fallbacks:** -> {% new_in 3.10 %} -> - Full fallback support across targets, even with different API formats. -> - Mix models from different providers if needed (for example, OpenAI and {{ site.mistral }}). -> -> Pre-3.10: -> - Fallbacks only allowed between targets using the same API format. -> - Example: OpenAI-to-OpenAI fallback is supported; OpenAI-to-OLLAMA is not. +For Model entities, circuit breaker behavior is controlled through the balancer configuration on the Model. Use these settings to fail fast when a target model is unhealthy and to retry or fall back to another target instead of waiting for repeated slow responses. 
+ + +{% table %} +columns: + - title: Setting + key: setting + - title: Use + key: use +rows: + - setting: "[`connect_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-connect-timeout), [`read_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-read-timeout), [`write_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-write-timeout)" + use: "Reduce how long {{site.base_gateway}} waits before treating a target model as unavailable." + - setting: "[`max_fails`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-max-fails)" + use: "Set the number of failed attempts allowed before {{site.base_gateway}} marks a target model unhealthy." + - setting: "[`fail_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-fail-timeout)" + use: "Set how long {{site.base_gateway}} keeps a target model in a failed state before trying it again." +{% endtable %} + + +The load balancer supports health checks and circuit breakers to improve reliability. If the number of unsuccessful attempts to a target reaches [`config.balancer.max_fails`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-max-fails), the load balancer stops sending requests to that target until it reconsiders the target after the period defined by [`config.balancer.fail_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-fail-timeout). The diagram below illustrates this behavior: + +![Circuit breaker](/assets/images/ai-gateway/circuit-breaker.jpg){: style="display:block; margin-left:auto; margin-right:auto; width:50%; border-radius:10px" } + +Consider an example where [`config.balancer.max_fails`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-max-fails) is 3 and [`config.balancer.fail_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-fail-timeout) is 10 seconds. 
When failed requests for a target reach 3, the target is marked unhealthy and the load balancer stops sending requests to it. After 10 seconds, the target is reconsidered. If the request to this target still fails, the target remains unhealthy and the load balancer continues to exclude it. If the request succeeds, the target is marked healthy again and recovers from the circuit breaker. -### Health check and circuit breaker {% new_in 3.13 %} +The failure counter tracks total failures, not consecutive failures. If a target receives 2 failed requests, then 1 successful request within the timeout window, the counter remains at 2. The counter resets only when a successful request occurs after [`config.balancer.fail_timeout`](/ai-gateway/entities/ai-model/#schema-aigateway-model-config-balancer-fail-timeout) has elapsed since the last failed request. -{% include ai-gateway/circuit-breaker.md %} \ No newline at end of file +If all targets become unhealthy simultaneously, requests fail with `HTTP 500`. 
From 45c22274af7a24aa3f808c36499126c22b277187 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 11 Jun 2026 08:08:55 +0200 Subject: [PATCH 39/42] update min_version --- app/ai-gateway/load-balancing.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/ai-gateway/load-balancing.md b/app/ai-gateway/load-balancing.md index 04e4473aab..a0ea831146 100644 --- a/app/ai-gateway/load-balancing.md +++ b/app/ai-gateway/load-balancing.md @@ -22,7 +22,7 @@ tags: - load-balancing min_version: - ai-gateway: '2.0.0' + ai-gateway: '2.0' related_resources: - text: "{{site.ai_gateway}}" @@ -37,9 +37,9 @@ In {{site.ai_gateway}} 2.0.0 and later, load balancing is configured on the [Mod From 8050e0415628bd82278612bbbf95a4e1b69f6e37 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Thu, 11 Jun 2026 08:21:46 +0200 Subject: [PATCH 40/42] Update min_version for Resource sizing guidelines doc --- app/ai-gateway/resource-sizing-guidelines-ai.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/ai-gateway/resource-sizing-guidelines-ai.md b/app/ai-gateway/resource-sizing-guidelines-ai.md index d7381a8eb7..35995b2bd9 100644 --- a/app/ai-gateway/resource-sizing-guidelines-ai.md +++ b/app/ai-gateway/resource-sizing-guidelines-ai.md @@ -11,7 +11,7 @@ works_on: - on-prem min_version: - gateway: '3.12' + gateway: '2.0' tags: - performance From 21371267f1376baf49f99b09c12329b02e8c3f14 Mon Sep 17 00:00:00 2001 From: tomek-labuk Date: Tue, 16 Jun 2026 15:51:13 +0200 Subject: [PATCH 41/42] feat(ai-gateway): Align semantic similarity documentation with AI GW 2.0 (#5501) --- .../md/ai-gateway/v2/ai-vector-db.md | 18 +++ app/ai-gateway/semantic-similarity.md | 135 ++++++++---------- 2 files changed, 77 insertions(+), 76 deletions(-) create mode 100644 app/_includes/md/ai-gateway/v2/ai-vector-db.md diff --git a/app/_includes/md/ai-gateway/v2/ai-vector-db.md b/app/_includes/md/ai-gateway/v2/ai-vector-db.md new file mode 100644 index 0000000000..4d27970519 --- 
/dev/null +++ b/app/_includes/md/ai-gateway/v2/ai-vector-db.md @@ -0,0 +1,18 @@ +A vector database stores and compares vector embeddings—numerical representations of text, prompts, documents, or other content. When you configure semantic features in [AI Models](/ai-gateway/entities/ai-model/) or [AI Policies](/ai-gateway/entities/ai-policy/), embeddings are generated and stored in the vector database so that incoming requests can be compared against the stored vectors to find semantically similar matches. For example, an incoming prompt is embedded and compared against cached prompt keys, model descriptions, document chunks, or allow/deny lists to determine semantic similarity. + +{{site.ai_gateway}} semantic features support the following vector databases: + +* Using `vectordb.strategy: redis` and parameters in `vectordb.redis`: + * **[Redis](https://redis.io/docs/latest/stack/search/reference/vectors/)** with Vector Similarity Search (VSS) + * **[Redis Cloud](https://redis.io/cloud/)** + * **[Valkey](https://valkey.io/topics/search/)**: When you configure `vectordb.strategy: redis`, {{site.base_gateway}} queries the server and checks the server name field. If it detects a Valkey server, it automatically uses the Valkey-specific driver. + * Managed Redis with cloud authentication: + * **AWS ElastiCache** (`auth_provider: aws`) + * **Azure Managed Redis** (`auth_provider: azure`) + * **Google Cloud Memorystore** (`auth_provider: gcp`) + + For configuration details, see [Using cloud authentication with Redis](#using-cloud-authentication-with-redis). +* Using `vectordb.strategy: pgvector` and parameters in `vectordb.pgvector`: + * **[PostgreSQL with pgvector](https://github.com/pgvector/pgvector)** {% new_in 2.0 %} + +Configure vector database settings in [AI Models](/ai-gateway/entities/ai-model/) and [AI Policies](/ai-gateway/entities/ai-policy/) to enable semantic similarity features. 
diff --git a/app/ai-gateway/semantic-similarity.md b/app/ai-gateway/semantic-similarity.md index b25cee3146..4209c67b1d 100644 --- a/app/ai-gateway/semantic-similarity.md +++ b/app/ai-gateway/semantic-similarity.md @@ -1,44 +1,31 @@ --- -title: "Embedding-based similarity matching in Kong AI gateway plugins" +title: "Embedding-based similarity matching in {{site.ai_gateway}}" layout: reference content_type: reference -description: This reference explains how {{site.ai_gateway}} plugins use embedding-based similarity to compare prompts with various inputs—such as cached entries, upstream targets, document chunks, or allow/deny lists. +description: This reference explains how {{site.ai_gateway}} uses embedding-based similarity to compare prompts with various inputs—such as cached entries, target model descriptions, document chunks, or allow/deny lists. breadcrumbs: - /ai-gateway/ works_on: - - on-prem - konnect products: - - gateway - ai-gateway tags: - ai - load-balancing -plugins: - - ai-proxy-advanced - - ai-semantic-cache - - ai-rag-injector - - ai-semantic-prompt-guard - - ai-semantic-response-guard - min_version: - gateway: '3.10' + ai-gateway: '2.0' related_resources: - text: "{{site.ai_gateway}}" url: /ai-gateway/ - - text: "{{site.ai_gateway}} plugins" - url: /plugins/?category=ai - - text: Use AI Semantic Prompt Guard plugin to govern your LLM traffic - url: /how-to/use-ai-semantic-prompt-guard-plugin/ - - text: Ensure chatbots adhere to compliance policies with the AI RAG Injector plugin - url: /how-to/use-ai-rag-injector-plugin/ - - text: Control prompt size with the AI Compressor plugin - url: /how-to/compress-llm-prompts/ + - text: Policy entity + url: /ai-gateway/entities/ai-policy/ + - text: "{{site.ai_gateway}} Model entity" + url: /ai-gateway/entities/ai-model/ - text: Semantic processing and vector similarity search with Kong and Redis url: https://konghq.com/blog/engineering/semantic-processing-and-vector-similarity-search-with-kong-and-redis - 
text: Vector embeddings @@ -49,85 +36,81 @@ related_resources: icon: /assets/icons/redis.svg --- -In large language tasks, applications that interact with language models rely on semantic search—not by exact word matches, but by similarity in meaning. This is achieved using vector embeddings, which represent pieces of text as points in a high-dimensional space. - -These embeddings enable the concept of semantic similarity, where the “distance” between vectors reflects how closely related two pieces of text are. Similarity can be measured using techniques like cosine similarity or Euclidean distance, forming the quantitative basis for comparing meaning. +Vector embeddings represent text as points in high-dimensional space, where the distance between vectors reflects semantic similarity. This enables semantic search—comparing meaning rather than exact words—powering LLM workflows like intelligent caching, retrieval, classification, and anomaly detection. ![Vector embeddings example](/assets/images/ai-gateway/vectors.svg) > _**Figure 1:** A simplified representation of vector text embeddings in a three-dimensional space._ -For example, in the image, "king" and "emperor" are semantically more similar than a "king" is to an "otter". - -Vector embeddings power a range of LLM workflows, including semantic search, document clustering, recommendation systems, anomaly detection, content similarity analysis, and classification via auto-labeling. +For example, in Figure 1, “king” and “emperor” are semantically more similar than “king” is to “otter”. Similarity is measured using techniques like cosine similarity or Euclidean distance, which quantify the relationship between vectors. ## Semantic similarity in {{site.ai_gateway}} -In {{site.ai_gateway}}, several plugins leverage embedding-based similarity: +Based on meaning rather than exact matches, {{site.ai_gateway}} can perform intelligent request routing, caching, and content filtering using semantic similarity queries. 
A [Model](/ai-gateway/entities/ai-model/) can leverage semantic similarity in two ways: -{% table %} -columns: - - title: Plugin - key: plugin - - title: Description - key: description -rows: - - plugin: "[AI Proxy Advanced](/plugins/ai-semantic-prompt-guard/)" - description: Performs semantic routing by embedding each upstream’s description at config time and storing the results in a selected vector database. At runtime, it embeds the prompt and queries vector database to route requests to the most semantically appropriate upstream. - - plugin: "[AI Semantic Cache](/plugins/ai-semantic-cache/)" - description: Indexes previous prompts and responses as embeddings. On each request, it searches for semantically similar inputs and serves cached responses when possible to reduce redundant LLM calls. - - plugin: "[AI RAG Injector](/plugins/ai-rag-injector/)" - description: Retrieves semantically relevant chunks from a vector database. It embeds the prompt, performs a similarity search, and injects the results into the prompt to enable retrieval-augmented generation. - - plugin: "[AI Semantic Prompt Guard](/plugins/ai-semantic-prompt-guard/)" - description: Compares incoming prompts against allow/deny lists using embedding similarity to detect and block misuse patterns. - - plugin: | - [AI Semantic Response Guard](/plugins/ai-semantic-response-guard/) {% new_in 3.12 %} - description: Filters LLM responses by comparing their semantic content against predefined allow and deny lists. It analyzes the full response body, generates embeddings, and enforces rules to block unsafe or unwanted outputs before returning them to the client. -{% endtable %} +1. **Semantic load balancing**: Route requests to upstream providers based on how semantically similar the prompt is to each provider's capabilities, using the `semantic` load balancing algorithm. +2. 
**Semantic Policies**: Attach Policies like AI Semantic Cache or AI Semantic Prompt Guard to add similarity-based caching, retrieval-augmented generation (RAG), and guardrails. ### Vector databases -To compare embeddings efficiently, {{site.ai_gateway}} semantic plugins rely on vector databases. These specialized data stores index high-dimensional embeddings and enable **fast similarity search** based on distance metrics like cosine similarity or Euclidean distance. - -When a plugin needs to find semantically similar content—whether it’s a past prompt, an upstream description, or a document chunk—it sends a query to a vector database. The database returns the closest matches, allowing the plugin to make decisions like caching, routing, injecting, or blocking. +To store and compare embeddings efficiently, {{site.ai_gateway}} semantic features rely on vector databases. These specialized datastores index high-dimensional embeddings and enable **fast similarity search** based on distance metrics like cosine similarity or Euclidean distance. +A Model Entity’s [semantic load balancer](/ai-gateway/entities/ai-model/#algorithms) stores vector representations of each target model’s semantic description at configuration time, and uses the vector database to compare incoming prompts against those stored vectors. -{% include_cached /plugins/ai-vector-db.md name=page.name %} +Semantic policies also use vector databases to perform similarity searches at request time. The selected database stores the embeddings generated by the Model or Policies (either at config time or runtime), and determines the accuracy and performance of semantic operations. -The selected database stores the embeddings generated by the plugin (either at config time or runtime), and determines the accuracy and performance of semantic operations. +{% include md/ai-gateway/v2/ai-vector-db.md %} ### What is compared for similarity? 
-Each plugin applies similarity search slightly differently depending on its goal. These comparisons determine whether the plugin routes, blocks, reuses, or enriches a prompt based on meaning rather than syntax. +Each policy applies similarity search slightly differently depending on its goal. These comparisons determine whether the policy routes, blocks, reuses, or enriches a prompt based on meaning rather than syntax. -The following table describes how each AI plugin compares embeddings: +The following table describes how each {{site.ai_gateway}} policy compares embeddings: - {% table %} columns: - - title: Plugin - key: plugin - - title: Compared embeddings - key: comparison + - title: Semantic feature + key: feature + - title: Incoming data + key: incoming + - title: Compared against + key: stored rows: - - plugin: "AI Proxy Advanced" - comparison: "Prompt vs. `description` field of each upstream target" - - plugin: "AI Semantic Prompt Guard" - comparison: "Prompt vs. allowlist and denylist prompts" - - plugin: "AI Semantic Cache" - comparison: "Prompt vs. cached prompt keys" - - plugin: "AI RAG Injector" - comparison: "Prompt vs. 
vectorized document chunks" + - feature: "Model semantic load balancing" + incoming: "Incoming prompts" + stored: "Stored embeddings of each target model's semantic description" + - feature: "AI Semantic Cache policy" + incoming: "Incoming prompts" + stored: "Cached prompt keys" + - feature: "AI RAG Injector policy" + incoming: "Incoming prompts" + stored: "Vectorized document chunks" + - feature: "AI Semantic Prompt Guard / Response Guard policies" + incoming: "Request content or responses" + stored: "Vectorized allow/deny lists" {% endtable %} - +### How semantic similarity is applied + +Semantic similarity is used differently depending on the feature: + +**Model semantic load balancing** (`semantic` algorithm): +- Generates embeddings for each target model's semantic description at configuration time and stores them in the vector database. +- At request time, embeds the incoming prompt using the same embedding model and compares it against the stored target embeddings. +- Routes requests to the target whose description is most semantically similar to the prompt, using the distance metric (cosine or Euclidean) configured for the Model. +- The quality of routing depends on semantic description quality and consistent use of the same embedding model for both targets and prompts. +**Semantic Policies**: +- Each semantic Policy uses similarity search slightly differently based on its goal. +- AI Semantic Cache compares prompts against cached prompt keys to find reusable responses. +- AI RAG Injector compares prompts against vectorized document chunks to retrieve relevant context. +- AI Semantic Prompt Guard and AI Semantic Response Guard compare content against vectorized allow and deny lists to detect misuse patterns semantically. ## Dimensionality Embedding models work by converting text into high-dimensional floating-point arrays where mathematical distance reflects semantic relationship. 
In other words, ingested text data becomes points in a vector space, which enables similarity searches in vector databases, and the dimension of embeddings plays a critical role for this. -Dimensionality determines how many numerical features represent each piece of content—similar to how a detailed profile might have dimensions for age, interests, location, and preferences. Higher dimensions create more detailed "fingerprints" that capture nuanced relationships, with smaller distances between vectors indicating stronger conceptual similarity and larger distances showing weaker associations. +Dimensionality determines how many numerical features represent each piece of content—similar to how a detailed profile might have dimensions for age, interests, location, and preferences. A higher number of dimensions creates more detailed "fingerprints" that capture nuanced relationships. Smaller distances between vectors indicate stronger conceptual similarity and larger distances show weaker associations. -For example, this request to the OpenAI [/embeddings API](/plugins/ai-proxy/examples/embeddings-route-type/) via {{site.ai_gateway}}: +For example, this request to the OpenAI `/embeddings` API via {{site.ai_gateway}}: ```json { @@ -187,7 +170,7 @@ The `embedding` array contains 20 floating-point numbers—each one representing If you use embedding models that support defining the dimensionality of the embedding output, you should consider how to balance accuracy and performance based on your use case. -However, dimensionality extremes at the far ends of the spectrum present significant drawbacks: +However, extremes at the far ends of the spectrum present significant drawbacks: {% table %} columns: @@ -219,7 +202,7 @@ rows: ### Cosine and Euclidean similarity -{{site.ai_gateway}} supports both cosine similarity and Euclidean distance for vector comparisons, allowing you to choose the method best suited for your use case. 
You can configure the method using `config.vectordb.distance_metric` setting in the respective plugin. +{{site.ai_gateway}} supports both cosine similarity and Euclidean distance for vector comparisons, allowing you to choose the method best suited for your use case. You can configure the method using the `config.vectordb.distance_metric` setting in the respective policy. * Use `cosine` for nuanced semantic similarity (for example, document comparison, text clustering), especially when content length varies or dataset diversity is high. * Use `euclidean` when magnitude matters (for example, images, sensor data) or you're working with dense, well-aligned feature sets. @@ -231,7 +214,7 @@ Cosine similarity measures the angle between vectors, ignoring their magnitude. ![Cosine similarity example](/assets/images/ai-gateway/cosine-similarity.svg) > _**Figure 2:** Visualization of cosine similarity as the angle between vector directions._ -Cosine tends to perform well across both low and high dimensional space, especially in high-diversity datasets because it captures vector orientation rather than size. This can be useful, for example, when comparing texts about Microsoft, Apple, and {{ site.google}}. +Cosine tends to perform well across both low and high dimensional space, especially in high-diversity datasets because it captures vector orientation rather than size. This can be useful, for example, when comparing texts about Microsoft, Apple, and {{site.google}}. #### Euclidean distance @@ -274,7 +257,7 @@ rows: ## Similarity threshold -The `vectordb.threshold` parameter controls how strictly the vector database evaluates similarity during a query. It is passed directly to the vector engine—such as Redis or PGVector—and defines which results qualify as matches. In Redis, for example, this maps to the `distance_threshold` query parameter. By default, Redis sets this to `0.2`, but you can override it to suit your use case. 
+The `config.vectordb.threshold` parameter controls how strictly the vector database evaluates similarity during a query. It is passed directly to the vector engine (such as Redis or PostgreSQL with pgvector) and defines which results qualify as matches. In Redis, for example, this maps to the `distance_threshold` query parameter. By default, Redis sets this to `0.2`, but you can override it to suit your use case. The threshold defines how permissive the matching is. **Higher threshold values allow looser matches, while lower values enforce stricter matching.** The threshold range is 0 to 1. @@ -288,15 +271,15 @@ In both cases, if the [{{site.base_gateway}} logs](/gateway/logs/) indicate "no The optimal threshold depends on the selected distance metric, the embedding model's dimensionality, and the variation in your data. Tuning may be required for best results. {:.info} -> In Kong's AI semantic plugins, this threshold is **not** post-processed or filtered by the plugin itself. The plugin sends it directly to the vector database, which uses it to determine matching documents based on the configured **distance metric**. +> In {{site.ai_gateway}} semantic policies, this threshold is **not** post-processed or filtered by the policy itself. The policy sends it directly to the vector database, which uses it to determine matching documents based on the configured **distance metric**. ### Threshold sensitivity and cache hit effectiveness -The closer your similarity threshold is to `1`, the more likely you are to get **cache misses** when using plugins like **AI Semantic Cache**. This is because a higher threshold makes the similarity filter more strict, so only embeddings that are nearly identical to the query will qualify as a match. In practice, this means even small variations in phrasing, structure, or context can cause the system to miss otherwise semantically similar entries and fall back to calling the LLM again. 
+The closer your similarity threshold is to `1`, the more likely you are to get **cache misses** when using the **AI Semantic Cache** policy. This is because a higher threshold makes the similarity filter more strict, so only embeddings that are nearly identical to the query will qualify as a match. In practice, this means even small variations in phrasing, structure, or context can cause the system to miss otherwise semantically similar entries and fall back to calling the LLM again. This happens because vector embeddings are not perfectly robust to minor semantic shifts, especially for short or ambiguous prompts. Raising the threshold narrows the match window, so you're effectively demanding a near-exact match in a complex vector space, which is rare unless the input is repeated verbatim. -The chart below illustrates this effect: as the similarity threshold increase (for example, becomes more strict), the cache hit rate typically falls. This reflects the broader acceptance of matches in the embedding space, which helps reduce redundant LLM calls at the cost of some semantic looseness. +The chart below illustrates this effect: as the similarity threshold increases (that is, becomes more strict), the cache hit rate typically falls. Conversely, a lower threshold accepts a broader range of matches in the embedding space, which helps reduce redundant LLM calls at the cost of some semantic looseness. 
![Similarity threshold and cache rate hits](/assets/images/ai-gateway/cache-hit-rate.svg) > _**Figure 5:** As the similarity threshold decreases (becomes more permissive), cache hit rate increases—illustrating the trade-off between strict semantic matching and LLM efficiency._ From a858ed3eeed67449ef24ca54eb86bc04bf0f6eab Mon Sep 17 00:00:00 2001 From: jbaross Date: Thu, 18 Jun 2026 17:19:15 +0100 Subject: [PATCH 42/42] frontmatter cleanup --- app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md index 8637bebf23..a8c05ecffc 100644 --- a/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md +++ b/app/_how-tos/ai-gateway/set-up-a-model-with-ai-proxy.md @@ -5,22 +5,16 @@ content_type: how_to related_resources: - text: "{{site.ai_gateway}}" url: /ai-gateway/ - - text: AI Proxy - url: /plugins/ai-proxy/ description: Configure the AI Proxy plugin to create a chat route. products: - - gateway + - ai-gateway works_on: - - on-prem - konnect -plugins: - - ai-proxy - entities: - service - route