diff --git a/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java b/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java index a018d1b509..8ca91e4ebb 100644 --- a/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java +++ b/annot/src/main/java/com/predic8/membrane/annot/yaml/parsing/binding/ObjectBinder.java @@ -33,11 +33,7 @@ import java.util.List; import java.util.Objects; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.findRequiredSetters; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.findSingleSetterOrNullForAnnotation; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.getSingleChildSetter; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.isCollapsed; -import static com.predic8.membrane.annot.yaml.McYamlIntrospector.isNoEnvelope; +import static com.predic8.membrane.annot.yaml.McYamlIntrospector.*; import static com.predic8.membrane.annot.yaml.NodeValidationUtils.ensureMappingStart; public final class ObjectBinder { @@ -49,7 +45,8 @@ public final class ObjectBinder { public static T bind(ParsingContext pc, Class clazz, JsonNode node) throws ConfigurationParsingException { try { - T configObj = clazz.getConstructor().newInstance(); + T configObj = instantiate(clazz); + BeanDefinition currentBeanDefinition = BeanDefinitionContext.current(); if (currentBeanDefinition != null && pc.getRegistry() != null) { pc.getRegistry().rememberBeanDefinition(configObj, currentBeanDefinition); @@ -102,6 +99,14 @@ public static T bind(ParsingContext pc, Class clazz, JsonNode node) th } } + private static @NotNull T instantiate(Class clazz) throws InvocationTargetException, InstantiationException, IllegalAccessException { + try { + return clazz.getConstructor().newInstance(); + } catch (NoSuchMethodException e) { + throw new ConfigurationParsingException("Class %s does not have a public no-arg 
constructor.".formatted(clazz.getName())); + } + } + private static @NotNull T handleCollapsed(ParsingContext ctx, Class clazz, JsonNode node, T configObj) { if (node.isNull()) throw new ConfigurationParsingException("Collapsed element must not be null."); @@ -117,7 +122,6 @@ private static T handleNoEnvelopeList(ParsingContext pc, Class clazz, return configObj; } - @SuppressWarnings("ConstantValue") private static void applyCollapsedScalar(Class clazz, JsonNode node, T target) { Method attributeSetter = findSingleSetterOrNullForAnnotation(clazz, MCAttribute.class); Method textSetter = findSingleSetterOrNullForAnnotation(clazz, MCTextContent.class); diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java new file mode 100644 index 0000000000..ed9fe0929c --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/AbstractLLMEvent.java @@ -0,0 +1,69 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.predic8.membrane.core.util.http.SSEParser; +import com.predic8.membrane.core.util.json.JsonUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractLLMEvent { + + private static final Logger log = LoggerFactory.getLogger(AbstractLLMEvent.class); + + protected static final ObjectMapper om = new ObjectMapper(); + + protected final JsonNode json; + + protected AbstractLLMEvent(JsonNode json) { + this.json = json; + } + + public abstract String getType(); + + public JsonNode getJson() { + return json; + } + + public static AbstractLLMEvent create(SSEParser.SSEEvent sse) { + + if ("[DONE]".equals(sse.data())) { + return new ChatCompletionDoneEvent(); + } + + var opt = JsonUtil.getJsonObject(sse.data()); + if (opt.isEmpty()) { + log.info("Unknown event format: {}", sse.data()); + } + + var json = opt.get(); + + // Responses API + if (json.has("type")) { + return new ResponsesApiEvent(json); + } + + // Chat Completions API + if ("chat.completion.chunk".equals(json.path("object").asText())) { + return new ChatCompletionEvent(json); + } + + log.debug("Unknown event format: {}", json); + + return null; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java new file mode 100644 index 0000000000..cc234b8113 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionDoneEvent.java @@ -0,0 +1,29 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.fasterxml.jackson.databind.node.NullNode; + +public class ChatCompletionDoneEvent extends AbstractLLMEvent { + + public ChatCompletionDoneEvent() { + super(NullNode.getInstance()); + } + + @Override + public String getType() { + return "chat.completion.done"; + } +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java new file mode 100644 index 0000000000..1fde1e736f --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ChatCompletionEvent.java @@ -0,0 +1,77 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.fasterxml.jackson.databind.JsonNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ChatCompletionEvent extends AbstractLLMEvent { + + private static final Logger log = LoggerFactory.getLogger(ChatCompletionEvent.class); + + public ChatCompletionEvent(JsonNode json) { + super(json); + + parseChoices(json); + + } + + + private static void parseChoices(JsonNode json) { + for (JsonNode choice : json.path("choices")) { + + JsonNode delta = choice.path("delta"); + + if (delta.has("content")) { + log.debug("Content delta: {}", + delta.path("content").asText()); + } + + if (delta.has("tool_calls")) { + + for (JsonNode tc : delta.path("tool_calls")) { + + JsonNode fn = tc.path("function"); + + if (fn.has("name")) { + log.debug("Tool call name delta: {}", + fn.path("name").asText()); + } + + if (fn.has("arguments")) { + log.debug("Tool call arguments delta: {}", + fn.path("arguments").asText()); + } + } + } + + String finishReason = choice.path("finish_reason").asText(null); + + if (finishReason != null && !"null".equals(finishReason)) { + log.debug("Finish reason: {}", finishReason); + } + } + } + + @Override + public String getType() { + return "chat.completion.chunk"; + } + + public JsonNode getChoices() { + return json.path("choices"); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java new file mode 100644 index 0000000000..2824842f67 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/LLMGatewayInterceptor.java @@ -0,0 +1,241 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.predic8.membrane.annot.MCAttribute; +import com.predic8.membrane.annot.MCChildElement; +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.AbstractInterceptor; +import com.predic8.membrane.core.interceptor.Outcome; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest; +import com.predic8.membrane.core.interceptor.llmgateway.store.AiApiStore; +import com.predic8.membrane.core.interceptor.llmgateway.store.AiApiUser; +import com.predic8.membrane.core.util.ConfigurationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static com.predic8.membrane.core.interceptor.Outcome.CONTINUE; +import static com.predic8.membrane.core.interceptor.Outcome.RETURN; +import static com.predic8.membrane.core.util.json.JsonUtil.setJsonBody; + +/* + * @description

+ * <p>API Gateway for Large Language Models (LLMs).</p>
+ * <p>Features:</p>
+ * <ul>
+ *   <li>Sharing an API key between multiple users</li>
+ *   <li>Enforcing token limits</li>
+ *   <li>Logging LLM usage</li>
+ * </ul>
+ * @topic 10. AI + */ +@MCElement(name = "llmGateway") +public class LLMGatewayInterceptor extends AbstractInterceptor { + + private static final Logger log = LoggerFactory.getLogger(LLMGatewayInterceptor.class); + + public static final String MEMBRANE_AI_USER = "membrane.ai.user"; + + private LLMProvider provider; + private LLMErrorCreator errorCreator; + + private String apiKey; + + private Policies policies = new Policies(); + + private AiApiStore store; + + @Override + public void init() { + super.init(); + errorCreator = provider.getErrorCreator(); + if (store != null) + store.init(router); + + // Check if the replacement markers are still there + if (apiKey != null && apiKey.contains("<<") && apiKey.contains(">>")) { + throw new ConfigurationException("The configuration contains the replacement marker %s. Substitute it with the API key of the model.".formatted(apiKey)); + } + } + + @Override + public Outcome handleRequest(Exchange exc) { + + LLMRequest aiReq; + try { + aiReq = provider.getLLMRequest(exc); + } catch (Exception e) { + exc.setResponse(errorCreator.invalidRequestError("Error parsing request: " + e.getMessage())); + return RETURN; + } + + if (!exc.getRequest().isPOSTRequest()) { + if (apiKey != null) + aiReq.setApiKey(apiKey); + return CONTINUE; + } + + AiApiUser user = null; + if (store != null) { + var opt = store.getUser(aiReq.getApiKey()); + if (opt.isEmpty()) { + exc.setResponse(errorCreator.authenticationFailed()); + return RETURN; + } + user = opt.get(); + log.debug("User: {}", user); + exc.setProperty(MEMBRANE_AI_USER, user); + } + + long inputTokens = aiReq.estimateInputTokens(); + log.debug("Estimated input tokens: {}", inputTokens); + + // Check store limits + if (store != null) { + var effectiveMaxTokens = computeEffectiveMaxOutputTokens(aiReq.getRequestedMaxOutputTokens(), policies.getMaxOutputTokens()); + var remaining = store.checkLimit(user, inputTokens, effectiveMaxTokens); + log.debug("User {} has {} remaining tokens left", 
user, remaining); + if (remaining <= 0) { + log.info("Token limit exceeded. Remaining: {} input: {} maxOutput: {}", remaining, inputTokens, effectiveMaxTokens); + exc.setResponse(errorCreator.tokenLimitExceeded(inputTokens + effectiveMaxTokens, remaining, store.getRemainingResetTime())); + return RETURN; + } + } + + // If APIKey is specified, use that for the LLM. Overwrites keys from the client + if (apiKey != null) { + aiReq.setApiKey(apiKey); + } + + log.debug("Requested model: {}", aiReq.getModel()); + + var requestedMaxOutputTokens = aiReq.getRequestedMaxOutputTokens(); + + if (policies.getMaxOutputTokens() > 0) { + if (requestedMaxOutputTokens <= 0) { + log.info("No max. output requested. Setting limit to {}.", policies.getMaxOutputTokens()); + aiReq.setMaxOutputTokens(policies.getMaxOutputTokens()); + } else if (requestedMaxOutputTokens > policies.getMaxOutputTokens()) { + log.info("Requested max. output tokens {} exceed the limit. Setting limit to {}.", requestedMaxOutputTokens, policies.getMaxOutputTokens()); + aiReq.setMaxOutputTokens(policies.getMaxOutputTokens()); + } + } + + if (policies.getMaxInputTokens() != 0) { + if (inputTokens > policies.getMaxInputTokens()) { + log.info("Input tokens {} exceed the limit of {}.", inputTokens, policies.getMaxInputTokens()); + exc.setResponse(errorCreator.inputTokensExceeded(policies.getMaxInputTokens(), inputTokens)); + return RETURN; + } + } + + if (policies.getModels() != null) { + var model = aiReq.getModel(); + if (!policies.getModels().contains(model)) { + exc.setResponse(errorCreator.modelNotAllowed(model, policies.getModels())); + return RETURN; + } + } + + log.debug("Agent provides the tools: {}", aiReq.getTools()); + + setJsonBody(exc.getRequest(), aiReq.getJson()); + return CONTINUE; + } + + long computeEffectiveMaxOutputTokens(long requestedMaxOutputTokens, long maxOutputTokens) { + if (requestedMaxOutputTokens <= 0) + return maxOutputTokens; + return Math.min(requestedMaxOutputTokens, maxOutputTokens); 
+ } + + @Override + public Outcome handleResponse(Exchange exc) { + provider.getLLMResponse(exc, res -> { + var user = exc.getProperty(MEMBRANE_AI_USER, AiApiUser.class); + log.debug("Token usage of user {}: {}", user, res.getUsage()); + if (store != null) { + store.store(user, res.getUsage()); + } + }); + + return CONTINUE; + } + + public String getApiKey() { + return apiKey; + } + + /** + * @param apiKey LLM provider API key + * @description API key for the LLM provider. Specify here the API key from OpenAI or Anthropic. + */ + @MCAttribute + public void setApiKey(String apiKey) { + this.apiKey = apiKey; + } + + public AiApiStore getAiStore() { + return store; + } + + /** + * @param store Store for API keys and usage statistics + * @description The LLM Gateway can operate stateless and statefully. For stateful operation, specify an AiApiStore. + * A store is needed for user authentication at the gateway. + * The gateway will use the store to enforce token limits and log usage statistics. + */ + @MCChildElement(allowForeign = true, order = 30) + public void setAiStore(AiApiStore store) { + this.store = store; + } + + @Override + public String getDisplayName() { + return "LLM Gateway"; + } + + public LLMProvider getProvider() { + return provider; + } + + /** + * @param provider The LLM provider to use. + * @description The LLM provider to use. Currently, OpenAI, Anthropic and Gemini are supported. + * The provider determines the API used to talk to the LLM. The provider can be different as long as the API is supported. + */ + @MCChildElement(order = 10) + public void setProvider(LLMProvider provider) { + this.provider = provider; + } + + public Policies getPolicies() { + return policies; + } + + /** + * + * @param policies Usage policy for the LLM Gateway. 
+ */ + @MCChildElement(order = 20) + public void setPolicies(Policies policies) { + this.policies = policies; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java new file mode 100644 index 0000000000..cfbf960e10 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/Policies.java @@ -0,0 +1,76 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.predic8.membrane.annot.MCAttribute; +import com.predic8.membrane.annot.MCElement; + +import java.util.List; + +/** + * LLM Gateway policies for token usage and model restrictions. + */ +@MCElement(name = "policies", topLevel = false, id="llm-gateway-policies") +public class Policies { + + private List models; + private int maxOutputTokens; + private int maxInputTokens; + + public List getModels() { + return models; + } + + /** + * @param models List of models that can be used by the gateway. + * @desciption Restricts the models that can be used by the gateway. + * @default null (no restriction) + */ + @MCAttribute + public void setModels(List models) { + this.models = models; + } + + + public int getMaxOutputTokens() { + return maxOutputTokens; + } + + /** + * @param maxOutputTokens Maximum number of tokens the LLM should use to generate a response. 
+ * @description Maximum number of tokens the LLM should use to generate a response. This is just a hint that the gateway + * sends to the LLM provider. The provider may use a different limit. + * @default 0 (unlimited) + */ + @MCAttribute + public void setMaxOutputTokens(int maxOutputTokens) { + this.maxOutputTokens = maxOutputTokens; + } + + public int getMaxInputTokens() { + return maxInputTokens; + } + + /** + * @param maxInputTokens Maximum number of tokens that a request can use. + * @description Restricts token usage for the input. The size of the input is estimated by gateway based on the request size. + * Actual token usage may be deviate from this value. + */ + @MCAttribute + public void setMaxInputTokens(int maxInputTokens) { + this.maxInputTokens = maxInputTokens; + } + +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java new file mode 100644 index 0000000000..4b726bec62 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/ResponsesApiEvent.java @@ -0,0 +1,66 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ResponsesApiEvent extends AbstractLLMEvent { + + private static final Logger log = LoggerFactory.getLogger(ResponsesApiEvent.class); + + private final String type; + + public ResponsesApiEvent(JsonNode json) { + super(json); + + this.type = json.path("type").asText(); + + log.debug("Responses API event: {}", type); + + if ("response.output_item.done".equals(type)) { + + var item = json.path("item"); + + if (item.isObject()) { + var on = (ObjectNode) item; + + if ("function_call".equals(on.path("type").asText())) { + if (log.isDebugEnabled()) { + log.debug("Function call: {} with params {}", + on.path("name").asText(), + on.path("arguments").asText()); + } else { + log.info("Function call: {}", on.path("name")); + } + } + } + } + } + + @Override + public String getType() { + return type; + } + + @Override + public String toString() { + return "ResponsesApiEvent{" + + "type='" + type + '\'' + + '}'; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java new file mode 100644 index 0000000000..6ecf4d7ef5 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMErrorCreator.java @@ -0,0 +1,47 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.fasterxml.jackson.databind.ObjectMapper; + +public abstract class AbstractLLMErrorCreator implements LLMErrorCreator { + + private static final ObjectMapper om = new ObjectMapper(); + + public static String createJson(Object o) { + try { + return om.writeValueAsString(o); + } catch (Exception e) { + return """ + { "error": "Could not create JSON" } + """; + } + } + + public String envelope(String message, String type, String param, String code) { + return createJson(new ErrorEnvelope(new ErrorBody(message,type,param,code))); + } + + private record ErrorEnvelope(ErrorBody error) { + } + + private record ErrorBody( + String message, + String type, + String param, + String code + ) { + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java new file mode 100644 index 0000000000..391324f38e --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMMessage.java @@ -0,0 +1,39 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.predic8.membrane.core.exchange.Exchange; + +public class AbstractLLMMessage { + + protected final Exchange exchange; + + public enum API { COMPLETIONS, NORMAL } + + protected API api; + + protected AbstractLLMMessage(Exchange exchange) { + this.exchange = exchange; + api = getAPI(exchange); + } + + protected API getAPI(Exchange exchange) { + if (exchange.getRequest().getUri().contains("/chat/completions")) { + return API.COMPLETIONS; + } else { + return API.NORMAL; + } + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java new file mode 100644 index 0000000000..f5955d6acb --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMRequest.java @@ -0,0 +1,92 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.util.json.JsonUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.List; + +import static com.predic8.membrane.core.http.Header.AUTHORIZATION; + +public abstract class AbstractLLMRequest extends AbstractLLMMessage implements LLMRequest { + + private static final Logger log = LoggerFactory.getLogger(AbstractLLMRequest.class); + + public static final String BEARER_PREFIX = "Bearer"; + + protected ObjectNode json; + + public AbstractLLMRequest(Exchange exchange) { + super(exchange); + + if (exchange.getRequest().isJSON()) { + json = JsonUtil.getJsonObject(exchange.getRequest()).orElseThrow(() -> new RuntimeException("Cannot parse input as JSON message.")); + } else { + log.info("Request is not JSON:"); + throw new RuntimeException("Request is not JSON."); + } + } + + public List getTools() { + return Collections.emptyList(); + } + + protected ArrayNode getToolsNode() { + if (json == null) + return null; + if (json.path("tools").isArray()) + return (ArrayNode) json.path("tools"); + return null; + } + + @Override + public void setApiKey(String apiKey) { + exchange.getRequest().getHeader().removeFields(AUTHORIZATION); + exchange.getRequest().getHeader().add(AUTHORIZATION, "Bearer " + apiKey); + } + + @Override + public String getApiKey() { + var ah = exchange.getRequest().getHeader().getAuthorization(); + if (ah == null) { + return null; + } + + int index = ah.indexOf(BEARER_PREFIX); + if (index < 0) { + return null; + } + + var token = ah.substring(index + BEARER_PREFIX.length()).trim(); + + return token.isEmpty() ? 
null : token; + } + + @Override + public ObjectNode getJson() { + return json; + } + + @Override + public String getModel() { + return json.path("model").asText(); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java new file mode 100644 index 0000000000..4732d0a0a5 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/AbstractLLMResponse.java @@ -0,0 +1,98 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.http.AbstractMessageObserver; +import com.predic8.membrane.core.http.Chunk; +import com.predic8.membrane.core.util.http.SSEParser; +import com.predic8.membrane.core.util.json.JsonUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.function.Consumer; + +public abstract class AbstractLLMResponse extends AbstractLLMMessage implements LLMResponse { + + private static final Logger log = LoggerFactory.getLogger(AbstractLLMResponse.class); + + protected ObjectNode json; + protected Consumer postProcessor; + + public AbstractLLMResponse(Exchange exchange, Consumer postProcessor) { + super(exchange); + this.postProcessor = postProcessor; + var msg = exchange.getResponse(); + + if (msg.isStream()) { + + log.debug("Streaming response."); + + var parser = new SSEParser(getTerminalEvents()); + + msg.getBody().addObserver(new AbstractMessageObserver() { + @Override + public void bodyChunk(Chunk chunk) { + processChunk(chunk, parser); + } + }); + } else { + json = JsonUtil.getJsonObject(exchange.getResponse()) + .orElse(JsonNodeFactory.instance.objectNode().put("error", "No JSON object response from model.")); + postProcessor.accept(this); + } + } + + protected void processChunk(Chunk chunk, SSEParser parser) { + // Wait for terminal chunk + if (!parser.parse(chunk)) { + return; + } + + // Now all chunks are parsed + + var events = parser.getEvents(); + var terminal = parser.getTerminalEvent(); + + log.debug("Events: {}", events.size()); + events.forEach(this::process); + + terminal.ifPresent(event -> { + processTerminalEvent(event); + postProcessor.accept(AbstractLLMResponse.this); + }); + } + + protected void 
processTerminalEvent(SSEParser.SSEEvent terminal) {} + + @Override + public boolean isError() { + return json.get("error") != null && !json.get("error").isNull(); + } + + protected static int getOutputTokens(JsonNode usage) { + return usage.path("output_tokens").asInt( + usage.path("completion_tokens").asInt(0) + ); + } + + protected static int getInputTokens(JsonNode usage) { + return usage.path("input_tokens").asInt( + usage.path("prompt_tokens").asInt(0)); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java new file mode 100644 index 0000000000..732a1332fe --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMErrorCreator.java @@ -0,0 +1,38 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.predic8.membrane.core.http.Response; + +import java.util.Collection; + +public interface LLMErrorCreator { + + Response invalidRequestError(String message); + + Response tokenLimitExceeded(long tokenRequired, long tokenRemaining, long tokenResetInSeconds); + + Response modelNotAllowed(String model, Collection allowedModels); + + Response authenticationFailed(); + + /** + * + * @param maxTokens as configured + * @param estimatedTokens estimated number of input tokens + * @return Response error response + */ + Response inputTokensExceeded(long maxTokens, long estimatedTokens); +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java new file mode 100644 index 0000000000..1fb2fc4eae --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMProvider.java @@ -0,0 +1,27 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.predic8.membrane.core.exchange.Exchange; + +import java.util.function.Consumer; + +public interface LLMProvider { + + LLMRequest getLLMRequest(Exchange request); + LLMResponse getLLMResponse(Exchange request, Consumer postProcessor); + LLMErrorCreator getErrorCreator(); + +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java new file mode 100644 index 0000000000..371115e911 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMRequest.java @@ -0,0 +1,43 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.fasterxml.jackson.databind.node.ObjectNode; + +import java.util.List; + +public interface LLMRequest { + + String getModel(); + + String getApiKey(); + + void setApiKey(String apiKey); + + /** + * The max number of tokens that the model is allowed to generate as specified by the client. + * @return The max number of tokens that the model is allowed to generate. -1 if no limit is set. 
+ */ + long getRequestedMaxOutputTokens(); + + void setMaxOutputTokens(int maxOutputTokens); + + long estimateInputTokens(); + + ObjectNode getJson(); + + List getTools(); + +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java new file mode 100644 index 0000000000..3d3ed9bd78 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/LLMResponse.java @@ -0,0 +1,32 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider; + +import com.predic8.membrane.core.interceptor.llmgateway.store.Usage; +import com.predic8.membrane.core.util.http.SSEParser.SSEEvent; + +import java.util.Set; + +public interface LLMResponse { + + boolean isError(); + + Usage getUsage(); + + Set getTerminalEvents(); + + void process(SSEEvent event); + +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java new file mode 100644 index 0000000000..643786b0a4 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsErrorCreator.java @@ -0,0 +1,67 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions; + +import com.predic8.membrane.core.http.Response; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMErrorCreator; + +import java.util.Collection; + +import static com.predic8.membrane.core.http.Header.WWW_AUTHENTICATE; +import static com.predic8.membrane.core.http.Response.*; + +public class ChatCompletionsErrorCreator extends AbstractLLMErrorCreator { + + @Override + public Response invalidRequestError(String message) { + return badRequest().json(envelope(message, "invalid_request_error", null, "bad_request")).build(); + } + + public Response tokenLimitExceeded(long tokenRequired, long tokenRemaining, long tokenResetInSeconds) { + return statusCode(429).json(envelope( + "Token rate limit exceeded. Request requires %d tokens but only %d remain. Please wait %d seconds before retrying.".formatted(tokenRequired, tokenRemaining, tokenResetInSeconds), + "rate_limit_error", + null, + "token_limit_exceeded")).build(); + } + + public Response modelNotAllowed(String model, Collection allowedModels) { + return badRequest().json(envelope( + "Model '%s' is not allowed. Allowed models: %s." + .formatted(model, String.join(", ", allowedModels)), + "invalid_request_error", + null, + "model_not_allowed")).build(); + } + + public Response authenticationFailed() { + return unauthorized().header(WWW_AUTHENTICATE, "Bearer").json(envelope( + "Invalid authentication credentials", + "invalid_request_error", + null, + "invalid_authentication")).build(); + } + + public Response inputTokensExceeded(long maxTokens, long estimatedTokens) { + return badRequest().json(envelope( + """ + This model's maximum context length is %d tokens. + Your request contains approximately %d tokens. 
+ """.formatted(maxTokens, estimatedTokens).trim(), + "invalid_request_error", + "input", + "context_length_exceeded")).build(); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java new file mode 100644 index 0000000000..1ac5be3699 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsProvider.java @@ -0,0 +1,62 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions; + +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; + +import java.util.function.Consumer; + +/** + * @description + * OpenAI Chat Completions API compatible provider. + * Can be used for the following providers: + *
    + *
  • Azure OpenAI
  • + *
  • Google Gemini (OpenAI compatible endpoint)
  • + *
  • TogetherAI
  • + *
  • Fireworks AI
  • + *
  • DeepSeek AI
  • + *
  • OpenRouter
  • + *
  • Mistral AI
  • + *
  • DeepInfra
  • + *
  • SiliconFlow
  • + *
  • NVIDIA NIM
  • + *
  • LM Studio
  • + *
  • vLLM
  • + *
  • Ollama
  • + *
+ */
+@MCElement(name = "chatCompletions")
+public class ChatCompletionsProvider implements LLMProvider {
+    @Override
+    public LLMRequest getLLMRequest(Exchange request) {
+        return new ChatCompletionsRequest(request);
+    }
+
+    @Override
+    public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) {
+        return new ChatCompletionsResponse(request, postProcessor);
+    }
+
+    @Override
+    public LLMErrorCreator getErrorCreator() {
+        return new ChatCompletionsErrorCreator(); // was null: callers need a creator to build Chat Completions style error responses
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
new file mode 100644
index 0000000000..4ecbf9065a
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsRequest.java
@@ -0,0 +1,61 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.openai.AbstractOpenAiLLMRequest;
+
+import java.util.List;
+
+import static java.util.Collections.emptyList;
+
+public class ChatCompletionsRequest extends AbstractOpenAiLLMRequest {
+
+    public ChatCompletionsRequest(Exchange exchange) {
+        super(exchange);
+
+        if (json == null) {
+            return;
+        }
+
+        // Make sure that when streaming is enabled, the usage is included in the response.
+        if (json.path("stream").asBoolean(false)) {
+            var streamOptions = json.withObject("/stream_options");
+            streamOptions.put("include_usage", true);
+        }
+    }
+
+    @Override
+    public void setMaxOutputTokens(int maxOutputTokens) {
+        json.put(json.has("max_completion_tokens") ? "max_completion_tokens" : "max_tokens", maxOutputTokens); // overwrite the limit field the client used so the request never carries two different limits
+    }
+
+    public List getTools() {
+        var tools = getToolsNode();
+        if (tools == null)
+            return emptyList();
+        return tools.valueStream()
+                .filter(n -> "function".equals(n.path("type").asText("")))
+                .map(n -> n.path("function").path("name").asText(""))
+                .filter(name -> !name.isEmpty())
+                .toList();
+    }
+
+    @Override
+    public long getRequestedMaxOutputTokens() {
+        return json.path("max_completion_tokens").asLong(json.path("max_tokens").asLong(0)); // fall back to the legacy max_tokens field, which setMaxOutputTokens also writes
+    }
+
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java
new file mode 100644
index 0000000000..2b1acc0047
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/chatcompletions/ChatCompletionsResponse.java
@@ -0,0 +1,69 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions;
+
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.AbstractLLMEvent;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
+import com.predic8.membrane.core.util.http.SSEParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Set;
+import java.util.function.Consumer;
+
+public class ChatCompletionsResponse extends AbstractLLMResponse {
+
+    private static final Logger log = LoggerFactory.getLogger(ChatCompletionsResponse.class);
+
+    public ChatCompletionsResponse(Exchange exchange, Consumer postProcessor) {
+        super(exchange, postProcessor);
+    }
+
+    @Override
+    public Usage getUsage() {
+        // NOTE(review): json appears to be assigned only for non-streaming responses (AbstractLLMResponse constructor); for a streamed response this would throw a NullPointerException - confirm usage is captured from the stream.
+        var usage = json.path("usage");
+
+        var inputTokens = usage.path("prompt_tokens").asInt(0);
+        var outputTokens = usage.path("completion_tokens").asInt(0);
+        var totalTokens = usage.path("total_tokens").asInt(inputTokens + outputTokens);
+
+        return new Usage(
+                inputTokens,
+                outputTokens,
+                totalTokens
+        );
+    }
+
+    @Override
+    public Set getTerminalEvents() {
+        return Set.of("[DONE]");
+    }
+
+    @Override
+    protected void processTerminalEvent(SSEParser.SSEEvent terminal) {
+        // Intentionally empty: AbstractLLMResponse.processChunk invokes the postProcessor right after this hook, so calling it here ran it twice.
+    }
+
+    @Override
+    public void process(SSEParser.SSEEvent e) {
+        log.debug("Data: {}", e.data());
+        var event = AbstractLLMEvent.create(e);
+        log.debug("Event: {}", event);
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java
new file mode 100644
index 0000000000..1fbcf2f1a1
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorCreator.java
@@ -0,0 +1,99 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.predic8.membrane.core.http.Response; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.claude.ClaudeErrorResponse.ClaudeError; + +import java.util.Collection; +import java.util.UUID; + +import static com.predic8.membrane.core.http.Header.WWW_AUTHENTICATE; +import static com.predic8.membrane.core.http.Response.*; + +public class ClaudeErrorCreator implements LLMErrorCreator { + + private static final String INVALID_REQUEST_ERROR = "invalid_request_error"; + private static final String AUTHENTICATION_ERROR = "authentication_error"; + private static final String RATE_LIMIT_ERROR = "rate_limit_error"; + + @Override + public Response invalidRequestError(String message) { + return badRequest() + .json(error(INVALID_REQUEST_ERROR, message)) + .build(); + } + + @Override + public Response tokenLimitExceeded(long tokenRequired, long tokenRemaining, long tokenResetInSeconds) { + long visibleRemaining = Math.max(0, tokenRemaining); + + return statusCode(429) + .json(error( + RATE_LIMIT_ERROR, + """ + Token rate limit exceeded. + Request requires %d tokens but only %d remain. + Retry after %d seconds. + """.formatted(tokenRequired, visibleRemaining, tokenResetInSeconds).trim() + )) + .build(); + } + + @Override + public Response modelNotAllowed(String model, Collection allowedModels) { + return badRequest() + .json(error( + INVALID_REQUEST_ERROR, + "Model '%s' is not allowed. Allowed models: %s." 
+ .formatted(model, String.join(", ", allowedModels)) + )) + .build(); + } + + @Override + public Response authenticationFailed() { + return unauthorized() + .header(WWW_AUTHENTICATE, "Bearer") + .json(error(AUTHENTICATION_ERROR, "Invalid bearer token")) + .build(); + } + + @Override + public Response inputTokensExceeded(long maxTokens, long estimatedTokens) { + return badRequest() + .json(error( + INVALID_REQUEST_ERROR, + """ + prompt is too long: + %d tokens > %d maximum + """.formatted(estimatedTokens, maxTokens).trim() + )) + .build(); + } + + private String error(String type, String message) { + return ClaudeErrorResponse.builder() + .type("error") + .error( + ClaudeError.builder() + .type(type) + .message(message) + ) + .requestId("membrane_" + UUID.randomUUID()) + .toJson(); + } +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java new file mode 100644 index 0000000000..0ff004834e --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeErrorResponse.java @@ -0,0 +1,114 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class ClaudeErrorResponse { + + private static final ObjectMapper om = new ObjectMapper(); + + private String type = "error"; + private ClaudeError error; + private String request_id; + + public static ClaudeErrorResponse builder() { + return new ClaudeErrorResponse(); + } + + public String getType() { + return type; + } + + public ClaudeErrorResponse type(String type) { + this.type = type; + return this; + } + + public ClaudeError getError() { + return error; + } + + public ClaudeErrorResponse error(ClaudeError error) { + this.error = error; + return this; + } + + public String getRequest_id() { + return request_id; + } + + public ClaudeErrorResponse requestId(String requestId) { + this.request_id = requestId; + return this; + } + + public String toJson() { + try { + return om.writeValueAsString(this); + } catch (JsonProcessingException e) { + throw new RuntimeException("Failed to serialize ClaudeErrorResponse", e); + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class ClaudeError { + + private String type; + private String message; + + public static ClaudeError builder() { + return new ClaudeError(); + } + + public String getType() { + return type; + } + + public ClaudeError type(String type) { + this.type = type; + return this; + } + + public String getMessage() { + return message; + } + + public ClaudeError message(String message) { + this.message = message; + return this; + } + + @Override + public String toString() { + return "ClaudeError{" + + "type='" + type + '\'' + + ", message='" + message + '\'' + + '}'; + } + } + + @Override + public String toString() { + return "ClaudeErrorResponse{" + + "type='" + type + '\'' + + ", 
error=" + error + + ", request_id='" + request_id + '\'' + + '}'; + } +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java new file mode 100644 index 0000000000..fa5279afe4 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMRequest.java @@ -0,0 +1,108 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ClaudeLLMRequest extends AbstractLLMRequest { + + private static final Logger log = LoggerFactory.getLogger(ClaudeLLMRequest.class); + + public static final String X_API_KEY = "x-api-key"; + + public ClaudeLLMRequest(Exchange exchange) { + super(exchange); + + exchange.getRequest().getHeader().setValue( "Accept-Encoding","identity"); + } + + public void setMaxOutputTokens(int maxOutputTokens) { + + // Thinking needs a certain number of tokens + if (maxOutputTokens < 2048 && isThinking()) { + log.info("maxOutputTokens is {}. 
Too low for thinking. Disabling thinking.", maxOutputTokens); + disableThinking(); + } + + json.put("max_tokens", maxOutputTokens); + + if (isThinking()) { + var thinking = (ObjectNode) json.path("thinking"); + if (!thinking.path("budget_tokens").isNull()) { + var budgetTokens = thinking.path("budget_tokens").asInt(); + if (budgetTokens >= maxOutputTokens) { + // budget_tokens must be smaller than max_tokens + // value might vary between models + thinking.put("budget_tokens", Math.min(maxOutputTokens / 2, 1024)); + } + } + + } + } + + @Override + public long estimateInputTokens() { + // System prompt + long tokens = json.path("system").asText().length() / 4; + + // Messages + for (var message : json.path("messages")) { + var content = message.path("content"); + if (content.isTextual()) { + tokens += content.asText().length() / 4; + } else if (content.isArray()) { + for (var block : content) { + var type = block.path("type").asText(); + if (type.equals("text")) { + tokens += block.path("text").asText().length() / 4; + } else if (type.equals("image")) { + tokens += 1000; + } + } + } + } + return tokens; + } + + private boolean isThinking() { + var thinking = json.path("thinking"); + return thinking.isObject() && "enabled".equals(thinking.path("type").asText()); + } + + private void disableThinking() { + var thinking = json.putObject("thinking"); + thinking.put("type", "disabled"); + } + + @Override + public long getRequestedMaxOutputTokens() { + return json.path("max_tokens").asLong(0); + } + + @Override + public String getApiKey() { + return exchange.getRequest().getHeader().getFirstValue(X_API_KEY); + } + + @Override + public void setApiKey(String apiKey) { + exchange.getRequest().getHeader().removeFields(X_API_KEY); + exchange.getRequest().getHeader().add(X_API_KEY, apiKey); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java 
b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java
new file mode 100644
index 0000000000..8d534643ea
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeLLMResponse.java
@@ -0,0 +1,101 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+import com.predic8.membrane.core.interceptor.llmgateway.store.Usage;
+import com.predic8.membrane.core.util.http.SSEParser.SSEEvent;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Set;
+import java.util.function.Consumer;
+
+public class ClaudeLLMResponse extends AbstractLLMResponse {
+
+    private static final Logger log = LoggerFactory.getLogger(ClaudeLLMResponse.class);
+
+    private Usage usage;
+
+    private final StringBuffer inputJson = new StringBuffer();
+
+    private String tool;
+
+    public ClaudeLLMResponse(Exchange exchange, Consumer postProcessor) {
+        super(exchange,postProcessor);
+    }
+
+    @Override
+    public Set getTerminalEvents() {
+        return Set.of("message_stop");
+    }
+
+    @Override
+    public void process(SSEEvent event) {
+        log.debug("Event: {}", event);
+
+        if ("content_block_start".equals(event.name())) {
+            var cbs = ContentBlockStart.from(event.json());
+            if (cbs.getToolUse() != null) {
+                tool = cbs.getToolUse().getName();
+            }
+        }
+        if ("message_delta".equals(event.name())) {
+            var md = MessageDelta.from(event.json());
+            log.debug("Message delta: {}", md);
+            if (md.getUsage() != null) {
+                usage = md.getUsage();
+                if (tool != null)
+                    log.debug("Tool {} with {}", tool, inputJson.toString());
+            }
+        }
+        if ("content_block_delta".equals(event.name())) {
+            var cbd = ContentBlockDelta.from(event.json());
+            if (cbd.isInputJsonDelta()) {
+                inputJson.append(cbd.getPartialJson());
+            }
+        }
+    }
+
+    Usage extractUsage() {
+
+        var usage = json.path("usage");
+
+        var inputTokens = getInputTokens(usage);
+        var outputTokens = getOutputTokens(usage);
+        var totalTokens = inputTokens + outputTokens;
+        return new Usage(inputTokens, outputTokens, totalTokens);
+
+    }
+
+    protected static int getOutputTokens(JsonNode usage) {
+        return usage.path("output_tokens").asInt(0);
+    }
+
+    protected static int getInputTokens(JsonNode usage) {
+        return usage.path("input_tokens").asInt(0) + usage.path("cache_creation_input_tokens").asInt(0) + usage.path("cache_read_input_tokens").asInt(0); // count cache tokens as input, matching MessageDelta on the streaming path
+    }
+
+    @Override
+    public Usage getUsage() {
+        if (usage != null)
+            return usage;
+        return usage = extractUsage();
+    }
+
+}
\ No newline at end of file
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
new file mode 100644
index 0000000000..a296575058
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ClaudeProvider.java
@@ -0,0 +1,47 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.provider.claude;
+
+import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.exchange.Exchange;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest;
+import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse;
+
+import java.util.function.Consumer;
+
+/**
+ * @description (Experimental) Anthropic Claude provider configuration.
+ * Use it to configure an LLM gateway to use the Anthropic API.
+ */
+@MCElement( name="claude")
+public class ClaudeProvider implements LLMProvider {
+
+    @Override
+    public LLMRequest getLLMRequest(Exchange exchange) {
+        return new ClaudeLLMRequest(exchange);
+    }
+
+    @Override
+    public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) {
+        return new ClaudeLLMResponse(request, postProcessor);
+    }
+
+    @Override
+    public LLMErrorCreator getErrorCreator() {
+        return new ClaudeErrorCreator();
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java
new file mode 100644
index 0000000000..5e5a0648bb
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockDelta.java
@@ -0,0 +1,53 @@
+/* Copyright 2026 predic8 GmbH,
www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + +public class ContentBlockDelta { + + private int index; + private String deltaType; + private String partialJson; + + public static ContentBlockDelta from(ObjectNode on) { + var cbd = new ContentBlockDelta(); + + cbd.index = on.path("index").asInt(); + + JsonNode delta = on.path("delta"); + cbd.deltaType = delta.path("type").asText(null); + cbd.partialJson = delta.path("partial_json").asText(""); + + return cbd; + } + + public boolean isInputJsonDelta() { + return "input_json_delta".equals(deltaType); + } + + public int getIndex() { + return index; + } + + public String getDeltaType() { + return deltaType; + } + + public String getPartialJson() { + return partialJson; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java new file mode 100644 index 0000000000..bdf2be207b --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ContentBlockStart.java @@ -0,0 +1,37 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file 
except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.node.ObjectNode; + +public class ContentBlockStart { + + private ToolUse toolUse; + + public static ContentBlockStart from(ObjectNode on) { + var cbs = new ContentBlockStart(); + var cb = (ObjectNode) on.path("content_block"); + + if ("tool_use".equals(cb.path("type").asText())) { + cbs.toolUse = ToolUse.from(cb); + } + + return cbs; + } + + public ToolUse getToolUse() { + return toolUse; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java new file mode 100644 index 0000000000..4aa68fa737 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/MessageDelta.java @@ -0,0 +1,87 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.interceptor.llmgateway.store.Usage; + +public class MessageDelta { + + private String stopReason; + private int inputTokens; + private int outputTokens; + private int cacheCreationInputTokens; + private int cacheReadInputTokens; + + private Usage usage; + + public static MessageDelta from(ObjectNode on) { + var md = new MessageDelta(); + + JsonNode delta = on.path("delta"); + md.stopReason = delta.path("stop_reason").asText(null); + + JsonNode u = on.path("usage"); + if (u.isObject()) { + md.inputTokens = u.path("input_tokens").asInt(0); + md.outputTokens = u.path("output_tokens").asInt(0); + md.cacheCreationInputTokens = u.path("cache_creation_input_tokens").asInt(0); + md.cacheReadInputTokens = u.path("cache_read_input_tokens").asInt(0); + + // Cache tokens (cache_creation_input_tokens and cache_read_input_tokens) are billable according to Claude's pricing model + int effectiveInputTokens = md.inputTokens + md.cacheCreationInputTokens + md.cacheReadInputTokens; + md.usage = new Usage(effectiveInputTokens,md.outputTokens, effectiveInputTokens + md.outputTokens); + + } + + return md; + } + + public String getStopReason() { + return stopReason; + } + + public int getInputTokens() { + return inputTokens; + } + + public int getOutputTokens() { + return outputTokens; + } + + public int getCacheCreationInputTokens() { + return cacheCreationInputTokens; + } + + public int getCacheReadInputTokens() { + return cacheReadInputTokens; + } + + public Usage getUsage() { + return usage; + } + + @Override + public String toString() { + return "MessageDelta{" + + "stopReason='" + stopReason + '\'' + + ", inputTokens=" + inputTokens + + ", outputTokens=" + outputTokens + + ", cacheCreationInputTokens=" + cacheCreationInputTokens + + ", cacheReadInputTokens=" + 
cacheReadInputTokens + + '}'; + } +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java new file mode 100644 index 0000000000..5694468d9e --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/claude/ToolUse.java @@ -0,0 +1,36 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.claude; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ToolUse { + + private static final Logger log = LoggerFactory.getLogger(ToolUse.class); + + private String name; + + public static ToolUse from(ObjectNode on) { + var tu = new ToolUse(); + tu.name = on.path("name").asText(); + return tu; + } + + public String getName() { + return name; + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java new file mode 100644 index 0000000000..1b86f0f39b --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleErrorCreator.java @@ -0,0 +1,114 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.google; + +import com.predic8.membrane.core.http.Response; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMErrorCreator; + +import java.util.Collection; + +import static com.predic8.membrane.core.http.Header.WWW_AUTHENTICATE; +import static com.predic8.membrane.core.http.Response.*; + +public class GoogleErrorCreator extends AbstractLLMErrorCreator { + + @Override + public Response invalidRequestError(String message) { + return badRequest().json( + envelope(400, message, "INVALID_ARGUMENT") + ).build(); + } + + public Response tokenLimitExceeded(long tokenRequired, + long tokenRemaining, + long tokenResetInSeconds) { + + var visibleRemaining = Math.max(0, tokenRemaining); + + return statusCode(429).json( + envelope( + 429, + """ + Token rate limit exceeded. + Request requires %d tokens but only %d remain. + Retry after %d seconds. + """ + .formatted(tokenRequired, visibleRemaining, tokenResetInSeconds) + .trim(), + "RESOURCE_EXHAUSTED" + ) + ).build(); + } + + public Response modelNotAllowed(String model, + Collection allowedModels) { + + return badRequest().json( + envelope( + 400, + "Model '%s' is not allowed. Allowed models: %s." + .formatted(model, String.join(", ", allowedModels)), + "INVALID_ARGUMENT" + ) + ).build(); + } + + public Response authenticationFailed() { + return unauthorized() + .header(WWW_AUTHENTICATE, "Bearer") + .json( + envelope( + 401, + "Invalid API key.", + "UNAUTHENTICATED" + ) + ).build(); + } + + public Response inputTokensExceeded(long maxTokens, + long estimatedTokens) { + + return badRequest().json( + envelope( + 400, + """ + The input token count (%d) exceeds the maximum allowed (%d). 
+ """ + .formatted(estimatedTokens, maxTokens) + .trim(), + "INVALID_ARGUMENT" + ) + ).build(); + } + + private String envelope(int code, + String message, + String status) { + + return createJson(new ErrorEnvelope( + new ErrorBody(code, message, status) + )); + } + + private record ErrorEnvelope(ErrorBody error) { + } + + private record ErrorBody( + int code, + String message, + String status + ) { + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java new file mode 100644 index 0000000000..bd60b10617 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMRequest.java @@ -0,0 +1,148 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.google; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest; + +public class GoogleLLMRequest extends AbstractLLMRequest { + + /** + * x-goog-api-key is correct it is not google + */ + public static final String X_GOOG_API_KEY = "x-goog-api-key"; + + public GoogleLLMRequest(Exchange exchange) { + super(exchange); + } + + @Override + public String getModel() { + + var uri = exchange.getRequest().getUri(); + + if (uri == null) { + return null; + } + + // Example: + // /v1beta/models/gemini-2.5-pro:generateContent + int modelsIndex = uri.indexOf("/models/"); + if (modelsIndex < 0) { + return null; + } + + var modelPart = uri.substring(modelsIndex + "/models/".length()); + + // Support both ':' and URL-encoded '%3A' / '%3a' as separator before the action suffix + // (e.g. ':generateContent' or '%3AgenerateContent'). 
+ int colonIndex = modelPart.indexOf(':'); + if (colonIndex < 0) { + colonIndex = modelPart.toLowerCase().indexOf("%3a"); + } + if (colonIndex >= 0) { + return modelPart.substring(0, colonIndex); + } + + return modelPart; + } + + @Override + public String getApiKey() { + return exchange.getRequest().getHeader().getFirstValue(X_GOOG_API_KEY); + } + + @Override + public void setApiKey(String apiKey) { + exchange.getRequest().getHeader().removeFields(X_GOOG_API_KEY); + exchange.getRequest().getHeader().add(X_GOOG_API_KEY, apiKey); + } + + @Override + public long getRequestedMaxOutputTokens() { + return json.path("generationConfig") + .path("maxOutputTokens") + .asLong(0); + } + + public long estimateInputTokens() { + if (json == null || json.isNull()) { + return 0; + } + + long chars = countText(json.path("systemInstruction")); + + var contents = json.path("contents"); + if (contents.isArray()) { + for (JsonNode content : contents) { + chars += countText(content.path("parts")); + } + } + + // Safety margin for JSON structure, roles, metadata, etc. 
+ return Math.max(1, Math.round(chars / 4.0 * 1.15)); + } + + private long countText(JsonNode node) { + if (node == null || node.isMissingNode() || node.isNull()) { + return 0; + } + + if (node.isTextual()) { + return node.asText().length(); + } + + if (node.isObject()) { + long chars = 0; + + JsonNode text = node.get("text"); + if (text != null && text.isTextual()) { + chars += text.asText().length(); + } + + JsonNode parts = node.get("parts"); + if (parts != null) { + chars += countText(parts); + } + + return chars; + } + + if (node.isArray()) { + long chars = 0; + for (JsonNode child : node) { + chars += countText(child); + } + return chars; + } + + return 0; + } + + @Override + public void setMaxOutputTokens(int maxOutputTokens) { + getGenerationConfig().put("maxOutputTokens", maxOutputTokens); + } + + private ObjectNode getGenerationConfig() { + var gc = json.get("generationConfig"); + if (gc instanceof ObjectNode objectNode) { + return objectNode; + } + return json.putObject("generationConfig"); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMResponse.java new file mode 100644 index 0000000000..abf1c0a592 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleLLMResponse.java @@ -0,0 +1,58 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.google; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.store.Usage; +import com.predic8.membrane.core.util.http.SSEParser; + +import java.util.Set; +import java.util.function.Consumer; + +public class GoogleLLMResponse extends AbstractLLMResponse { + + public GoogleLLMResponse(Exchange exchange, Consumer postProcessor) { + super(exchange, postProcessor); + } + + @Override + public Usage getUsage() { + var usage = json.path("usageMetadata"); + + int inputTokens = usage.path("promptTokenCount").asInt(0); + int thoughtsTokens = usage.path("thoughtsTokenCount").asInt(0); + int candidatesTokenCount = usage.path("candidatesTokenCount").asInt(0); + int outputTokens = thoughtsTokens + candidatesTokenCount; + int totalTokens = usage.path("totalTokenCount").asInt(inputTokens + outputTokens); + + return new Usage( + inputTokens, + outputTokens, + totalTokens + ); + } + + @Override + public Set getTerminalEvents() { + return Set.of("response.completed","response.incompleted"); + } + + @Override + public void process(SSEParser.SSEEvent event) { + + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java new file mode 100644 index 0000000000..b1b36ea1df --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/google/GoogleProvider.java @@ -0,0 +1,47 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you 
may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.google; + +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; + +import java.util.function.Consumer; + +/** + * @description (Experimental)Google AI provider configuration + * Use to configure a LLM gateway to use the Google LLM API + */ +@MCElement( name="google",id = "google-ai-provider") +public class GoogleProvider implements LLMProvider { + + @Override + public LLMRequest getLLMRequest(Exchange exchange) { + return new GoogleLLMRequest(exchange); + } + + @Override + public LLMResponse getLLMResponse(Exchange request, Consumer postProcessor) { + return new GoogleLLMResponse(request, postProcessor); + } + + @Override + public LLMErrorCreator getErrorCreator() { + return new GoogleErrorCreator(); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java new file mode 100644 index 0000000000..b49e7440fc --- /dev/null +++ 
b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/AbstractOpenAiLLMRequest.java @@ -0,0 +1,101 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.openai; + +import com.fasterxml.jackson.databind.JsonNode; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMRequest; + +public abstract class AbstractOpenAiLLMRequest extends AbstractLLMRequest { + + public AbstractOpenAiLLMRequest(Exchange exchange) { + super(exchange); + } + + @Override + public long estimateInputTokens() { + + long chars = countText(json.path("input")); + + chars += estimateChatCompletitions(); + + // system instructions + chars += countText(json.path("system")); + + // tools/functions contribute significantly + chars += countJsonSize(json.path("tools")); + chars += countJsonSize(json.path("functions")); + + // safety margin for JSON structure and tokenizer variance + return Math.max(1, Math.round(chars / 4.0 * 1.15)); + } + + private long estimateChatCompletitions() { + long chars = 0; + // Chat Completions API + var messages = json.path("messages"); + if (messages.isArray()) { + for (var message : messages) { + chars += countText(message.path("content")); + // roles also consume tokens + chars += message.path("role").asText("").length(); + } + } + return chars; + } + + private long 
countText(JsonNode node) { + if (node == null || node.isMissingNode() || node.isNull()) { + return 0; + } + + if (node.isTextual()) { + return node.asText().length(); + } + + if (node.isArray()) { + long chars = 0; + for (JsonNode child : node) { + chars += countText(child); + } + return chars; + } + + if (node.isObject()) { + + // OpenAI content blocks: + // { "type": "text", "text": "..." } + long chars = 0; + + var text = node.get("text"); + if (text != null && text.isTextual()) { + chars += text.asText().length(); + } + + chars += countText(node.get("content")); + + return chars; + } + + return 0; + } + + private long countJsonSize(JsonNode node) { + if (node == null || node.isMissingNode() || node.isNull()) { + return 0; + } + return node.toString().length(); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java new file mode 100644 index 0000000000..8c6e474398 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIChatCompletionsRequest.java @@ -0,0 +1,29 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.openai; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsRequest; + +public class OpenAIChatCompletionsRequest extends ChatCompletionsRequest { + public OpenAIChatCompletionsRequest(Exchange exchange) { + super(exchange); + } + + @Override + public void setMaxOutputTokens(int maxOutputTokens) { + json.put("max_completion_tokens", maxOutputTokens); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java new file mode 100644 index 0000000000..e55d40bd47 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAIProvider.java @@ -0,0 +1,59 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.openai; + +import com.predic8.membrane.annot.MCElement; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMProvider; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMRequest; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsErrorCreator; +import com.predic8.membrane.core.interceptor.llmgateway.provider.chatcompletions.ChatCompletionsResponse; + +import java.util.function.Consumer; + +/** + * @description OpenAI provider configuration + * Use to configure a LLM gateway to use the OpenAI API + */ +@MCElement( name="openai") +public class OpenAIProvider implements LLMProvider { + + @Override + public LLMRequest getLLMRequest(Exchange exchange) { + if (isResponsesApi(exchange)) { + return new OpenAiLLMResponsesRequest(exchange); + } + return new OpenAIChatCompletionsRequest(exchange); + } + + @Override + public LLMResponse getLLMResponse(Exchange exchange, Consumer postProcessor) { + if (isResponsesApi(exchange)) { + return new OpenAiLLMResponsesResponse(exchange,postProcessor); + } + return new ChatCompletionsResponse(exchange, postProcessor); + } + + @Override + public LLMErrorCreator getErrorCreator() { + return new ChatCompletionsErrorCreator(); + } + + static boolean isResponsesApi(Exchange exchange) { + return exchange.getRequest().getUri().startsWith("/v1/responses"); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java new file mode 100644 index 0000000000..3caa187c88 --- /dev/null +++ 
b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesRequest.java @@ -0,0 +1,51 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.openai; + +import com.predic8.membrane.core.exchange.Exchange; + +import java.util.List; + +import static java.util.Collections.emptyList; + +public class OpenAiLLMResponsesRequest extends AbstractOpenAiLLMRequest { + + public OpenAiLLMResponsesRequest(Exchange exchange) { + super(exchange); + } + + public List getTools() { + var tools = getToolsNode(); + if (tools == null) + return emptyList(); + return tools.valueStream() + .filter(n -> "function".equals(n.path("type").asText(""))) + .map(n -> n.path("name").asText("")) + .filter(name -> !name.isEmpty()) + .toList(); + } + + @Override + public long getRequestedMaxOutputTokens() { + if (json.has("max_output_tokens")) + return json.get("max_output_tokens").asLong(); + return -1; + } + + @Override + public void setMaxOutputTokens(int maxOutputTokens) { + json.put("max_output_tokens", maxOutputTokens); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesResponse.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesResponse.java new file mode 100644 index 0000000000..15263fbd55 --- /dev/null +++ 
b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/provider/openai/OpenAiLLMResponsesResponse.java @@ -0,0 +1,75 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.provider.openai; + +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.llmgateway.AbstractLLMEvent; +import com.predic8.membrane.core.interceptor.llmgateway.provider.AbstractLLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.provider.LLMResponse; +import com.predic8.membrane.core.interceptor.llmgateway.store.Usage; +import com.predic8.membrane.core.util.http.SSEParser; +import com.predic8.membrane.core.util.json.JsonUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Set; +import java.util.function.Consumer; + +public class OpenAiLLMResponsesResponse extends AbstractLLMResponse { + + private static final Logger log = LoggerFactory.getLogger(OpenAiLLMResponsesResponse.class); + + public OpenAiLLMResponsesResponse(Exchange exchange, Consumer postProcessor) { + super(exchange, postProcessor); + } + + @Override + public Usage getUsage() { + + var usage = json.path("usage"); + + // For streamed response.completed events + if (usage.isMissingNode() || usage.isNull()) { + usage = json.path("response").path("usage"); + } + + var inputTokens = 
getInputTokens(usage); + var outputTokens = getOutputTokens(usage); + var totalTokens = usage.path("total_tokens").asInt(inputTokens + outputTokens); + return new Usage(inputTokens, outputTokens, totalTokens); + + } + + @Override + public Set getTerminalEvents() { + return Set.of("response.completed", "response.incomplete"); + } + + @Override + protected void processTerminalEvent(SSEParser.SSEEvent terminal) { + json = JsonUtil.getJsonObject(terminal.data()) + .orElse(JsonNodeFactory.instance.objectNode() + .put("error", "No JSON object response from model.")); + } + + @Override + public void process(SSEParser.SSEEvent e) { + log.debug("Event: {}", e.name()); + log.debug("Data: {}", e.data()); + var event = AbstractLLMEvent.create(e); + log.debug("Event: {}", event); + } +} diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiStore.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiStore.java new file mode 100644 index 0000000000..c764e17ac9 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiStore.java @@ -0,0 +1,43 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.interceptor.llmgateway.store; + +import com.predic8.membrane.core.router.Router; + +import java.util.Optional; + +/** + * @TODO + * - Store .status, .error, .model, .stop_reason + */ +public interface AiApiStore { + + default void init(Router router) { + } + + void store(AiApiUser user, Usage usage); + + Optional getUser(String token); + + /** + * Checks if the user has enough tokens to make the request. + * @param user The user to check + * @return Estimated number of tokens that the user has left after this request + */ + long checkLimit(AiApiUser user, long inputTokens, long outputTokens); + + long getRemainingResetTime(); +} + diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java new file mode 100644 index 0000000000..da8b792680 --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/AiApiUser.java @@ -0,0 +1,103 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/
+
+package com.predic8.membrane.core.interceptor.llmgateway.store;
+
+import com.predic8.membrane.annot.MCAttribute;
+import com.predic8.membrane.annot.MCElement;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+import static java.lang.Long.MAX_VALUE;
+
+/**
+ * A configurable user of the LLM gateway: a name, an API key and a token
+ * budget, together with a counter of the tokens consumed in the current period.
+ */
+@MCElement(name = "users", component = false, id="ai-api-users")
+public class AiApiUser {
+
+    private String name;
+    private String apiKey;
+
+    // 0 stands for "no limit", see checkLimit
+    private long tokens;
+
+    private final AtomicLong tokensUsedInPeriod = new AtomicLong();
+
+    /**
+     * Adds the total token count of a call to this user's counter for the
+     * current period.
+     * @param usage usage of the call; only its total token count is read
+     */
+    public void addTokensUsedInPeriod(Usage usage) {
+        final int spent = usage.totalTokens();
+        tokensUsedInPeriod.addAndGet(spent);
+    }
+
+    /**
+     * Sets the counter of tokens used in the current period back to zero.
+     */
+    public void resetTokensUsedInPeriod() {
+        tokensUsedInPeriod.set(0L);
+    }
+
+    /**
+     * Computes what would remain of the token budget after a request.
+     * @param tokensNeededForRequest tokens the request is expected to consume
+     * @return Long.MAX_VALUE if no budget is configured (tokens == 0), otherwise
+     *         budget minus tokens already used minus tokens needed
+     */
+    public long checkLimit(long tokensNeededForRequest) {
+        return tokens == 0
+                ? MAX_VALUE
+                : this.tokens - tokensUsedInPeriod.get() - tokensNeededForRequest;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * @description Name of the API user, group or cost center.
+     * @param name of the user
+     */
+    @MCAttribute
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getApiKey() {
+        return apiKey;
+    }
+
+    /**
+     * @description API key to authenticate the user at the llm gateway
+     * @default (not set)
+     * @param apikey to authenticate the user
+     */
+    @MCAttribute
+    public void setApiKey(String apikey) {
+        this.apiKey = apikey;
+    }
+
+    public long getTokens() {
+        return tokens;
+    }
+
+    /**
+     * @description Number of tokens that the user has available within the current period.
+     * @default 0 (no limit)
+     * @param tokens available tokens
+     */
+    @MCAttribute
+    public void setTokens(long tokens) {
+        this.tokens = tokens;
+    }
+
+    /**
+     * @return short textual form that contains only the user's name
+     */
+    @Override
+    public String toString() {
+        return String.format("user(name: %s)", name);
+    }
+}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/JDBCAiApiUsageStore.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/JDBCAiApiUsageStore.java
new file mode 100644
index 0000000000..7541c08a2c
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/JDBCAiApiUsageStore.java
@@ -0,0 +1,93 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.store;
+
+import com.predic8.membrane.annot.MCElement;
+import com.predic8.membrane.core.router.Router;
+import com.predic8.membrane.core.util.jdbc.AbstractJdbcSupport;
+
+import java.sql.SQLException;
+import java.util.Optional;
+
+/**
+ * @description Stores AI API usage in a database (experimental).
+ */
+@MCElement(name = "jdbcAiApiUsageStore")
+public class JDBCAiApiUsageStore extends AbstractJdbcSupport implements AiApiStore {
+
+    // @TODO GENERATED ALWAYS AS IDENTITY is PostgreSQL specific
+    // DDL for the usage table: one row per stored call.
+    private static final String CREATE_TABLE_SQL = """
+            CREATE TABLE IF NOT EXISTS ai_api_usage (
+                id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+                username VARCHAR(255) NOT NULL,
+                input_tokens INT NOT NULL,
+                output_tokens INT NOT NULL,
+                total_tokens INT NOT NULL,
+                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+            )
+            """;
+
+    // Parameter order: username, input_tokens, output_tokens, total_tokens.
+    // id and created_at are filled in by the database.
+    private static final String INSERT_SQL = """
+            INSERT INTO ai_api_usage (
+                username,
+                input_tokens,
+                output_tokens,
+                total_tokens
+            ) VALUES (?, ?, ?, ?)
+            """;
+
+    /**
+     * Delegates to the superclass initialization and then creates the usage
+     * table if it does not exist yet.
+     *
+     * @param router router passed on to the superclass
+     */
+    @Override
+    public void init(Router router) {
+        super.init(router);
+        createTablesIfNotExist();
+    }
+
+    /**
+     * Inserts one row with the user's name and the input, output and total
+     * token counts of the call.
+     *
+     * @param user  user whose name is written to the username column
+     * @param usage token counts to persist
+     * @throws RuntimeException wrapping the SQLException if the insert fails
+     */
+    @Override
+    public void store(AiApiUser user, com.predic8.membrane.core.interceptor.llmgateway.store.Usage usage) {
+        try (var connection = getConnection(); var ps = connection.prepareStatement(INSERT_SQL)) {
+            ps.setString(1, user.getName());
+            ps.setInt(2, usage.inputTokens());
+            ps.setInt(3, usage.outputTokens());
+            ps.setInt(4, usage.totalTokens());
+
+            ps.executeUpdate();
+        } catch (SQLException e) {
+            throw new RuntimeException("Could not store AI API usage.", e);
+        }
+    }
+
+    /**
+     * Always returns an empty Optional; this store performs no user lookup.
+     */
+    @Override
+    public Optional getUser(String token) {
+        return Optional.empty();
+    }
+
+    /**
+     * Always returns 0; no limit is evaluated here.
+     * NOTE(review): AiApiStore describes the result as the estimated number of
+     * tokens left after the request - confirm that 0 is the intended placeholder.
+     */
+    @Override
+    public long checkLimit(AiApiUser user, long inputTokens, long outputTokens) {
+        return 0;
+    }
+
+    /**
+     * Always returns 0; this store keeps no reset period.
+     */
+    @Override
+    public long getRemainingResetTime() {
+        return 0;
+    }
+
+    /**
+     * Executes CREATE_TABLE_SQL on a connection obtained from getConnection().
+     *
+     * @throws RuntimeException wrapping the SQLException if the statement fails
+     */
+    private void createTablesIfNotExist() {
+        try (var connection = getConnection(); var ps = connection.prepareStatement(CREATE_TABLE_SQL)) {
+            ps.executeUpdate();
+        } catch (SQLException e) {
+            throw new RuntimeException("Could not create AI API usage table.", e);
+        }
+    }
+}
\ No newline at end of file
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java
b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java new file mode 100644 index 0000000000..106892c39f --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/SimpleAiApiStore.java @@ -0,0 +1,129 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.core.interceptor.llmgateway.store; + +import com.predic8.membrane.annot.MCAttribute; +import com.predic8.membrane.annot.MCChildElement; +import com.predic8.membrane.annot.MCElement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.GuardedBy; +import java.time.Instant; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +import static java.time.Instant.now; + +/** + * @description Simple store for the LLM Gateway that stores limits in memory. Users and keys can + * be configured in the configuration file. 
+ */
+@MCElement(name="simpleStore",component = false, id="simple-ai-api-store")
+public class SimpleAiApiStore implements AiApiStore {
+
+    private static final Logger log = LoggerFactory.getLogger(SimpleAiApiStore.class);
+
+    @GuardedBy("lock")
+    private List<AiApiUser> users = Collections.emptyList();
+
+    private boolean logUsage = true;
+
+    private final Object lock = new Object();
+
+    @GuardedBy("lock")
+    private Instant nextReset;
+
+    private long limitResetPeriod = 60;
+
+    /**
+     * Adds the usage to the user's counter for the current period and, if
+     * logUsage is enabled, writes it to the log first.
+     *
+     * @param user  user the usage is accounted to
+     * @param usage token counts of the call
+     */
+    @Override
+    public void store(AiApiUser user, Usage usage) {
+        if (logUsage)
+            log.info("user: {} {}", user.getName(), usage);
+        user.addTokensUsedInPeriod(usage);
+    }
+
+    /**
+     * Returns the first configured user whose API key equals the token.
+     * A null token never matches, and users without an API key are skipped
+     * instead of causing a NullPointerException.
+     *
+     * @param token API key presented by the client, may be null
+     * @return the matching user or an empty Optional
+     */
+    @Override
+    public Optional<AiApiUser> getUser(String token) {
+        if (token == null)
+            return Optional.empty();
+        synchronized (lock) {
+            // token is known to be non-null, so it is the receiver of equals()
+            return users.stream().filter(u -> token.equals(u.getApiKey())).findFirst();
+        }
+    }
+
+    /**
+     * Starts a new period when none is running or the current one has expired
+     * (which resets the counters of all configured users), then asks the user
+     * how many tokens would be left after the request.
+     *
+     * @param user         user to check, null for an anonymous caller
+     * @param inputTokens  input tokens expected for the request
+     * @param outputTokens output tokens expected for the request
+     * @return 0 for a null user, otherwise the result of
+     *         AiApiUser.checkLimit(inputTokens + outputTokens)
+     */
+    @Override
+    public long checkLimit(AiApiUser user, long inputTokens, long outputTokens) {
+        if (user == null)
+            return 0; // anonymous user gets no tokens
+
+        synchronized (lock) {
+            var now = now();
+            if (nextReset == null || now.isAfter(nextReset)) {
+                nextReset = now.plusSeconds(limitResetPeriod);
+                log.info("Resetting AI API token usage limit.");
+                users.forEach(AiApiUser::resetTokensUsedInPeriod);
+            }
+        }
+
+        return user.checkLimit(inputTokens + outputTokens);
+    }
+
+    /**
+     * @return seconds until the next reset; 0 if no period has been started yet
+     *         or the current one has already elapsed (never negative)
+     */
+    @Override
+    public long getRemainingResetTime() {
+        synchronized (lock) {
+            if (nextReset == null)
+                return 0;
+            // nextReset is only advanced by checkLimit, so it may lie in the past
+            return Math.max(0L, (nextReset.toEpochMilli() - now().toEpochMilli()) / 1000);
+        }
+    }
+
+    /**
+     * List of users that can be used for authentication.
+     * @param users User list; null is treated as an empty list
+     */
+    @MCChildElement(allowForeign = true,order = 10)
+    public void setUsers(List<AiApiUser> users) {
+        synchronized (lock) {
+            this.users = users == null ? Collections.<AiApiUser>emptyList() : users;
+        }
+    }
+
+    /**
+     * @return unmodifiable snapshot of the configured users
+     */
+    public List<AiApiUser> getUsers() {
+        synchronized (lock) {
+            return List.copyOf(users);
+        }
+    }
+
+    public long getLimitResetPeriod() {
+        return limitResetPeriod;
+    }
+
+    /**
+     * @description The period in seconds after which the token limit is reset.
+     * @param limitResetPeriod in seconds, e.g. 3600 for 1 hour
+     */
+    @MCAttribute
+    public void setLimitResetPeriod(long limitResetPeriod) {
+        this.limitResetPeriod = limitResetPeriod;
+    }
+
+    public boolean isLogUsage() {
+        return logUsage;
+    }
+
+    /**
+     * Enables or disables logging of each stored usage.
+     * NOTE(review): unlike setLimitResetPeriod this setter carries no
+     * MCAttribute annotation - confirm whether it is meant to be configurable.
+     *
+     * @param logUsage true to log every stored usage
+     */
+    public void setLogUsage(boolean logUsage) {
+        this.logUsage = logUsage;
+    }
+}
+
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/Usage.java b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/Usage.java
new file mode 100644
index 0000000000..3bcc626858
--- /dev/null
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/llmgateway/store/Usage.java
@@ -0,0 +1,17 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.core.interceptor.llmgateway.store;
+
+public record Usage(int inputTokens, int outputTokens, int totalTokens) {}
diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java
index 319636fe29..ed9f1608d8 100644
--- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java
+++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeToolSupport.java
@@ -1,29 +1,32 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.predic8.membrane.core.exchange.AbstractExchange; -import com.predic8.membrane.core.interceptor.mcp.MCPUtil.InvalidToolArgumentsException; import com.predic8.membrane.core.mcp.MCPToolsCall; import com.predic8.membrane.core.mcp.MCPToolsCallResponse; import org.jetbrains.annotations.Nullable; import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; +import java.util.*; import static com.predic8.membrane.core.interceptor.mcp.ExchangeUtils.matchesExchangeFilter; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getOptionalBooleanArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getOptionalIntArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getOptionalSizeArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getOptionalStringArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.getRequiredLongArgument; -import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.rejectUnexpectedArguments; +import static com.predic8.membrane.core.interceptor.mcp.MCPUtil.*; import static com.predic8.membrane.core.interceptor.mcp.McpSchemaBuilder.integer; import static com.predic8.membrane.core.interceptor.mcp.McpSchemaBuilder.string; import 
static java.lang.Integer.MAX_VALUE; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeUtils.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeUtils.java index 274e150d3d..5e3c02c846 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeUtils.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/ExchangeUtils.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.exchange.AbstractExchange; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MCPUtil.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MCPUtil.java index 148965c53e..b3c5b866df 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MCPUtil.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MCPUtil.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.exchange.AbstractExchange; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpPayloadSanitizer.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpPayloadSanitizer.java index addf83e551..dd9b291c9a 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpPayloadSanitizer.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpPayloadSanitizer.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.http.Header; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSchemaBuilder.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSchemaBuilder.java index 14539ca1cc..d03b7d1b05 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSchemaBuilder.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSchemaBuilder.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import java.util.Collections; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionContext.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionContext.java index 615e22d543..0bff5dbcab 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionContext.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionContext.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.mcp.MCPInitialize.ClientInfo; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionManager.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionManager.java index e67394c480..d607e51b7f 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionManager.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpSessionManager.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.mcp.MCPInitialize; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolDefinition.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolDefinition.java index 49d14cbb4f..f07b51b8fd 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolDefinition.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolDefinition.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.mcp.MCPToolsListResponse; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolHandler.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolHandler.java index b8bcf4acf3..58910c945c 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolHandler.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolHandler.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.interceptor.mcp; import com.predic8.membrane.core.exchange.Exchange; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolRegistry.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolRegistry.java index 1dde3ef18a..0480bc715e 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolRegistry.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/McpToolRegistry.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import java.util.Collection; diff --git a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServer.java b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServer.java index a22da0473f..4fc1bab70e 100644 --- a/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServer.java +++ b/core/src/main/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServer.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import com.fasterxml.jackson.core.JsonProcessingException; @@ -9,14 +23,7 @@ import com.predic8.membrane.core.interceptor.mcp.MCPUtil.InvalidToolArgumentsException; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; import com.predic8.membrane.core.jsonrpc.JSONRPCResponse; -import com.predic8.membrane.core.mcp.MCPInitialize; -import com.predic8.membrane.core.mcp.MCPInitializeResponse; -import com.predic8.membrane.core.mcp.MCPInitialized; -import com.predic8.membrane.core.mcp.MCPPing; -import com.predic8.membrane.core.mcp.MCPToolsCall; -import com.predic8.membrane.core.mcp.MCPToolsCallResponse; -import com.predic8.membrane.core.mcp.MCPToolsList; -import com.predic8.membrane.core.mcp.MCPToolsListResponse; +import com.predic8.membrane.core.mcp.*; import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,13 +43,7 @@ import static com.predic8.membrane.core.interceptor.mcp.McpSessionContext.McpSessionState.INITIALIZED; import static com.predic8.membrane.core.interceptor.mcp.McpSessionContext.McpSessionState.READY; import static com.predic8.membrane.core.jsonrpc.JSONRPCRequest.parse; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_INTERNAL_ERROR; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_INVALID_PARAMS; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_INVALID_REQUEST; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_METHOD_NOT_FOUND; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.ERR_PARSE_ERROR; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.error; -import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.success; +import static com.predic8.membrane.core.jsonrpc.JSONRPCResponse.*; /** * @description MCP Server for Membrane. 
It allows querying Membrane's internal state and operation from an LLM diff --git a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequest.java b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequest.java index 65f3b3b86b..f9b802c163 100644 --- a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequest.java +++ b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.jsonrpc; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -15,7 +29,6 @@ import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.util.List; import java.util.Map; import java.util.Objects; diff --git a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponse.java b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponse.java index ebe1a2513e..02d95507fb 100644 --- a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponse.java @@ -1,10 +1,24 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.jsonrpc; -import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonPropertyOrder; +import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializerProvider; diff --git a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCUtil.java b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCUtil.java index 158fa9e087..922c391ef1 100644 --- a/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCUtil.java +++ b/core/src/main/java/com/predic8/membrane/core/jsonrpc/JSONRPCUtil.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.jsonrpc; import com.fasterxml.jackson.databind.JsonNode; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialize.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialize.java index a8d94ca230..9aa79a1ca1 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialize.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialize.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitializeResponse.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitializeResponse.java index c3227bae7a..a49610a9ef 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitializeResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitializeResponse.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.fasterxml.jackson.annotation.JsonInclude; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialized.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialized.java index 27d63ee716..f39698807a 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialized.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPInitialized.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPNotification.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPNotification.java index eeefa8e421..3e106d3aa3 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPNotification.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPNotification.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPPing.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPPing.java index b7aa3c8ce1..3746a156ba 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPPing.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPPing.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPRequest.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPRequest.java index 1d47fcf638..059766bc63 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPRequest.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPRequest.java @@ -1,9 +1,21 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; -import java.util.Objects; - import static java.util.Objects.requireNonNull; /** diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPResponse.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPResponse.java index f9a27bd1d6..faaba2786f 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPResponse.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCResponse; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCall.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCall.java index 1724b5022e..51055b2ed8 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCall.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCall.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCallResponse.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCallResponse.java index 026b032375..e221eced60 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCallResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsCallResponse.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.fasterxml.jackson.annotation.*; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsList.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsList.java index c0beba054c..dbada1138f 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsList.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsList.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsListResponse.java b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsListResponse.java index ad78539949..48d72fbef9 100644 --- a/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsListResponse.java +++ b/core/src/main/java/com/predic8/membrane/core/mcp/MCPToolsListResponse.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.mcp; import com.fasterxml.jackson.annotation.JsonInclude; diff --git a/core/src/main/java/com/predic8/membrane/core/security/AbstractSecurityScheme.java b/core/src/main/java/com/predic8/membrane/core/security/AbstractSecurityScheme.java index b936ec3385..b3950b727a 100644 --- a/core/src/main/java/com/predic8/membrane/core/security/AbstractSecurityScheme.java +++ b/core/src/main/java/com/predic8/membrane/core/security/AbstractSecurityScheme.java @@ -13,7 +13,7 @@ limitations under the License. 
*/ package com.predic8.membrane.core.security; -import com.predic8.membrane.core.exchange.*; +import com.predic8.membrane.core.exchange.Exchange; import java.util.*; @@ -58,4 +58,5 @@ public boolean hasScope(String scope) { public Set getScopes() { return scopes; } + } diff --git a/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java b/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java new file mode 100644 index 0000000000..405312ba4e --- /dev/null +++ b/core/src/main/java/com/predic8/membrane/core/util/http/SSEParser.java @@ -0,0 +1,176 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.util.http; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.http.Chunk; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +public final class SSEParser { + + private static final Logger log = LoggerFactory.getLogger(SSEParser.class); + + private final Set terminalEventNames; + private final StringBuilder buffer = new StringBuilder(); + + private final List events = new ArrayList<>(); + + private String eventName; + private final StringBuilder data = new StringBuilder(); + + private boolean terminalFound; + + public SSEParser(Set terminalEventNames) { + this.terminalEventNames = terminalEventNames; + } + + public boolean parse(Chunk chunk) { + if (terminalFound) { + return true; + } + + log.debug("Parsing SSE chunk: {}", chunk); + + buffer.append(chunk.toString()); + + int lineEnd; + while ((lineEnd = findLineEnd(buffer)) >= 0) { + String line = readLine(buffer, lineEnd); + + if (line.isEmpty()) { + var event = buildEvent(); + resetEvent(); + + if (event != null) { + events.add(event); + + if ((event.name() != null && terminalEventNames.contains(event.name())) || "[DONE]".equals(event.data())) { + terminalFound = true; + return true; + } + } + + continue; + } + + parseLine(line); + } + + return false; + } + + public List getEvents() { + return List.copyOf(events); + } + + public Optional getTerminalEvent() { + if (!terminalFound || events.isEmpty()) { + return Optional.empty(); + } + + return Optional.of(events.getLast()); + } + + private SSEEvent buildEvent() { + if (eventName == null && data.isEmpty()) { + return null; + } + + return new SSEEvent(eventName, data.isEmpty() ? 
null : data.toString()); + } + + private void resetEvent() { + eventName = null; + data.setLength(0); + } + + private void parseLine(String line) { + if (line.startsWith(":")) { + return; + } + + int colon = line.indexOf(':'); + + String field = colon >= 0 ? line.substring(0, colon) : line; + String value = colon >= 0 ? line.substring(colon + 1) : ""; + + if (value.startsWith(" ")) { + value = value.substring(1); + } + + switch (field) { + case "event" -> eventName = value; + + case "data" -> { + if (!data.isEmpty()) { + data.append('\n'); + } + data.append(value); + } + + default -> { + // ignore id, retry, unknown fields + } + } + } + + private static int findLineEnd(StringBuilder buffer) { + for (int i = 0; i < buffer.length(); i++) { + char c = buffer.charAt(i); + if (c == '\n' || c == '\r') { + return i; + } + } + return -1; + } + + private static String readLine(StringBuilder buffer, int lineEnd) { + String line = buffer.substring(0, lineEnd); + + int removeUntil = lineEnd + 1; + + if (lineEnd + 1 < buffer.length() + && buffer.charAt(lineEnd) == '\r' + && buffer.charAt(lineEnd + 1) == '\n') { + removeUntil++; + } + + buffer.delete(0, removeUntil); + return line; + } + + public record SSEEvent(String name, String data) { + + private static final ObjectMapper om = new ObjectMapper(); + + public ObjectNode json() { + try { + return (ObjectNode) om.readTree(data); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + } + +} \ No newline at end of file diff --git a/core/src/main/java/com/predic8/membrane/core/util/jdbc/AbstractJdbcSupport.java b/core/src/main/java/com/predic8/membrane/core/util/jdbc/AbstractJdbcSupport.java index 6103666e1b..df865bbda5 100644 --- a/core/src/main/java/com/predic8/membrane/core/util/jdbc/AbstractJdbcSupport.java +++ b/core/src/main/java/com/predic8/membrane/core/util/jdbc/AbstractJdbcSupport.java @@ -14,12 +14,16 @@ package com.predic8.membrane.core.util.jdbc; -import com.predic8.membrane.annot.*; 
-import com.predic8.membrane.core.router.*; -import com.predic8.membrane.core.util.*; +import com.predic8.membrane.annot.MCAttribute; +import com.predic8.membrane.core.router.Router; +import com.predic8.membrane.core.util.ConfigurationException; -import javax.sql.*; -import java.util.*; +import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Map; + +import static com.predic8.membrane.core.util.ExceptionUtil.getRootCause; public abstract class AbstractJdbcSupport { @@ -53,6 +57,19 @@ public void init(Router router) { getDatasourceIfNull(); } + // @TODO make subclasses use this method + public Connection getConnection() { + try { + return datasource.getConnection(); + } catch (SQLException e) { + var root = getRootCause(e); + if (root instanceof ClassNotFoundException) { + throw new ConfigurationException("JDBC driver not found. Please add the JDBC driver to the classpath: " + root.getMessage()); + } + throw new RuntimeException(e); + } + } + private void getDatasourceIfNull() { if (datasource != null) return; diff --git a/core/src/main/java/com/predic8/membrane/core/util/json/JsonUtil.java b/core/src/main/java/com/predic8/membrane/core/util/json/JsonUtil.java index 5fb73f8092..644c5ca414 100644 --- a/core/src/main/java/com/predic8/membrane/core/util/json/JsonUtil.java +++ b/core/src/main/java/com/predic8/membrane/core/util/json/JsonUtil.java @@ -14,13 +14,30 @@ package com.predic8.membrane.core.util.json; -import com.fasterxml.jackson.databind.*; -import com.fasterxml.jackson.databind.node.*; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.predic8.membrane.core.http.Message; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.math.*; +import 
java.io.InputStream; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.Optional; + +import static com.predic8.membrane.core.http.MimeType.APPLICATION_JSON; +import static java.util.Optional.empty; public class JsonUtil { + private static final Logger log = LoggerFactory.getLogger(JsonUtil.class); + + + private static final ObjectMapper om = new ObjectMapper(); + private static final JsonNodeFactory FACTORY = JsonNodeFactory.instance; /** @@ -75,4 +92,60 @@ public static JsonNode scalarAsJson(String value) { return FACTORY.textNode(value); } + + /** + * Get JSON object from message body. + * The caller must deal with the possibility that the body is not a JSON object or + * there are parsing errors. + * @param jsonString String with a JSON body + * @return JSON object or empty if the body is not a JSON object or there are parsing errors + */ + public static Optional getJsonObject(String jsonString) { + try { + var node = om.readTree(jsonString); + if (node instanceof ObjectNode on) { + return Optional.of(on); + } + log.debug("Expected JSON Object but got: {}",node.getNodeType()); + } catch (Exception e) { + log.debug("Error reading JSON: {}", e.getMessage()); + } + return empty(); + } + + /** + * Get JSON object from message body. + * The caller must deal with the possibility that the body is not a JSON object or + * there are parsing errors. 
+ * @param msg With a JSON body + * @return JSON object or empty if the body is not a JSON object or there are parsing errors + */ + public static Optional getJsonObject(Message msg) { + return getJsonObjectFromSteam(msg.getBodyAsStreamDecoded()); + } + + private static Optional getJsonObjectFromSteam(InputStream obj) { + try { + var node = om.readTree(obj); + if (node instanceof ObjectNode on) { + return Optional.of(on); + } + log.debug("Expected JSON Object but got: {}",node.getNodeType()); + } catch (Exception e) { + log.debug("Error reading JSON: {}", e.getMessage()); + } + return empty(); + } + + + public static void setJsonBody(Message msg, ObjectNode json) { + try { + if (!msg.isJSON()) { + msg.getHeader().setContentType(APPLICATION_JSON); + } + msg.setBodyContent(om.writeValueAsBytes(json)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } } diff --git a/core/src/test/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServerTest.java b/core/src/test/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServerTest.java index 33fa0184f4..b52986e458 100644 --- a/core/src/test/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServerTest.java +++ b/core/src/test/java/com/predic8/membrane/core/interceptor/mcp/MembraneMCPServerTest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.interceptor.mcp; import com.fasterxml.jackson.databind.JsonNode; diff --git a/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequestTest.java b/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequestTest.java index 935ae66c12..f5efd94d6e 100644 --- a/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequestTest.java +++ b/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCRequestTest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.jsonrpc; import org.junit.jupiter.api.Test; diff --git a/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponseTest.java b/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponseTest.java index 97eed97b58..7c0a85e98c 100644 --- a/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponseTest.java +++ b/core/src/test/java/com/predic8/membrane/core/jsonrpc/JSONRPCResponseTest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + package com.predic8.membrane.core.jsonrpc; import org.junit.jupiter.api.Test; diff --git a/core/src/test/java/com/predic8/membrane/core/mcp/MCPInitializeTest.java b/core/src/test/java/com/predic8/membrane/core/mcp/MCPInitializeTest.java index 02d6dadce4..0abbb75f66 100644 --- a/core/src/test/java/com/predic8/membrane/core/mcp/MCPInitializeTest.java +++ b/core/src/test/java/com/predic8/membrane/core/mcp/MCPInitializeTest.java @@ -1,3 +1,17 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + package com.predic8.membrane.core.mcp; import com.predic8.membrane.core.jsonrpc.JSONRPCRequest; diff --git a/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java b/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java new file mode 100644 index 0000000000..7738321f85 --- /dev/null +++ b/core/src/test/java/com/predic8/membrane/core/util/http/SSEParserTest.java @@ -0,0 +1,165 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +package com.predic8.membrane.core.util.http; + +import com.predic8.membrane.core.http.Chunk; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.*; + +class SSEParserTest { + + @Test + void parsesSingleEvent() { + var parser = new SSEParser(Set.of("done")); + + assertFalse(parser.parse(chunk(""" + event: message + data: hello + + """))); + + var events = parser.getEvents(); + + assertEquals(1, events.size()); + assertEquals("message", events.getFirst().name()); + assertEquals("hello", events.getFirst().data()); + assertTrue(parser.getTerminalEvent().isEmpty()); + } + + @Test + void parsesMultilineData() { + var parser = new SSEParser(Set.of("done")); + + parser.parse(chunk(""" + event: message + data: first + data: second + + """)); + + assertEquals("first\nsecond", parser.getEvents().getFirst().data()); + } + + @Test + void parsesEventSplitAcrossChunks() { + var parser = new SSEParser(Set.of("done")); + + assertFalse(parser.parse(chunk(""" + event: mes"""))); + + assertFalse(parser.parse(chunk(""" + sage + data: hel"""))); + + assertFalse(parser.parse(chunk(""" + lo + + """))); + + var event = parser.getEvents().getFirst(); + + assertEquals("message", event.name()); + assertEquals("hello", event.data()); + } + + @Test + void returnsTrueWhenTerminalEventIsFound() { + var parser = new SSEParser(Set.of("done")); + + assertTrue(parser.parse(chunk(""" + event: done + data: {"usage":{"total_tokens":42}} + + """))); + + var terminal = parser.getTerminalEvent(); + + assertTrue(terminal.isPresent()); + assertEquals("done", terminal.get().name()); + assertEquals("{\"usage\":{\"total_tokens\":42}}", terminal.get().data()); + } + + @Test + void ignoresChunksAfterTerminalEvent() { + var parser = new SSEParser(Set.of("done")); + + assertTrue(parser.parse(chunk(""" + event: done + data: final + + """))); + + assertTrue(parser.parse(chunk(""" + event: message + data: ignored + + """))); + + assertEquals(1, 
parser.getEvents().size()); + assertEquals("done", parser.getEvents().getFirst().name()); + } + + @Test + void ignoresCommentsAndUnknownFields() { + var parser = new SSEParser(Set.of("done")); + + parser.parse(chunk(""" + : comment + id: 123 + retry: 1000 + event: message + data: hello + + """)); + + var event = parser.getEvents().getFirst(); + + assertEquals("message", event.name()); + assertEquals("hello", event.data()); + } + + @Test + void supportsCrLfLineEndings() { + var parser = new SSEParser(Set.of("done")); + + parser.parse(chunk("event: message\r\ndata: hello\r\n\r\n")); + + var event = parser.getEvents().getFirst(); + + assertEquals("message", event.name()); + assertEquals("hello", event.data()); + } + + @Test + void returnsUnmodifiableEventsList() { + var parser = new SSEParser(Set.of("done")); + + parser.parse(chunk(""" + event: message + data: hello + + """)); + + assertThrows(UnsupportedOperationException.class, + () -> parser.getEvents().add(new SSEParser.SSEEvent("x", "y"))); + } + + private static Chunk chunk(String content) { + return new Chunk(content.getBytes()); + } +} \ No newline at end of file diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java new file mode 100644 index 0000000000..77c674ee1e --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/AbstractAiTutorialTest.java @@ -0,0 +1,169 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway; + +import com.predic8.membrane.core.exchange.Exchange; +import com.predic8.membrane.core.interceptor.AbstractInterceptor; +import com.predic8.membrane.core.interceptor.Outcome; +import com.predic8.membrane.core.interceptor.flow.ReturnInterceptor; +import com.predic8.membrane.core.interceptor.templating.StaticInterceptor; +import com.predic8.membrane.core.proxies.ServiceProxy; +import com.predic8.membrane.core.proxies.ServiceProxyKey; +import com.predic8.membrane.core.router.DefaultRouter; +import com.predic8.membrane.examples.util.DistributionExtractingTestcase; +import com.predic8.membrane.examples.util.Process2; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.util.function.Consumer; + +import static com.predic8.membrane.core.http.MimeType.APPLICATION_JSON; + +/** + * Base class for AI tutorial tests. Starts a local Membrane mock of the upstream LLM API + * so tests run without a real API key and without network access to the LLM provider. + * + *

The tutorial YAML's {@code target.url} is rewritten to point at the mock server + * before Membrane starts. Subclasses override {@link #getTutorialDir()} and + * {@link #getTutorialYaml()} to select the tutorial under test. + * + *

JUnit 5 lifecycle ordering guarantees that {@code DistributionExtractingTestcase.init()}
+ * (superclass {@code @BeforeEach}) runs first and sets {@code baseDir}, allowing
+ * {@link #startGateway()} to use {@code replaceInFile2()} safely.
+ */
+public abstract class AbstractAiTutorialTest extends DistributionExtractingTestcase {
+
+    /** Port the in-process mock LLM API listens on. */
+    protected static final int MOCK_LLM_PORT = 3100;
+
+    /**
+     * Value substituted for the {@code <>} placeholder in tutorial
+     * YAMLs before Membrane starts. Tests that verify upstream key-substitution assert against
+     * this constant instead of the raw placeholder text.
+     */
+    protected static final String TEST_API_KEY = "test-upstream-key";
+
+    /** Handle of the Membrane start script; assigned in {@link #startGateway()}. */
+    protected Process2 process;
+
+    /** Body of the most recent request that reached the mock (written by the capture interceptor). */
+    protected volatile String lastRequestBody;
+
+    /** Value of the {@link #getApiKeyHeader()} header on the most recent request that reached the mock. */
+    protected volatile String lastRequestApiKey;
+
+    private DefaultRouter mockRouter;
+
+    /** @return tutorial directory, resolved below {@code ../tutorials/} by {@link #getExampleDirName()} */
+    protected abstract String getTutorialDir();
+
+    /** @return file name of the tutorial YAML that is patched and passed to Membrane via {@code -c} */
+    protected abstract String getTutorialYaml();
+
+    @Override
+    protected String getExampleDirName() {
+        return "../tutorials/%s".formatted(getTutorialDir());
+    }
+
+    @Override
+    protected String getParameters() {
+        return "-c %s".formatted(getTutorialYaml());
+    }
+
+    /**
+     * Runs after {@code DistributionExtractingTestcase.init()} sets {@code baseDir}.
+     * Starts the mock, patches the YAML, then starts Membrane.
+     */
+    @BeforeEach
+    void startGateway() throws Exception {
+        startMockLlmApi();
+        replaceInFile2(getTutorialYaml(), getUpstreamApiUrl(), mockApiUrl());
+        // NOTE(review): the placeholder literal reads "<>" in this copy and looks truncated;
+        // confirm it matches the API-key placeholder actually used in the tutorial YAMLs.
+        replaceInFile2(getTutorialYaml(), "<>", TEST_API_KEY);
+        process = startServiceProxyScript();
+    }
+
+    /**
+     * Stops Membrane and the mock. The mock router is stopped in a {@code finally} block so a
+     * failure while killing the script cannot leave the mock running for the next test.
+     */
+    @AfterEach
+    void stopGateway() {
+        try {
+            if (process != null)
+                process.killScript();
+        } finally {
+            if (mockRouter != null)
+                mockRouter.stop();
+        }
+    }
+
+    /**
+     * The upstream API URL used in the tutorial YAML (to be replaced by the mock URL).
+     */
+    protected String getUpstreamApiUrl() {
+        return "https://api.anthropic.com";
+    }
+
+    /** @return base URL of the local mock, built from {@link #MOCK_LLM_PORT} */
+    protected String mockApiUrl() {
+        return "http://localhost:" + MOCK_LLM_PORT;
+    }
+
+    /**
+     * The HTTP header name from which the upstream API key is read when capturing
+     * requests in the mock. Defaults to {@code "x-api-key"} (Claude). Override to
+     * {@code "authorization"} for OpenAI or {@code "x-goog-api-key"} for Google.
+     */
+    protected String getApiKeyHeader() {
+        return "x-api-key";
+    }
+
+    /**
+     * Content-Type the mock LLM server sends back. Defaults to {@code "application/json"}
+     * for regular responses. Override to {@code "text/event-stream"} in streaming test classes.
+     */
+    protected String mockContentType() {
+        return APPLICATION_JSON;
+    }
+
+    /**
+     * Starts a router with one proxy on {@link #MOCK_LLM_PORT} whose flow captures the request,
+     * answers with {@link #mockResponse()} and returns without calling any backend.
+     */
+    private void startMockLlmApi() throws Exception {
+        var si = new StaticInterceptor();
+        si.setSrc(mockResponse());
+        si.setContentType(mockContentType());
+
+        var sp = new ServiceProxy(new ServiceProxyKey(MOCK_LLM_PORT), null, 0);
+        sp.getFlow().add(new BodyCaptureInterceptor(
+                body -> lastRequestBody = body,
+                apiKey -> lastRequestApiKey = apiKey,
+                getApiKeyHeader()));
+        sp.getFlow().add(si);
+        sp.getFlow().add(new ReturnInterceptor());
+
+        mockRouter = new DefaultRouter();
+        mockRouter.add(sp);
+        mockRouter.start();
+    }
+
+    /** Hands the decoded request body and one configurable header value to the given sinks. */
+    private static class BodyCaptureInterceptor extends AbstractInterceptor {
+
+        // Typed sinks: the lambdas passed in assign the values to String fields.
+        private final Consumer<String> bodySink;
+        private final Consumer<String> apiKeySink;
+        private final String apiKeyHeader;
+
+        BodyCaptureInterceptor(Consumer<String> bodySink, Consumer<String> apiKeySink, String apiKeyHeader) {
+            this.bodySink = bodySink;
+            this.apiKeySink = apiKeySink;
+            this.apiKeyHeader = apiKeyHeader;
+        }
+
+        @Override
+        public Outcome handleRequest(Exchange exc) {
+            bodySink.accept(exc.getRequest().getBodyAsStringDecoded());
+            apiKeySink.accept(exc.getRequest().getHeader().getFirstValue(apiKeyHeader));
+            return Outcome.CONTINUE;
+        }
+    }
+
+    /**
+     * Body the mock replies with. The default is a Claude-style {@code message} that reports
+     * 10 input and 5 output tokens; provider base classes override it.
+     */
+    protected String mockResponse() {
+        return """
+                {"id":"msg_mock","type":"message","role":"assistant",\
+                "content":[{"type":"text","text":"I am a mock."}],\
+                "model":"claude-sonnet-4-0","stop_reason":"end_turn",\
+                "usage":{"input_tokens":10,"output_tokens":5}}""";
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..3cde3fa976
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/BasicClaudeLLMGatewayTutorialTest.java
@@ -0,0 +1,114 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.claude;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static io.restassured.path.json.JsonPath.from;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Integration test for {@code distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml}.
+ *
+ *

The tutorial configures a Claude LLM gateway with: + *

    + *
  • {@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected
  • + *
  • {@code maxOutputTokens: 200} — {@code max_tokens} in the forwarded request is capped to 200
  • + *
+ * + *

The upstream Anthropic API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicClaudeLLMGatewayTutorialTest extends AbstractAiTutorialTest {
+
+    /** Claude tutorials live in {@code ai/llm-gateway/claude}. */
+    @Override
+    protected String getTutorialDir() {
+        return "ai/llm-gateway/claude";
+    }
+
+    /** The YAML this test class starts Membrane with. */
+    @Override
+    protected String getTutorialYaml() {
+        return "10-Basic-LLM-Gateway.yaml";
+    }
+
+    /**
+     * A request within the token limits is forwarded to the upstream and its response is returned.
+     */
+    @Test
+    void simpleRequestIsForwarded() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", "test-key")
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(200)
+            .body("type", equalTo("message"))
+            .body("content[0].type", equalTo("text"));
+        // @formatter:on
+    }
+
+    /**
+     * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+     * gateway before reaching the upstream. The response uses the Claude error format.
+     */
+    @Test
+    void inputTokenLimitExceededIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", "test-key")
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("max-input.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(400)
+            .body("type", equalTo("error"))
+            .body("error.type", equalTo("invalid_request_error"))
+            .body("error.message", containsString("tokens"));
+        // @formatter:on
+    }
+
+    /**
+     * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+     * the gateway rewrites {@code max_tokens} to 200 before forwarding to the upstream.
+     * The mock captures the forwarded body so we can verify the value was actually capped.
+     */
+    @Test
+    void outputTokensAreCappedBeforeForwarding() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", "test-key")
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("max-output.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(200);
+        // @formatter:on
+
+        // lastRequestBody is the body the mock received, i.e. what the gateway forwarded.
+        assertThat(from(lastRequestBody).getInt("max_tokens"), equalTo(200));
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
new file mode 100644
index 0000000000..3514870774
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/claude/SharingApiKeysTutorialTest.java
@@ -0,0 +1,223 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.claude;
+
+import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml}.
+ *
+ *

The tutorial demonstrates sharing a single upstream API key between multiple users, + * each identified by their own gateway key and subject to individual token budgets: + *

    + *
  • alice — key {@code abc123}, budget 250 tokens
  • + *
  • bob — key {@code qwertz}, budget 10 000 tokens
  • + *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200},
+ * allowed models: {@code claude-sonnet-4-0}, {@code claude-opus-4-0}, {@code claude-haiku-3-5}.
+ */
+public class SharingApiKeysTutorialTest extends AbstractAiTutorialTest {
+
+    // Gateway-facing keys of the two tutorial users.
+    private static final String ALICE = "abc123";
+    private static final String BOB = "qwertz";
+
+    @Override
+    protected String getTutorialDir() {
+        return "ai/llm-gateway/claude";
+    }
+
+    @Override
+    protected String getTutorialYaml() {
+        return "20-Sharing-API-Keys.yaml";
+    }
+
+    /** A request with alice's key is answered with 200 and a Claude {@code message} body. */
+    @Test
+    void aliceCanSendRequest() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", ALICE)
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(200)
+            .body("type", equalTo("message"));
+        // @formatter:on
+    }
+
+    /** A request with bob's key is answered with 200 and a Claude {@code message} body. */
+    @Test
+    void bobCanSendRequest() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", BOB)
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(200)
+            .body("type", equalTo("message"));
+        // @formatter:on
+    }
+
+    /** A key that belongs to no configured user yields 401 with {@code authentication_error}. */
+    @Test
+    void unknownApiKeyIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", "invalid-key")
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(401)
+            .body("type", equalTo("error"))
+            .body("error.type", equalTo("authentication_error"));
+        // @formatter:on
+    }
+
+    /**
+     * The gateway is configured with its own upstream {@code apiKey}. When a user request
+     * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must
+     * replace it with the configured upstream key before forwarding to the LLM provider.
+     */
+    @Test
+    void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", ALICE)
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(200);
+        // @formatter:on
+
+        // lastRequestApiKey is the x-api-key value the mock upstream received.
+        assertThat(lastRequestApiKey, not(equalTo(ALICE)));
+        assertThat(lastRequestApiKey, equalTo(TEST_API_KEY));
+    }
+
+    /** {@code wrong-model.json} is rejected with 400; the error message names {@code gpt-5}. */
+    @Test
+    void wrongModelIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", ALICE)
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("wrong-model.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(400)
+            .body("type", equalTo("error"))
+            .body("error.type", equalTo("invalid_request_error"))
+            .body("error.message", containsString("gpt-5"))
+            .body("error.message", containsString("not allowed"));
+        // @formatter:on
+    }
+
+    /** {@code max-input.json} exceeds the 100-token input limit and is rejected with 400. */
+    @Test
+    void inputTokenLimitExceededIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", ALICE)
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("max-input.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(400)
+            .body("type", equalTo("error"))
+            .body("error.type", equalTo("invalid_request_error"))
+            .body("error.message", containsString("prompt is too long"))
+            .body("error.message", containsString("100 maximum"));
+        // @formatter:on
+    }
+
+    /**
+     * Alice has a budget of 250 tokens. Each request with {@code max-output.json} projects
+     * 7 (input estimate) + 200 (capped max_tokens) = 207 tokens. The mock returns 15 tokens
+     * of actual usage per call, so the running total grows by 15 after each response.
+     *
+     * <p>Budget accounting per request:
+     * <pre>
+     *   1st: 250 - 0   - 207 =  43  → forwarded; used becomes 15
+     *   2nd: 250 - 15  - 207 =  28  → forwarded; used becomes 30
+     *   3rd: 250 - 30  - 207 =  13  → forwarded; used becomes 45
+     *   4th: 250 - 45  - 207 =  -2  → rejected with 429
+     * </pre>
+     *
+     * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests
+     * after alice is blocked.
+     */
+    @Test
+    void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException {
+        for (int i = 0; i < 3; i++) {
+            // @formatter:off
+            given()
+                .contentType("application/json")
+                .header("x-api-key", ALICE)
+                .header("anthropic-version", "2023-06-01")
+                .body(readFileFromBaseDir("max-output.json"))
+            .when()
+                .post(LOCALHOST_2000 + "/v1/messages")
+            .then()
+                .statusCode(200);
+            // @formatter:on
+        }
+
+        // Alice's budget is now exhausted
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-api-key", ALICE)
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("max-output.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(429)
+            .body("type", equalTo("error"))
+            .body("error.type", equalTo("rate_limit_error"));
+
+        // Bob's budget is independent — he can still send requests
+        given()
+            .contentType("application/json")
+            .header("x-api-key", BOB)
+            .header("anthropic-version", "2023-06-01")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/messages")
+        .then()
+            .statusCode(200)
+            .body("type", equalTo("message"));
+        // @formatter:on
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java
new file mode 100644
index 0000000000..4e39f7ae6c
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/AbstractGoogleTutorialTest.java
@@ -0,0 +1,58 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.google; + +import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest; + +/** + * Base class for Google Gemini LLM-Gateway tutorial tests. + * + *

Overrides the upstream URL and the API-key header so the mock captures
+ * the {@code x-goog-api-key} header that Google uses. The mock response is
+ * formatted as a Gemini {@code generateContent} reply and reports 100 total
+ * tokens (50 prompt + 50 candidates) per call.
+ */
+public abstract class AbstractGoogleTutorialTest extends AbstractAiTutorialTest {
+
+    /** URL prefix used in both Google tutorial YAML files. */
+    @Override
+    protected String getUpstreamApiUrl() {
+        return "https://generativelanguage.googleapis.com";
+    }
+
+    /** Google tutorials live in {@code ai/llm-gateway/google}. */
+    @Override
+    protected String getTutorialDir() {
+        return "ai/llm-gateway/google";
+    }
+
+    /** Google authenticates via the {@code x-goog-api-key} header. */
+    @Override
+    protected String getApiKeyHeader() {
+        return "x-goog-api-key";
+    }
+
+    /**
+     * Minimal Gemini {@code generateContent} reply with 50 prompt + 50 candidates = 100 total
+     * tokens. The higher per-request cost keeps the token-budget exhaustion test to three
+     * successful requests before alice's 500-token allowance runs out.
+     */
+    @Override
+    protected String mockResponse() {
+        return """
+                {"candidates":[{"content":{"parts":[{"text":"I am a mock."}],"role":"model"},\
+                "finishReason":"STOP"}],\
+                "usageMetadata":{"promptTokenCount":50,"candidatesTokenCount":50,"totalTokenCount":100}}""";
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..16f52d470b
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/BasicGoogleLLMGatewayTutorialTest.java
@@ -0,0 +1,109 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.google; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static io.restassured.RestAssured.given; +import static io.restassured.path.json.JsonPath.from; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +/** + * Integration test for + * {@code distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml}. + * + *

The tutorial configures a Google Gemini LLM gateway with: + *

    + *
  • {@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected
  • + *
  • {@code maxOutputTokens: 200} — {@code generationConfig.maxOutputTokens} in the forwarded + * request is capped to 200
  • + *
+ * + *

The upstream Google Gemini API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicGoogleLLMGatewayTutorialTest extends AbstractGoogleTutorialTest {
+
+    // Gateway URL for gemini-2.5-flash; the model name is part of the path.
+    private static final String GEMINI_ENDPOINT =
+            LOCALHOST_2000 + "/v1beta/models/gemini-2.5-flash:generateContent";
+
+    /** The YAML this test class starts Membrane with. */
+    @Override
+    protected String getTutorialYaml() {
+        return "10-Basic-LLM-Gateway.yaml";
+    }
+
+    /**
+     * A request within the token limits is forwarded to the upstream and its response is returned.
+     */
+    @Test
+    void simpleRequestIsForwarded() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", "test-key")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(GEMINI_ENDPOINT)
+        .then()
+            .statusCode(200)
+            .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+        // @formatter:on
+    }
+
+    /**
+     * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+     * gateway before reaching the upstream. The response uses the Google error format.
+     */
+    @Test
+    void inputTokenLimitExceededIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", "test-key")
+            .body(readFileFromBaseDir("max-input.json"))
+        .when()
+            .post(GEMINI_ENDPOINT)
+        .then()
+            .statusCode(400)
+            .body("error.status", equalTo("INVALID_ARGUMENT"))
+            .body("error.message", containsString("exceeds the maximum allowed"))
+            .body("error.message", containsString("100"));
+        // @formatter:on
+    }
+
+    /**
+     * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+     * the gateway rewrites {@code generationConfig.maxOutputTokens} to 200 before forwarding.
+     * The mock captures the forwarded body so we can verify the value was actually capped.
+     */
+    @Test
+    void outputTokensAreCappedBeforeForwarding() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", "test-key")
+            .body(readFileFromBaseDir("max-output.json"))
+        .when()
+            .post(GEMINI_ENDPOINT)
+        .then()
+            .statusCode(200);
+        // @formatter:on
+
+        // lastRequestBody is inherited from the tutorial base class and is read here as the forwarded body.
+        assertThat(from(lastRequestBody).getInt("generationConfig.maxOutputTokens"), equalTo(200));
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
new file mode 100644
index 0000000000..79b1a71e3e
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/google/SharingApiKeysGoogleTutorialTest.java
@@ -0,0 +1,219 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.google;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml}.
+ *
+ *

The tutorial demonstrates sharing a single upstream API key between multiple users, + * each identified by their own gateway key and subject to individual token budgets: + *

    + *
  • alice — key {@code abc123}, budget 500 tokens
  • + *
  • bob — key {@code qwertz}, budget 10 000 tokens
  • + *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200}, + * allowed models: {@code gemini-2.5-pro}, {@code gemini-2.5-flash}, {@code gemini-2.5-flash-lite}, + * {@code gemini-2.0-flash}, {@code gemini-2.0-flash-lite}. + * + *

For Google Gemini the model is part of the URL path
+ * ({@code /v1beta/models/<model>:generateContent}), not the request body.
+ */
+public class SharingApiKeysGoogleTutorialTest extends AbstractGoogleTutorialTest {
+
+    // Gateway-facing keys of the two tutorial users.
+    private static final String ALICE = "abc123";
+    private static final String BOB = "qwertz";
+
+    // Gateway URL for an allowed model (gemini-2.5-flash).
+    private static final String GEMINI_FLASH_ENDPOINT =
+            LOCALHOST_2000 + "/v1beta/models/gemini-2.5-flash:generateContent";
+
+    @Override
+    protected String getTutorialYaml() {
+        return "20-Sharing-API-Keys.yaml";
+    }
+
+    /** A request with alice's key is answered with 200 and the mock's reply text. */
+    @Test
+    void aliceCanSendRequest() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", ALICE)
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(GEMINI_FLASH_ENDPOINT)
+        .then()
+            .statusCode(200)
+            .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+        // @formatter:on
+    }
+
+    /** A request with bob's key is answered with 200 and the mock's reply text. */
+    @Test
+    void bobCanSendRequest() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", BOB)
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(GEMINI_FLASH_ENDPOINT)
+        .then()
+            .statusCode(200)
+            .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+        // @formatter:on
+    }
+
+    /** A key that belongs to no configured user yields 401 with status {@code UNAUTHENTICATED}. */
+    @Test
+    void unknownApiKeyIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", "invalid-key")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(GEMINI_FLASH_ENDPOINT)
+        .then()
+            .statusCode(401)
+            .body("error.status", equalTo("UNAUTHENTICATED"))
+            .body("error.message", containsString("Invalid API key"));
+        // @formatter:on
+    }
+
+    /**
+     * The gateway is configured with its own upstream {@code apiKey}. When a user request
+     * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must
+     * replace it with the configured upstream key before forwarding to the LLM provider.
+     * For Google Gemini, the key is carried in the {@code x-goog-api-key} header.
+     */
+    @Test
+    void userApiKeyIsReplacedWithGatewayApiKey() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", ALICE)
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(GEMINI_FLASH_ENDPOINT)
+        .then()
+            .log().ifValidationFails()
+            .statusCode(200);
+        // @formatter:on
+
+        assertThat(lastRequestApiKey, not(equalTo(ALICE)));
+        assertThat(lastRequestApiKey, equalTo(TEST_API_KEY));
+    }
+
+    /**
+     * For Google Gemini the model is extracted from the URL path. Sending a request to
+     * {@code /v1beta/models/gpt-5:generateContent} uses model {@code gpt-5}, which is not
+     * in the allowed list, so the gateway rejects it with 400.
+     */
+    @Test
+    void wrongModelIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", ALICE)
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1beta/models/gpt-5:generateContent")
+        .then()
+            .statusCode(400)
+            .body("error.status", equalTo("INVALID_ARGUMENT"))
+            .body("error.message", containsString("gpt-5"))
+            .body("error.message", containsString("not allowed"));
+        // @formatter:on
+    }
+
+    /** {@code max-input.json} exceeds the 100-token input limit and is rejected with 400. */
+    @Test
+    void inputTokenLimitExceededIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", ALICE)
+            .body(readFileFromBaseDir("max-input.json"))
+        .when()
+            .post(GEMINI_FLASH_ENDPOINT)
+        .then()
+            .statusCode(400)
+            .body("error.status", equalTo("INVALID_ARGUMENT"))
+            .body("error.message", containsString("exceeds the maximum allowed"))
+            .body("error.message", containsString("100"));
+        // @formatter:on
+    }
+
+    /**
+     * Alice has a budget of 500 tokens. Each request with {@code max-output.json} projects
+     * 9 (input estimate) + 200 (capped maxOutputTokens) = 209 tokens. The mock returns
+     * 100 tokens of actual usage per call, so the running total grows by 100 after each response.
+     *
+     * <p>Budget accounting per request:
+     * <pre>
+     *   1st: 500 - 0   - 209 = 291  → forwarded; used becomes 100
+     *   2nd: 500 - 100 - 209 = 191  → forwarded; used becomes 200
+     *   3rd: 500 - 200 - 209 =  91  → forwarded; used becomes 300
+     *   4th: 500 - 300 - 209 =  -9  → rejected with 429
+     * </pre>
+     *
+     * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests
+     * after alice is blocked.
+     */
+    @Test
+    void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException {
+        for (int i = 0; i < 3; i++) {
+            // @formatter:off
+            given()
+                .contentType("application/json")
+                .header("x-goog-api-key", ALICE)
+                .body(readFileFromBaseDir("max-output.json"))
+            .when()
+                .post(GEMINI_FLASH_ENDPOINT)
+            .then()
+                .statusCode(200);
+            // @formatter:on
+        }
+
+        // Alice's budget is now exhausted
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", ALICE)
+            .body(readFileFromBaseDir("max-output.json"))
+        .when()
+            .post(GEMINI_FLASH_ENDPOINT)
+        .then()
+            .statusCode(429)
+            .body("error.status", equalTo("RESOURCE_EXHAUSTED"));
+
+        // Bob's budget is independent — he can still send requests
+        given()
+            .contentType("application/json")
+            .header("x-goog-api-key", BOB)
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(GEMINI_FLASH_ENDPOINT)
+        .then()
+            .statusCode(200)
+            .body("candidates[0].content.parts[0].text", equalTo("I am a mock."));
+        // @formatter:on
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java
new file mode 100644
index 0000000000..54136f4c2f
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/AbstractOpenAiTutorialTest.java
@@ -0,0 +1,61 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.openai; + +import com.predic8.membrane.tutorials.ai.llmgateway.AbstractAiTutorialTest; + +/** + * Base class for OpenAI LLM-Gateway tutorial tests. + * + *

Overrides the upstream URL and the API-key header so the mock captures
+ * the {@code Authorization} header that OpenAI uses instead of {@code x-api-key}.
+ * The mock response is formatted as an OpenAI Responses-API reply and reports
+ * 100 total tokens (50 input + 50 output) per call.
+ */
+public abstract class AbstractOpenAiTutorialTest extends AbstractAiTutorialTest {
+
+    /** OpenAI tutorials live in {@code ai/llm-gateway/openai}. */
+    @Override
+    protected String getTutorialDir() {
+        return "ai/llm-gateway/openai";
+    }
+
+    /** Upstream URL the OpenAI test setup looks for in the tutorial YAML. */
+    @Override
+    protected String getUpstreamApiUrl() {
+        return "https://api.openai.com";
+    }
+
+    /**
+     * OpenAI authenticates via {@code Authorization: Bearer <key>}.
+     * The full header value (including the "Bearer " prefix) is captured.
+     */
+    @Override
+    protected String getApiKeyHeader() {
+        return "authorization";
+    }
+
+    /**
+     * Minimal OpenAI Responses-API reply with 50 input + 50 output = 100 total tokens.
+     * The higher per-request cost (vs. the default Claude mock) keeps the token-budget
+     * exhaustion test to three successful requests before alice's 500-token allowance runs out.
+     */
+    @Override
+    protected String mockResponse() {
+        return """
+                {"id":"resp_mock","object":"response","model":"gpt-5-nano",\
+                "output":[{"type":"message","role":"assistant",\
+                "content":[{"type":"output_text","text":"I am a mock."}]}],\
+                "usage":{"input_tokens":50,"output_tokens":50,"total_tokens":100}}""";
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java
new file mode 100644
index 0000000000..6dd96ee098
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/BasicOpenAiLLMGatewayTutorialTest.java
@@ -0,0 +1,105 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static io.restassured.path.json.JsonPath.from;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
+/**
+ * Integration test for
+ * {@code distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml}.
+ *
+ *

The tutorial configures an OpenAI LLM gateway with: + *

    + *
  • {@code maxInputTokens: 100} — requests whose estimated input exceeds 100 tokens are rejected
  • + *
  • {@code maxOutputTokens: 200} — {@code max_output_tokens} in the forwarded request is capped to 200
  • + *
+ * + *

The upstream OpenAI API is replaced by a local mock server so no real API key is needed.
+ */
+public class BasicOpenAiLLMGatewayTutorialTest extends AbstractOpenAiTutorialTest {
+
+    /** The YAML this test class starts Membrane with. */
+    @Override
+    protected String getTutorialYaml() {
+        return "10-Basic-LLM-Gateway.yaml";
+    }
+
+    /**
+     * A request within the token limits is forwarded to the upstream and its response is returned.
+     */
+    @Test
+    void simpleRequestIsForwarded() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("Authorization", "Bearer test-key")
+            .body(readFileFromBaseDir("simple.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/responses")
+        .then()
+            .statusCode(200)
+            .body("object", equalTo("response"));
+        // @formatter:on
+    }
+
+    /**
+     * A request whose message content exceeds {@code maxInputTokens} (100) is rejected by the
+     * gateway before reaching the upstream. The response uses the OpenAI error format.
+     */
+    @Test
+    void inputTokenLimitExceededIsRejected() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("Authorization", "Bearer test-key")
+            .body(readFileFromBaseDir("max-input.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/responses")
+        .then()
+            .statusCode(400)
+            .body("error.type", equalTo("invalid_request_error"))
+            .body("error.code", equalTo("context_length_exceeded"))
+            .body("error.message", containsString("100"));
+        // @formatter:on
+    }
+
+    /**
+     * When the request asks for more output tokens than {@code maxOutputTokens} (200) allows,
+     * the gateway rewrites {@code max_output_tokens} to 200 before forwarding to the upstream.
+     * The mock captures the forwarded body so we can verify the value was actually capped.
+     */
+    @Test
+    void outputTokensAreCappedBeforeForwarding() throws IOException {
+        // @formatter:off
+        given()
+            .contentType("application/json")
+            .header("Authorization", "Bearer test-key")
+            .body(readFileFromBaseDir("max-output.json"))
+        .when()
+            .post(LOCALHOST_2000 + "/v1/responses")
+        .then()
+            .statusCode(200);
+        // @formatter:on
+
+        // lastRequestBody is inherited from the tutorial base class and is read here as the forwarded body.
+        assertThat(from(lastRequestBody).getInt("max_output_tokens"), equalTo(200));
+    }
+}
diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
new file mode 100644
index 0000000000..88a6d380ad
--- /dev/null
+++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/SharingApiKeysOpenAiTutorialTest.java
@@ -0,0 +1,208 @@
+/* Copyright 2026 predic8 GmbH, www.predic8.com
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+package com.predic8.membrane.tutorials.ai.llmgateway.openai;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests for
+ * {@code distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml}.
+ *
+ *

The tutorial demonstrates sharing a single upstream API key between multiple users, + * each identified by their own gateway key and subject to individual token budgets: + *

    + *
  • alice — key {@code abc123}, budget 500 tokens
  • + *
  • bob — key {@code qwertz}, budget 10 000 tokens
  • + *
+ * Additional gateway limits: {@code maxInputTokens=100}, {@code maxOutputTokens=200}, + * allowed models: {@code gpt-5.4}, {@code gpt-5-nano}, {@code gpt-5-mini}. + */ +public class SharingApiKeysOpenAiTutorialTest extends AbstractOpenAiTutorialTest { + + private static final String ALICE = "abc123"; + private static final String BOB = "qwertz"; + + @Override + protected String getTutorialYaml() { + return "20-Sharing-API-Keys.yaml"; + } + + @Test + void aliceCanSendRequest() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200) + .body("object", equalTo("response")); + // @formatter:on + } + + @Test + void bobCanSendRequest() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + BOB) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200) + .body("object", equalTo("response")); + // @formatter:on + } + + @Test + void unknownApiKeyIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer invalid-key") + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(401) + .body("error.code", equalTo("invalid_authentication")); + // @formatter:on + } + + /** + * The gateway is configured with its own upstream {@code apiKey}. When a user request + * arrives carrying the user-facing key (e.g. alice's {@code abc123}), the gateway must + * replace it with the configured upstream key before forwarding to the LLM provider. + * For OpenAI, the key is carried in the {@code Authorization: Bearer <key>} header. 
+ */ + @Test + void userApiKeyIsReplacedWithGatewayApiKey() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200); + // @formatter:on + + assertThat(lastRequestApiKey, not(equalTo("Bearer " + ALICE))); + assertThat(lastRequestApiKey, equalTo("Bearer " + TEST_API_KEY)); + } + + @Test + void wrongModelIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("wrong-model.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(400) + .body("error.type", equalTo("invalid_request_error")) + .body("error.code", equalTo("model_not_allowed")) + .body("error.message", containsString("gpt-4")) + .body("error.message", containsString("not allowed")); + // @formatter:on + } + + @Test + void inputTokenLimitExceededIsRejected() throws IOException { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("max-input.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(400) + .body("error.type", equalTo("invalid_request_error")) + .body("error.code", equalTo("context_length_exceeded")) + .body("error.message", containsString("maximum context length")) + .body("error.message", containsString("100")); + // @formatter:on + } + + /** + * Alice has a budget of 500 tokens. Each request with {@code max-output.json} projects + * 9 (input estimate) + 200 (capped max_output_tokens) = 209 tokens. The mock returns + * 100 tokens of actual usage per call, so the running total grows by 100 after each response. + * + *

Budget accounting per request: + *

+     *   1st: 500 - 0   - 209 = 291  → forwarded; used becomes 100
+     *   2nd: 500 - 100 - 209 = 191  → forwarded; used becomes 200
+     *   3rd: 500 - 200 - 209 =  91  → forwarded; used becomes 300
+     *   4th: 500 - 300 - 209 =  -9  → rejected with 429
+     * 
+ * + * Bob's separate budget of 10 000 tokens is unaffected, so he can still send requests + * after alice is blocked. + */ + @Test + void alicesTokenBudgetIsExhaustedWhileBobIsUnaffected() throws IOException { + for (int i = 0; i < 3; i++) { + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200); + // @formatter:on + } + + // Alice's budget is now exhausted + // @formatter:off + given() + .contentType("application/json") + .header("Authorization", "Bearer " + ALICE) + .body(readFileFromBaseDir("max-output.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(429) + .body("error.type", equalTo("rate_limit_error")) + .body("error.code", equalTo("token_limit_exceeded")); + + // Bob's budget is independent — he can still send requests + given() + .contentType("application/json") + .header("Authorization", "Bearer " + BOB) + .body(readFileFromBaseDir("simple.json")) + .when() + .post(LOCALHOST_2000 + "/v1/responses") + .then() + .statusCode(200) + .body("object", equalTo("response")); + // @formatter:on + } +} diff --git a/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java new file mode 100644 index 0000000000..679cfca6a7 --- /dev/null +++ b/distribution/src/test/java/com/predic8/membrane/tutorials/ai/llmgateway/openai/StreamingOpenAiLLMGatewayTutorialTest.java @@ -0,0 +1,135 @@ +/* Copyright 2026 predic8 GmbH, www.predic8.com + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +package com.predic8.membrane.tutorials.ai.llmgateway.openai; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.time.Duration; + +import static io.restassured.path.json.JsonPath.from; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration tests for the streaming (SSE) path of + * {@code distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml}. + * + *

The mock upstream returns {@code Content-Type: text/event-stream} with three + * SSE events so the gateway's SSE processing path is exercised end-to-end without + * a real OpenAI connection: + * + *

    + *
  • {@code response.created} — initial acknowledgement
  • + *
  • {@code response.output_text.delta} — incremental text chunk
  • + *
  • {@code response.completed} — terminal event carrying usage statistics
  • + *
+ * + *

Because RestAssured does not handle server-sent events well, these tests use the + * Java {@link java.net.http.HttpClient} directly — the same approach used in + * {@code ServerSentEventsTutorialTest}. + */ +public class StreamingOpenAiLLMGatewayTutorialTest extends AbstractOpenAiTutorialTest { + + private static final String RESPONSES_ENDPOINT = LOCALHOST_2000 + "/v1/responses"; + + @Override + protected String getTutorialYaml() { + return "10-Basic-LLM-Gateway.yaml"; + } + + /** Tell the mock server to respond as a finite SSE stream. */ + @Override + protected String mockContentType() { + return "text/event-stream"; + } + + /** + * A minimal but complete SSE body: one delta event followed by the terminal + * {@code response.completed} event that carries the usage node the gateway + * reads for token accounting. + */ + @Override + protected String mockResponse() { + return """ + event: response.created + data: {"type":"response.created","response":{"id":"resp_mock","object":"response","status":"in_progress","model":"gpt-5-nano"}} + + event: response.output_text.delta + data: {"type":"response.output_text.delta","item_id":"msg_mock","output_index":0,"content_index":0,"delta":"I am a mock."} + + event: response.completed + data: {"type":"response.completed","response":{"id":"resp_mock","object":"response","status":"completed","model":"gpt-5-nano","output":[{"type":"message","id":"msg_mock","status":"completed","role":"assistant","content":[{"type":"output_text","text":"I am a mock."}]}],"usage":{"input_tokens":50,"output_tokens":50,"total_tokens":100}}} + + """; + } + + /** + * The gateway must forward a streaming request and pass the {@code text/event-stream} + * response through to the client intact. The response body must contain the SSE events + * emitted by the upstream, including the delta text. 
+ */ + @Test + void streamingResponseIsForwarded() throws IOException, InterruptedException { + var response = sendStreamingRequest("stream.json"); + + assertEquals(200, response.statusCode()); + assertTrue(response.headers().firstValue("content-type").orElse("").contains("text/event-stream"), + "Expected Content-Type text/event-stream"); + assertTrue(response.body().contains("response.output_text.delta"), + "SSE body must contain the delta event name"); + assertTrue(response.body().contains("I am a mock."), + "SSE body must contain the delta text"); + assertTrue(response.body().contains("response.completed"), + "SSE body must contain the terminal event"); + } + + /** + * When the request carries {@code "max_output_tokens": 500} and the gateway is + * configured with {@code maxOutputTokens: 200}, the gateway must rewrite the field + * to 200 before forwarding — even for streaming requests. + * + *

The mock captures the forwarded request body so we can assert the capped value. + */ + @Test + void streamingOutputTokensAreCappedBeforeForwarding() throws IOException, InterruptedException { + var response = sendStreamingRequest("max-output-stream.json"); + + assertEquals(200, response.statusCode()); + assertThat(from(lastRequestBody).getInt("max_output_tokens"), equalTo(200)); + } + + // ------------------------------------------------------------------------- + + private HttpResponse sendStreamingRequest(String fixture) throws IOException, InterruptedException { + var request = HttpRequest.newBuilder() + .uri(URI.create(RESPONSES_ENDPOINT)) + .timeout(Duration.ofSeconds(10)) + .header("Content-Type", "application/json") + .header("Authorization", "Bearer test-key") + .POST(HttpRequest.BodyPublishers.ofString(readFileFromBaseDir(fixture))) + .build(); + + try (var client = HttpClient.newHttpClient()) { + return client.send(request, HttpResponse.BodyHandlers.ofString()); + } + } +} diff --git a/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml new file mode 100644 index 0000000000..ddaaaedcf1 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/10-Basic-LLM-Gateway.yaml @@ -0,0 +1,28 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Basic LLM Gateway (Anthropic Claude) +# +# Replace <> with your Claude API key. +# +# 1. Hello World +# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @simple.json http://localhost:2000/v1/messages +# Check the response and the Membrane logs. +# +# 2. Exceed the input token limit +# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @max-input.json http://localhost:2000/v1/messages +# Returns an error because the request exceeds maxInputTokens. +# +# 3. 
Exceed the output token limit +# curl -v -H "Content-Type: application/json" -H "x-api-key: <>" -H "anthropic-version: 2023-06-01" -d @max-output.json http://localhost:2000/v1/messages +# Check the Membrane log for limiting max tokens to 200 + +api: + port: 2000 + flow: + - llmGateway: + claude: {} + policies: + maxInputTokens: 100 + maxOutputTokens: 200 + target: + url: https://api.anthropic.com \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml new file mode 100644 index 0000000000..3a6a54f2f4 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/20-Sharing-API-Keys.yaml @@ -0,0 +1,57 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Sharing LLM API Keys (Claude) +# +# Replace <> with your Claude API key. +# +# Requests: +# +# 1. Hello AI +# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @simple.json http://localhost:2000/v1/messages +# Check: Successful response +# +# 2. Token Limit Exceeded +# Repeat the previous request until you receive: 429 Token Limit Exceeded +# User alice is blocked after the limit is exceeded. Bob should still be able to send requests. +# +# 3. Wrong Model +# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @wrong-model.json http://localhost:2000/v1/messages +# Check: Error response +# +# 4. Max. Input Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @max-input.json http://localhost:2000/v1/messages +# Check: Error response +# +# 5. Requested Max. 
Output Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "x-api-key: abc123" -H "anthropic-version: 2023-06-01" -d @max-output.json http://localhost:2000/v1/messages +# Check Membrane log: totalTokens should not exceed 200 even though it was requested in max-output.json + +api: + port: 2000 + flow: + - llmGateway: + claude: {} + apiKey: <> + policies: + # Limits per request + maxInputTokens: 100 + maxOutputTokens: 200 + models: + - claude-sonnet-4-0 + - claude-opus-4-0 + - claude-haiku-3-5 + simpleStore: + # User-facing API keys for the LLM Gateway + users: + - name: alice + apiKey: abc123 + tokens: 250 # Token limit for alice + - name: bob + apiKey: qwertz + tokens: 10000 + # Time in seconds after which the token limit is reset + limitResetPeriod: 60 + - request: + - log: {} + target: + url: https://api.anthropic.com diff --git a/distribution/tutorials/ai/llm-gateway/claude/max-input.json b/distribution/tutorials/ai/llm-gateway/claude/max-input.json new file mode 100644 index 0000000000..a51d79d50e --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/max-input.json @@ -0,0 +1,10 @@ +{ + "model": "claude-sonnet-4-0", + "max_tokens": 100, + "messages": [ + { + "role": "user", + "content": "Who are you, where do you get your information from, how do you answer questions, why were you created, what kinds of problems can you solve, where do you go when you search for information, how do you decide what is important, what do you know about programming, science, history, languages, and technology, how do you explain difficult concepts to people, why do people use AI assistants, what happens when you do not know an answer, and why should someone trust the answers you provide?" 
+ } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/max-output.json b/distribution/tutorials/ai/llm-gateway/claude/max-output.json new file mode 100644 index 0000000000..b3746f34c6 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/max-output.json @@ -0,0 +1,10 @@ +{ + "model": "claude-sonnet-4-0", + "max_tokens": 500, + "messages": [ + { + "role": "user", + "content": "Explain in detail who you are?" + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/membrane.cmd b/distribution/tutorials/ai/llm-gateway/claude/membrane.cmd new file mode 100644 index 0000000000..8d2d64e9cf --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/membrane.cmd @@ -0,0 +1,24 @@ +@echo off +setlocal EnableExtensions + +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" + +set "dir=%SCRIPT_DIR%" + +:search_up +if exist "%dir%\LICENSE.txt" if exist "%dir%\scripts\run-membrane.cmd" goto found +for %%A in ("%dir%\..") do set "next=%%~fA" +if /I "%next%"=="%dir%" goto notfound +set "dir=%next%" +goto search_up + +:found +set "MEMBRANE_HOME=%dir%" +set "MEMBRANE_CALLER_DIR=%SCRIPT_DIR%" +call "%MEMBRANE_HOME%\scripts\run-membrane.cmd" %* +exit /b %ERRORLEVEL% + +:notfound +>&2 echo Could not locate Membrane root. Ensure directory structure is correct. +exit /b 1 diff --git a/distribution/tutorials/ai/llm-gateway/claude/membrane.sh b/distribution/tutorials/ai/llm-gateway/claude/membrane.sh new file mode 100755 index 0000000000..195dae51ec --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/membrane.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# Default: ./proxies.xml (next to this script); fallback -> $MEMBRANE_HOME/conf/proxies.xml +# JAVA_OPTS: relative -D paths are auto-resolved against $MEMBRANE_HOME (absolute/URI unchanged). 
+# Examples: +# export JAVA_OPTS='-Dlog4j.configurationFile=examples/logging/access/log4j2_access.xml' +# export JAVA_OPTS='-Dlog4j.configurationFile=/abs/path/log4j2.xml' + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P) + +dir="$SCRIPT_DIR" +while [ "$dir" != "/" ]; do + if [ -f "$dir/LICENSE.txt" ] && [ -f "$dir/scripts/run-membrane.sh" ]; then + export MEMBRANE_HOME="$dir" + export MEMBRANE_CALLER_DIR="$SCRIPT_DIR" + exec sh "$dir/scripts/run-membrane.sh" "$@" + fi + dir=$(dirname "$dir") +done + +echo "Could not locate Membrane root. Ensure directory structure is correct." >&2 +exit 1 \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/simple.json b/distribution/tutorials/ai/llm-gateway/claude/simple.json new file mode 100644 index 0000000000..bd6b974408 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/simple.json @@ -0,0 +1,10 @@ +{ + "model": "claude-sonnet-4-0", + "max_tokens": 100, + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/claude/wrong-model.json b/distribution/tutorials/ai/llm-gateway/claude/wrong-model.json new file mode 100644 index 0000000000..d149716e51 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/claude/wrong-model.json @@ -0,0 +1,10 @@ +{ + "model": "gpt-5", + "max_tokens": 100, + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml new file mode 100644 index 0000000000..2cbf4c236d --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/10-Basic-LLM-Gateway.yaml @@ -0,0 +1,28 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Basic LLM Gateway (Google Gemini) +# +# Replace <> with your Google API key. +# +# 1. 
Hello World +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: <>" -d @simple.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check the response and the Membrane logs. +# +# 2. Exceed the input token limit +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: <>" -d @max-input.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Returns an error because the request exceeds maxInputTokens. +# +# 3. Exceed the output token limit +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: <>" -d @max-output.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check the Membrane log for limiting max tokens to 200 + +api: + port: 2000 + flow: + - llmGateway: + google: {} + policies: + maxInputTokens: 100 + maxOutputTokens: 200 + target: + url: https://generativelanguage.googleapis.com \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml new file mode 100644 index 0000000000..4a9ef00ba4 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/20-Sharing-API-Keys.yaml @@ -0,0 +1,57 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Sharing LLM API Keys (Google Gemini) +# +# Replace <> with your Gemini API key. +# +# Requests: +# +# 1. Hello AI +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @simple.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check: Successful response +# +# 2. Token Limit Exceeded +# Repeat the previous request until you receive: 429 Token Limit Exceeded +# User alice is blocked after the limit is exceeded. Bob should still be able to send requests. +# +# 3. 
Wrong Model +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @simple.json http://localhost:2000/v1beta/models/gpt-5:generateContent +# Check: Error response +# +# 4. Max. Input Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @max-input.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check: Error response +# +# 5. Requested Max. Output Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "x-goog-api-key: abc123" -d @max-output.json http://localhost:2000/v1beta/models/gemini-2.5-flash:generateContent +# Check Membrane log: totalTokens should not exceed 200 even though it was requested in max-output.json + +api: + port: 2000 + flow: + - llmGateway: + google: {} + apiKey: <> + policies: + # Limits per request + maxInputTokens: 100 + maxOutputTokens: 200 + models: + - gemini-2.5-pro + - gemini-2.5-flash + - gemini-2.5-flash-lite + - gemini-2.0-flash + - gemini-2.0-flash-lite + simpleStore: + # User-facing API keys for the LLM Gateway + users: + - name: alice + apiKey: abc123 + tokens: 500 # Token limit for alice + - name: bob + apiKey: qwertz + tokens: 10000 + # Time in seconds after which the token limit is reset + limitResetPeriod: 60 + target: + url: https://generativelanguage.googleapis.com diff --git a/distribution/tutorials/ai/llm-gateway/google/max-input.json b/distribution/tutorials/ai/llm-gateway/google/max-input.json new file mode 100644 index 0000000000..017608297f --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/max-input.json @@ -0,0 +1,11 @@ +{ + "contents": [ + { + "parts": [ + { + "text": "Who are you, where do you get your information from, how do you answer questions, why were you created, what kinds of problems can you solve, where do you go when you search for information, how do you decide what is important, what do you know about programming, science, history, languages, and technology, how do you explain difficult 
concepts to people, why do people use AI assistants, what happens when you do not know an answer, and why should someone trust the answers you provide?" + } + ] + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/max-output.json b/distribution/tutorials/ai/llm-gateway/google/max-output.json new file mode 100644 index 0000000000..615c6db3a0 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/max-output.json @@ -0,0 +1,14 @@ +{ + "contents": [ + { + "parts": [ + { + "text": "Explain in detail who you are?" + } + ] + } + ], + "generationConfig": { + "maxOutputTokens": 500 + } +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/membrane.cmd b/distribution/tutorials/ai/llm-gateway/google/membrane.cmd new file mode 100644 index 0000000000..8d2d64e9cf --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/membrane.cmd @@ -0,0 +1,24 @@ +@echo off +setlocal EnableExtensions + +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" + +set "dir=%SCRIPT_DIR%" + +:search_up +if exist "%dir%\LICENSE.txt" if exist "%dir%\scripts\run-membrane.cmd" goto found +for %%A in ("%dir%\..") do set "next=%%~fA" +if /I "%next%"=="%dir%" goto notfound +set "dir=%next%" +goto search_up + +:found +set "MEMBRANE_HOME=%dir%" +set "MEMBRANE_CALLER_DIR=%SCRIPT_DIR%" +call "%MEMBRANE_HOME%\scripts\run-membrane.cmd" %* +exit /b %ERRORLEVEL% + +:notfound +>&2 echo Could not locate Membrane root. Ensure directory structure is correct. 
+exit /b 1 diff --git a/distribution/tutorials/ai/llm-gateway/google/membrane.sh b/distribution/tutorials/ai/llm-gateway/google/membrane.sh new file mode 100755 index 0000000000..195dae51ec --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/membrane.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# Default: ./proxies.xml (next to this script); fallback -> $MEMBRANE_HOME/conf/proxies.xml +# JAVA_OPTS: relative -D paths are auto-resolved against $MEMBRANE_HOME (absolute/URI unchanged). +# Examples: +# export JAVA_OPTS='-Dlog4j.configurationFile=examples/logging/access/log4j2_access.xml' +# export JAVA_OPTS='-Dlog4j.configurationFile=/abs/path/log4j2.xml' + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P) + +dir="$SCRIPT_DIR" +while [ "$dir" != "/" ]; do + if [ -f "$dir/LICENSE.txt" ] && [ -f "$dir/scripts/run-membrane.sh" ]; then + export MEMBRANE_HOME="$dir" + export MEMBRANE_CALLER_DIR="$SCRIPT_DIR" + exec sh "$dir/scripts/run-membrane.sh" "$@" + fi + dir=$(dirname "$dir") +done + +echo "Could not locate Membrane root. Ensure directory structure is correct." >&2 +exit 1 \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/google/simple.json b/distribution/tutorials/ai/llm-gateway/google/simple.json new file mode 100644 index 0000000000..3bf6c67b2e --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/google/simple.json @@ -0,0 +1,11 @@ +{ + "contents": [ + { + "parts": [ + { + "text": "Who are you?" + } + ] + } + ] +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml b/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml new file mode 100644 index 0000000000..0074494b40 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/10-Basic-LLM-Gateway.yaml @@ -0,0 +1,27 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Basic LLM Gateway (OpenAI) +# +# Replace <> with your OpenAI API key. +# +# 1. 
Hello World +# curl -H "Content-Type: application/json" -H "Authorization: Bearer <>" -d @simple.json http://localhost:2000/v1/responses +# +# 2. Exceed the input token limit +# curl -H "Content-Type: application/json" -H "Authorization: Bearer <>" -d @max-input.json http://localhost:2000/v1/responses +# Returns an error because the request exceeds maxInputTokens. +# +# 3. Exceed the output token limit +# curl -H "Content-Type: application/json" -H "Authorization: Bearer <>" -d @max-output.json http://localhost:2000/v1/responses +# Check the max_output_tokens field in the response and the Membrane log + +api: + port: 2000 + flow: + - llmGateway: + openai: {} + policies: + maxInputTokens: 100 + maxOutputTokens: 200 + target: + url: https://api.openai.com diff --git a/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml b/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml new file mode 100644 index 0000000000..8aa3e72f4d --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/20-Sharing-API-Keys.yaml @@ -0,0 +1,55 @@ +# yaml-language-server: $schema=https://www.membrane-api.io/v7.2.1.json +# +# Tutorial: Sharing LLM API Keys (OpenAI) +# +# Replace <> with your OpenAI API key. +# +# Requests: +# +# 1. Hello AI +# curl -v -H "Content-Type: application/json" -H "Authorization: Bearer abc123" -d @simple.json http://localhost:2000/v1/responses +# Check: Successful response +# +# 2. Token Limit Exceeded +# Repeat the previous request until you receive: 429 Token Limit Exceeded +# User alice is blocked after the limit is exceeded. Bob should still be able to send requests. +# +# 3. Wrong Model +# curl -v -H "Content-Type: application/json" -H "Authorization: Bearer abc123" -d @wrong-model.json http://localhost:2000/v1/responses +# Check: Error response +# +# 4. Max. 
Input Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "Authorization: Bearer abc123" -d @max-input.json http://localhost:2000/v1/responses +# Check: Error response +# +# 5. Requested Max. Output Tokens Exceeded +# curl -v -H "Content-Type: application/json" -H "Authorization: Bearer abc123" -d @max-output.json http://localhost:2000/v1/responses +# Check: Field max_output_tokens in the response + +api: + port: 2000 + flow: + - llmGateway: + apiKey: <> + policies: + # Limits per request + maxInputTokens: 100 + maxOutputTokens: 200 + models: + - gpt-5.4 + - gpt-5-nano + - gpt-5-mini + openai: {} + simpleStore: + # User-facing API keys for the LLM Gateway + users: + - name: alice + apiKey: abc123 + tokens: 500 # Token limit for alice + - name: bob + apiKey: qwertz + tokens: 10000 + # Time in seconds after which the token limit is reset + limitResetPeriod: 60 + target: + url: https://api.openai.com/ diff --git a/distribution/tutorials/ai/llm-gateway/openai/max-input.json b/distribution/tutorials/ai/llm-gateway/openai/max-input.json new file mode 100644 index 0000000000..e4b0e90985 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/max-input.json @@ -0,0 +1,4 @@ +{ + "model": "gpt-5-nano", + "input": "Who are you, where do you get your information from, how do you answer questions, why were you created, what kinds of problems can you solve, where do you go when you search for information, how do you decide what is important, what do you know about programming, science, history, languages, and technology, how do you explain difficult concepts to people, why do people use AI assistants, what happens when you do not know an answer, and why should someone trust the answers you provide?" 
+} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json b/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json new file mode 100644 index 0000000000..0a747d70e4 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/max-output-stream.json @@ -0,0 +1,6 @@ +{ + "model": "gpt-5-nano", + "input": "Explain in detail who you are?", + "max_output_tokens": 500, + "stream": true +} diff --git a/distribution/tutorials/ai/llm-gateway/openai/max-output.json b/distribution/tutorials/ai/llm-gateway/openai/max-output.json new file mode 100644 index 0000000000..cc7e04017f --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/max-output.json @@ -0,0 +1,5 @@ +{ + "model": "gpt-5-nano", + "input": "Explain in detail who you are?", + "max_output_tokens": 500 +} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/membrane.cmd b/distribution/tutorials/ai/llm-gateway/openai/membrane.cmd new file mode 100644 index 0000000000..8d2d64e9cf --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/membrane.cmd @@ -0,0 +1,24 @@ +@echo off +setlocal EnableExtensions + +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" + +set "dir=%SCRIPT_DIR%" + +:search_up +if exist "%dir%\LICENSE.txt" if exist "%dir%\scripts\run-membrane.cmd" goto found +for %%A in ("%dir%\..") do set "next=%%~fA" +if /I "%next%"=="%dir%" goto notfound +set "dir=%next%" +goto search_up + +:found +set "MEMBRANE_HOME=%dir%" +set "MEMBRANE_CALLER_DIR=%SCRIPT_DIR%" +call "%MEMBRANE_HOME%\scripts\run-membrane.cmd" %* +exit /b %ERRORLEVEL% + +:notfound +>&2 echo Could not locate Membrane root. Ensure directory structure is correct. 
+exit /b 1 diff --git a/distribution/tutorials/ai/llm-gateway/openai/membrane.sh b/distribution/tutorials/ai/llm-gateway/openai/membrane.sh new file mode 100755 index 0000000000..195dae51ec --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/membrane.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# Default: ./proxies.xml (next to this script); fallback -> $MEMBRANE_HOME/conf/proxies.xml +# JAVA_OPTS: relative -D paths are auto-resolved against $MEMBRANE_HOME (absolute/URI unchanged). +# Examples: +# export JAVA_OPTS='-Dlog4j.configurationFile=examples/logging/access/log4j2_access.xml' +# export JAVA_OPTS='-Dlog4j.configurationFile=/abs/path/log4j2.xml' + +SCRIPT_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P) + +dir="$SCRIPT_DIR" +while [ "$dir" != "/" ]; do + if [ -f "$dir/LICENSE.txt" ] && [ -f "$dir/scripts/run-membrane.sh" ]; then + export MEMBRANE_HOME="$dir" + export MEMBRANE_CALLER_DIR="$SCRIPT_DIR" + exec sh "$dir/scripts/run-membrane.sh" "$@" + fi + dir=$(dirname "$dir") +done + +echo "Could not locate Membrane root. Ensure directory structure is correct." >&2 +exit 1 \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/simple.json b/distribution/tutorials/ai/llm-gateway/openai/simple.json new file mode 100644 index 0000000000..ab3c4b7bde --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/simple.json @@ -0,0 +1,4 @@ +{ + "model": "gpt-5-nano", + "input": "Who are you?" 
+} \ No newline at end of file diff --git a/distribution/tutorials/ai/llm-gateway/openai/stream.json b/distribution/tutorials/ai/llm-gateway/openai/stream.json new file mode 100644 index 0000000000..1c75ce00aa --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/stream.json @@ -0,0 +1,5 @@ +{ + "model": "gpt-5-nano", + "input": "Who are you?", + "stream": true +} diff --git a/distribution/tutorials/ai/llm-gateway/openai/wrong-model.json b/distribution/tutorials/ai/llm-gateway/openai/wrong-model.json new file mode 100644 index 0000000000..7a551564a2 --- /dev/null +++ b/distribution/tutorials/ai/llm-gateway/openai/wrong-model.json @@ -0,0 +1,4 @@ +{ + "model": "gpt-4", + "input": "Who are you?" +} \ No newline at end of file