diff --git a/.env.example b/.env.example index 6ec7ef8..14f0039 100644 --- a/.env.example +++ b/.env.example @@ -17,6 +17,10 @@ GEMINI_API_KEY=xxxxx # Get your key from: https://console.scaleway.com/ SCALEWAY_API_KEY=xxxxx +# OpenRouter API Key (OpenAI compatible) +# Get your key from: https://openrouter.ai/keys +OPENROUTER_API_KEY=sk-or-xxxxx + # Optional: Set to 'true' to enable verbose output during tests INTEGRATION_TEST_VERBOSE=false diff --git a/Readme.md b/Readme.md index 71f661b..6ebbe1f 100644 --- a/Readme.md +++ b/Readme.md @@ -1,5 +1,8 @@ # PHP LLM - Agentic AI Framework for PHP +[![Latest Version](https://img.shields.io/packagist/v/soukicz/llm.svg)](https://packagist.org/packages/soukicz/llm) +[![License](https://img.shields.io/packagist/l/soukicz/llm.svg)](https://packagist.org/packages/soukicz/llm) + Build powerful **AI agents** that can use tools, self-correct, and take autonomous actions. A unified PHP framework for Large Language Models with support for Anthropic Claude, OpenAI GPT, Google Gemini, and more. > **What is Agentic AI?** Agents that can call functions, validate outputs, iterate on responses, and make decisions autonomously - not just generate text. 
@@ -24,7 +27,8 @@ composer require soukicz/llm - 📝 **Built-in Tools** - TextEditorTool for file manipulation, embeddings API, and more - ✅ **Self-Correcting** - Validate and refine outputs with feedback loops - 📸 **Multimodal** - Process images and PDFs alongside text (with caching support) -- 🧠 **Reasoning Models** - Advanced thinking with o3 and o4-mini reasoning models +- 🧠 **Reasoning Models** - OpenAI reasoning models, Anthropic extended thinking, and Gemini thinking +- 📐 **Structured Output** - JSON Schema enforced responses across Anthropic, OpenAI, and Gemini - 📡 **Streaming** - Real-time response streaming with optional listener for live progress updates - ⚡ **Async & Caching** - Fast, cost-effective operations with prompt caching - 💾 **State Persistence** - Save and resume conversations with thread IDs @@ -44,13 +48,18 @@ All LLM clients in this library are **asynchronous by default** using Guzzle Pro - **Agent Client** (`LLMAgentClient`) - High-level orchestrator that handles multi-turn conversations, automatic tool calling, feedback loops, and retries. Use this for building agents that need to iterate or use tools. 
### Model Versions -Anthropic and OpenAI models require explicit version constants: +Many Anthropic and OpenAI models pin an explicit version constant: +```php +run( client: $client, request: new LLMRequest( - model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + model: new AnthropicClaude46Sonnet(), conversation: new LLMConversation([ LLMMessage::createFromUserString('What is PHP?') ]), @@ -119,7 +128,7 @@ $client = new AnthropicClient( apiKey: 'sk-ant-xxxxx', cache: $cache, customHttpMiddleware: null, - betaFeatures: [] // e.g., ['text-editor-20250116'] for TextEditorTool + betaFeatures: [] // Optional Anthropic beta feature flags ); // OpenAI (organization parameter is required) @@ -254,19 +263,19 @@ Use advanced reasoning for complex problems: ```php use Soukicz\Llm\Config\ReasoningEffort; use Soukicz\Llm\Config\ReasoningBudget; -use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Sonnet; -use Soukicz\Llm\Client\OpenAI\Model\GPT5; +use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet; +use Soukicz\Llm\Client\OpenAI\Model\GPT54; -// Control reasoning with effort level (for supported models) +// Control reasoning with effort level (OpenAI, Anthropic, and Gemini) $request = new LLMRequest( - model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + model: new GPT54(GPT54::VERSION_2026_03_05), conversation: $conversation, - reasoningConfig: ReasoningEffort::HIGH // LOW, MEDIUM, or HIGH + reasoningConfig: ReasoningEffort::HIGH // NONE, MINIMAL, LOW, MEDIUM, HIGH, or EXTRA_HIGH ); -// Or use token-based budget control (for supported models) +// Or use token-based budget control (Anthropic only) $request = new LLMRequest( - model: new GPT5(GPT5::VERSION_2025_08_07), + model: new AnthropicClaude46Sonnet(), conversation: $conversation, reasoningConfig: new ReasoningBudget(10000) // Max reasoning tokens ); @@ -274,6 +283,37 @@ $request = new LLMRequest( **→ [Reasoning Models 
Documentation](docs/guides/reasoning.md)** +### 📐 Structured Output + +Force responses to match a JSON Schema and get them back as a PHP array - supported by Anthropic, OpenAI, and Gemini: + +```php +use Soukicz\Llm\Config\StructuredOutputConfig; + +$response = $agentClient->run($client, new LLMRequest( + model: new AnthropicClaude46Sonnet(), + conversation: new LLMConversation([ + LLMMessage::createFromUserString('Extract user data: John Doe, age 30, email john@example.com') + ]), + structuredOutputConfig: new StructuredOutputConfig([ + 'type' => 'object', + 'properties' => [ + 'name' => ['type' => 'string'], + 'age' => ['type' => 'integer'], + 'email' => ['type' => 'string'], + ], + 'required' => ['name', 'age', 'email'], + 'additionalProperties' => false, + ]), +)); + +$data = $response->getLastStructuredData(); // ['name' => 'John Doe', 'age' => 30, 'email' => 'john@example.com'] +``` + +> **Tip:** Strict schema validation is enabled by default - pass `strict: false` to relax it. + +**→ [Structured Output Documentation](docs/guides/structured-output.md)** + ## Advanced Features ### 📝 TextEditorTool - Built-in File Manipulation @@ -281,22 +321,21 @@ $request = new LLMRequest( Empower agents to read, write, and manage files with the built-in TextEditorTool: ```php -use Soukicz\Llm\Tool\TextEditorTool; -use Soukicz\Llm\Tool\TextEditorStorageFilesystem; +use Soukicz\Llm\Tool\TextEditor\TextEditorTool; +use Soukicz\Llm\Tool\TextEditor\TextEditorStorageFilesystem; // Create filesystem storage with sandboxing $storage = new TextEditorStorageFilesystem('/safe/workspace/path'); $textEditorTool = new TextEditorTool($storage); -// Enable for Anthropic Claude with beta features +// Works out of the box with Anthropic Claude - no beta flags needed on modern models $client = new AnthropicClient( apiKey: 'sk-ant-xxxxx', - cache: $cache, - betaFeatures: ['text-editor-20250116'] // Required for TextEditorTool + cache: $cache ); $response = $agentClient->run($client, new 
LLMRequest( - model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + model: new AnthropicClaude46Sonnet(), conversation: new LLMConversation([ LLMMessage::createFromUserString('Create a PHP file with a hello world function') ]), @@ -333,11 +372,17 @@ Built-in interfaces for logging and monitoring: ```php use Soukicz\Llm\Log\LLMLogger; +use Soukicz\Llm\LLMRequest; +use Soukicz\Llm\LLMResponse; // Implement custom logger class MyLogger implements LLMLogger { - public function log(LLMRequest $request, LLMResponse $response): void { - // Log requests, responses, costs, tokens, etc. + public function requestStarted(LLMRequest $request): void { + echo "Request started\n"; + } + + public function requestFinished(LLMResponse $response): void { + // Log responses, costs, tokens, etc. $cost = ($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ?? 0); echo "Cost: $" . $cost . "\n"; echo "Tokens: {$response->getInputTokens()} in, {$response->getOutputTokens()} out\n"; @@ -365,10 +410,10 @@ $request = new LLMRequest( // Custom stop sequences to halt generation stopSequences: ['END', '---'], - // Reasoning configuration (for o3/o4-mini models) + // Reasoning configuration (OpenAI reasoning models, Anthropic extended thinking, Gemini thinking) reasoningConfig: ReasoningEffort::HIGH, - // OR - reasoningConfig: new ReasoningBudget(10000), + // OR token-based budget (Anthropic only): + // reasoningConfig: new ReasoningBudget(10000), // Optional: Stream responses for real-time progress // streamListener: new CallableStreamListener(fn($e) => print($e->delta)), @@ -380,14 +425,14 @@ $cost = ($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ? echo "Cost: $" . $cost . "\n"; echo "Input tokens: " . $response->getInputTokens() . "\n"; echo "Output tokens: " . $response->getOutputTokens() . "\n"; -echo "Stop reason: " . $response->getStopReason()->value . 
"\n"; // END_TURN, TOOL_USE, MAX_TOKENS, STOP_SEQUENCE +echo "Stop reason: " . $response->getStopReason()->value . "\n"; // FINISHED, TOOL_USE, LENGTH, SAFETY ``` ## Supported Providers -- **Anthropic (Claude)** - Claude 3.5, 3.7, 4.0, 4.1, and 4.5 series models -- **OpenAI (GPT)** - GPT-4o, GPT-4.1, o3 and o4-mini (reasoning), and GPT-5 series models -- **Google Gemini** - Gemini 2.0 and 2.5 series models +- **Anthropic (Claude)** - Claude 3.5 through 4.6 series models +- **OpenAI (GPT)** - GPT-4o, GPT-4.1, o3 and o4-mini (reasoning), and GPT-5 through GPT-5.4 series models +- **Google Gemini** - Gemini 2.0 through 3.x series models - **OpenAI-Compatible** - OpenRouter, local servers (Ollama, llama-server), and more - **AWS Bedrock** - Via separate package ([`soukicz/llm-aws-bedrock`](https://github.com/soukicz/llm-aws-bedrock)) @@ -406,7 +451,8 @@ echo "Stop reason: " . $response->getStopReason()->value . "\n"; // END_TURN, TO - [Feedback Loops](docs/guides/feedback-loops.md) - Self-correcting agents and validation - [Multimodal Support](docs/guides/multimodal.md) - Images, PDFs, and caching - [Streaming](docs/guides/streaming.md) - Real-time response streaming with progress listeners -- [Reasoning Models](docs/guides/reasoning.md) - o3/o4-mini with effort and budget control +- [Reasoning Models](docs/guides/reasoning.md) - Reasoning and extended thinking with effort and budget control +- [Structured Output](docs/guides/structured-output.md) - JSON Schema enforced responses ### Advanced Features - [Caching](docs/guides/caching.md) - Prompt caching and cost reduction @@ -465,6 +511,8 @@ $response = $agentClient->run($client, new LLMRequest( ### Self-Correcting JSON Parser ```php // Agent that validates and corrects its own output +$iterations = 0; + $response = $agentClient->run( client: $client, request: new LLMRequest( @@ -473,7 +521,11 @@ $response = $agentClient->run( LLMMessage::createFromUserString('Extract user data as JSON: John Doe, age 30, email 
john@example.com') ]) ), - feedbackCallback: function ($response) { + feedbackCallback: function ($response) use (&$iterations) { + if (++$iterations >= 3) { + return null; // Limit retry attempts + } + $text = $response->getLastText(); json_decode($text); @@ -484,8 +536,7 @@ $response = $agentClient->run( } return null; // Valid JSON, stop iteration - }, - maxIterations: 3 // Limit retry attempts + } ); ``` @@ -498,7 +549,7 @@ $chartData = base64_encode(file_get_contents('/sales-chart.png')); $reportData = base64_encode(file_get_contents('/quarterly-report.pdf')); $response = $agentClient->run($client, new LLMRequest( - model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + model: new AnthropicClaude46Sonnet(), conversation: new LLMConversation([ LLMMessage::createFromUser(new LLMMessageContents([ new LLMMessageText('Analyze these documents and summarize the key insights'), @@ -582,7 +633,7 @@ This project is open-sourced software licensed under the BSD-3-Clause license. 
## Links -- [Documentation](docs/) - Full documentation +- [Documentation](https://soukicz.github.io/php-llm/) - Full documentation - [GitHub](https://github.com/soukicz/llm) - Source code - [Packagist](https://packagist.org/packages/soukicz/llm) - Composer package diff --git a/docs/examples/best-practices.md b/docs/examples/best-practices.md index b6f0bd2..b0adb8a 100644 --- a/docs/examples/best-practices.md +++ b/docs/examples/best-practices.md @@ -135,6 +135,8 @@ For example, if an agent needs to fetch data from three different sources, runni ```php wait(); +$responses = Utils::all($promises)->wait(); // Process results foreach ($responses as $response) { diff --git a/docs/examples/index.md b/docs/examples/index.md index 8a43b7c..c52c24c 100644 --- a/docs/examples/index.md +++ b/docs/examples/index.md @@ -4,7 +4,7 @@ Practical, copy-paste ready examples to help you get started with PHP LLM and bu ## Getting Started -- **[Quick Start](quick-start.md)** - Get up and running in minutes with basic examples for simple synchronous requests, conversation management, and streaming responses. +- **[Quick Start](quick-start.md)** - Get up and running in minutes with basic examples for simple synchronous requests, async requests, and conversation management. 
## Core Functionality @@ -24,7 +24,7 @@ Practical, copy-paste ready examples to help you get started with PHP LLM and bu These examples cover: - **Basic usage**: Simple requests, conversations -- **Advanced features**: Tools, multimodal, [streaming](../guides/streaming.md), caching, reasoning models +- **Advanced features**: Tools, multimodal, caching, reasoning models (see also the [streaming guide](../guides/streaming.md)) - **Production patterns**: Error handling, logging, retries, resilience - **Best practices**: Security, performance, cost optimization - **Real-world scenarios**: Practical code you can adapt to your needs diff --git a/docs/examples/logging-debugging.md b/docs/examples/logging-debugging.md index 82069d5..0c3151f 100644 --- a/docs/examples/logging-debugging.md +++ b/docs/examples/logging-debugging.md @@ -12,36 +12,32 @@ use Soukicz\Llm\MarkdownFormatter; $formatter = new MarkdownFormatter(); -// Format response +// Format a response (includes the request parameters, the full conversation and stats) $markdown = $formatter->responseToMarkdown($response); echo $markdown; -// Format request -$markdown = $formatter->requestToMarkdown($request); +// The same method also accepts a request (e.g. before a response is available) +$markdown = $formatter->responseToMarkdown($request); echo $markdown; ``` **Sample Output:** ```markdown -## Request -**Model:** claude-sonnet-4-5-20250929 -**Temperature:** 1.0 -**Messages:** 2 - -### User + - **Model:** claude-sonnet-4-6 + - **Temperature:** 0 + - **Max tokens:** 4096 +## User: What is the capital of France? ---- +## Assistant: +The capital of France is Paris. + +---------------------- -## Response -**Stop Reason:** end_turn -**Input Tokens:** 15 -**Output Tokens:** 8 -**Cost:** $0.000345 +##### Total stats -### Assistant -The capital of France is Paris. 
+Finished in 1.823s, prompt tokens: 15, completion tokens: 8, maximum completion tokens: 4096, total tokens: 23, price: $0.000 ``` ## Custom Logger @@ -64,7 +60,7 @@ readonly class LLMFileLogger implements LLMLogger { } public function requestStarted(LLMRequest $request): void { - $markdown = $this->formatter->requestToMarkdown($request); + $markdown = $this->formatter->responseToMarkdown($request); file_put_contents($this->logPath, $markdown . "\n\n", FILE_APPEND); } @@ -143,8 +139,8 @@ $agentClient = new LLMAgentClient($logger); **Sample Log Output:** ``` -[2025-01-15 10:23:45] llm.INFO: LLM Request Started {"model":"claude-sonnet-4-5-20250929","messages":1} -[2025-01-15 10:23:47] llm.INFO: LLM Request Finished {"model":"claude-sonnet-4-5-20250929","input_tokens":15,"output_tokens":8,"cost":0.000345,"response_time_ms":1823} +[2026-06-12 10:23:45] llm.INFO: LLM Request Started {"model":"claude-sonnet-4-6","messages":1} +[2026-06-12 10:23:47] llm.INFO: LLM Request Finished {"model":"claude-sonnet-4-6","input_tokens":15,"output_tokens":8,"cost":0.000345,"response_time_ms":1823} ``` ## HTTP Middleware Logging @@ -190,7 +186,7 @@ try { } catch (LLMClientException $e) { // Log error details error_log("LLM Error: " . $e->getMessage()); - error_log("Request: " . $formatter->requestToMarkdown($request)); + error_log("Request: " . 
$formatter->responseToMarkdown($request)); // Check if it's a rate limit if ($e->getCode() === 429) { @@ -210,13 +206,16 @@ class PerformanceLogger implements LLMLogger { private array $timings = []; public function requestStarted(LLMRequest $request): void { - $this->timings[spl_object_id($request)] = microtime(true); + // Key by the conversation thread ID: the LLMRequest available in + // requestFinished() is a different (cloned) object, so spl_object_id() + // would not match between the two callbacks (caveat: concurrent requests that share a thread ID overwrite each other's start time) + $this->timings[$request->getConversation()->getThreadId()] = microtime(true); } public function requestFinished(LLMResponse $response): void { - $requestId = spl_object_id($response->getRequest()); - $duration = isset($this->timings[$requestId]) - ? (microtime(true) - $this->timings[$requestId]) * 1000 + $threadId = $response->getRequest()->getConversation()->getThreadId(); + $duration = isset($this->timings[$threadId]) + ? (microtime(true) - $this->timings[$threadId]) * 1000 : $response->getTotalTimeMs(); $totalTokens = $response->getInputTokens() + $response->getOutputTokens(); @@ -230,7 +229,7 @@ class PerformanceLogger implements LLMLogger { $totalCost ); - unset($this->timings[$requestId]); + unset($this->timings[$threadId]); } } ``` @@ -238,8 +237,8 @@ class PerformanceLogger implements LLMLogger { **Sample Output:** ``` -Request claude-sonnet-4-5-20250929: 1823ms, 23 tokens, $0.000345 -Request gpt-5-2025-08-07: 956ms, 45 tokens, $0.000890 +Request claude-sonnet-4-6: 1823ms, 23 tokens, $0.000345 +Request gpt-5.4-2026-03-05: 956ms, 45 tokens, $0.000890 Request gemini-2.5-pro: 1245ms, 31 tokens, $0.000520 ``` @@ -253,15 +252,15 @@ class DebugLogger implements LLMLogger { public function requestStarted(LLMRequest $request): void { echo "=== REQUEST STARTED ===\n"; echo "Model: " . $request->getModel()->getCode() . "\n"; - echo "Temperature: " . ($request->getTemperature() ?? 'default') . "\n"; - echo "Max Tokens: " . ($request->getMaxTokens() ?? 'default') . 
"\n"; + echo "Temperature: " . $request->getTemperature() . "\n"; + echo "Max Tokens: " . $request->getMaxTokens() . "\n"; echo "Messages: " . count($request->getConversation()->getMessages()) . "\n"; echo "Tools: " . count($request->getTools()) . "\n\n"; } public function requestFinished(LLMResponse $response): void { echo "=== REQUEST FINISHED ===\n"; - echo "Stop Reason: " . $response->getStopReason() . "\n"; + echo "Stop Reason: " . $response->getStopReason()->value . "\n"; echo "Response Time: " . $response->getTotalTimeMs() . "ms\n"; echo "Input Tokens: " . $response->getInputTokens() . "\n"; echo "Output Tokens: " . $response->getOutputTokens() . "\n"; @@ -279,14 +278,14 @@ class DebugLogger implements LLMLogger { ``` === REQUEST STARTED === -Model: claude-sonnet-4-5-20250929 -Temperature: 1.0 -Max Tokens: 2048 +Model: claude-sonnet-4-6 +Temperature: 0 +Max Tokens: 4096 Messages: 1 Tools: 0 === REQUEST FINISHED === -Stop Reason: end_turn +Stop Reason: finished Response Time: 1823ms Input Tokens: 15 Output Tokens: 8 @@ -321,7 +320,7 @@ class JSONLogger implements LLMLogger { 'output_cost' => $outputCost, 'total_cost' => $inputCost + $outputCost, 'response_time_ms' => $response->getTotalTimeMs(), - 'stop_reason' => $response->getStopReason(), + 'stop_reason' => $response->getStopReason()->value, ]; file_put_contents( @@ -336,9 +335,9 @@ class JSONLogger implements LLMLogger { **Sample Log Output (llm.json):** ```json -{"timestamp":"2025-01-15T10:23:47+00:00","model":"claude-sonnet-4-5-20250929","input_tokens":15,"output_tokens":8,"total_tokens":23,"input_cost":0.000045,"output_cost":0.0003,"total_cost":0.000345,"response_time_ms":1823,"stop_reason":"end_turn"} -{"timestamp":"2025-01-15T10:24:12+00:00","model":"gpt-5-2025-08-07","input_tokens":22,"output_tokens":45,"total_tokens":67,"input_cost":0.00011,"output_cost":0.00078,"total_cost":0.00089,"response_time_ms":956,"stop_reason":"stop"} 
-{"timestamp":"2025-01-15T10:25:03+00:00","model":"gemini-2.5-pro","input_tokens":18,"output_tokens":31,"total_tokens":49,"input_cost":0.00009,"output_cost":0.00043,"total_cost":0.00052,"response_time_ms":1245,"stop_reason":"STOP"} +{"timestamp":"2026-06-12T10:23:47+00:00","model":"claude-sonnet-4-6","input_tokens":15,"output_tokens":8,"total_tokens":23,"input_cost":0.000045,"output_cost":0.0003,"total_cost":0.000345,"response_time_ms":1823,"stop_reason":"finished"} +{"timestamp":"2026-06-12T10:24:12+00:00","model":"gpt-5.4-2026-03-05","input_tokens":22,"output_tokens":45,"total_tokens":67,"input_cost":0.00011,"output_cost":0.00078,"total_cost":0.00089,"response_time_ms":956,"stop_reason":"finished"} +{"timestamp":"2026-06-12T10:25:03+00:00","model":"gemini-2.5-pro","input_tokens":18,"output_tokens":31,"total_tokens":49,"input_cost":0.00009,"output_cost":0.00043,"total_cost":0.00052,"response_time_ms":1245,"stop_reason":"finished"} ``` This format is ideal for log aggregation tools like ELK stack, Splunk, or DataDog. diff --git a/docs/examples/multimodal.md b/docs/examples/multimodal.md index 9493519..dad686a 100644 --- a/docs/examples/multimodal.md +++ b/docs/examples/multimodal.md @@ -73,12 +73,12 @@ function analyzeUIScreenshot(string $screenshotPath): array { conversation: new LLMConversation([ LLMMessage::createFromUser(new LLMMessageContents([ new LLMMessageText( - 'Analyze this UI screenshot and provide:\n' . - '1. Accessibility issues (contrast, font sizes, etc.)\n' . - '2. Layout problems (alignment, spacing, overlapping)\n' . - '3. Responsive design concerns\n' . - '4. UX improvement suggestions\n\n' . - 'Format as a structured list with severity levels.' + "Analyze this UI screenshot and provide:\n" . + "1. Accessibility issues (contrast, font sizes, etc.)\n" . + "2. Layout problems (alignment, spacing, overlapping)\n" . + "3. Responsive design concerns\n" . + "4. UX improvement suggestions\n\n" . + "Format as a structured list with severity levels." 
), new LLMMessageImage('base64', 'image/png', $imageData) ])) @@ -194,13 +194,13 @@ function reviewContract(string $contractPdfPath): array { conversation: new LLMConversation([ LLMMessage::createFromUser(new LLMMessageContents([ new LLMMessageText( - 'Review this contract and provide:\n' . - '1. Key terms (parties, dates, amounts)\n' . - '2. Obligations and responsibilities\n' . - '3. Termination clauses\n' . - '4. Potential red flags or unusual clauses\n' . - '5. Missing standard clauses\n\n' . - 'Format as a structured report.' + "Review this contract and provide:\n" . + "1. Key terms (parties, dates, amounts)\n" . + "2. Obligations and responsibilities\n" . + "3. Termination clauses\n" . + "4. Potential red flags or unusual clauses\n" . + "5. Missing standard clauses\n\n" . + "Format as a structured report." ), new LLMMessagePdf('base64', $pdfData) ])) @@ -233,13 +233,13 @@ function summarizeResearchPaper(string $paperPdfPath): string { conversation: new LLMConversation([ LLMMessage::createFromUser(new LLMMessageContents([ new LLMMessageText( - 'Summarize this research paper. Include:\n' . - '- Research question/hypothesis\n' . - '- Methodology\n' . - '- Key findings\n' . - '- Conclusions\n' . - '- Limitations\n\n' . - 'Write for a technical but non-specialist audience (max 500 words).' + "Summarize this research paper. Include:\n" . + "- Research question/hypothesis\n" . + "- Methodology\n" . + "- Key findings\n" . + "- Conclusions\n" . + "- Limitations\n\n" . + "Write for a technical but non-specialist audience (max 500 words)." 
), new LLMMessagePdf('base64', $pdfData) ])) @@ -476,8 +476,9 @@ try { run( client: $anthropic, request: new LLMRequest( - model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + model: new AnthropicClaude46Sonnet(), conversation: new LLMConversation([ LLMMessage::createFromUserString('What is PHP?') ]), @@ -60,7 +60,7 @@ use Soukicz\Llm\LLMResponse; $promise = $agentClient->runAsync( client: $anthropic, request: new LLMRequest( - model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + model: new AnthropicClaude46Sonnet(), conversation: new LLMConversation([ LLMMessage::createFromUserString('Explain async programming') ]), @@ -90,18 +90,17 @@ $conversation = new LLMConversation([ $response = $agentClient->run( client: $anthropic, request: new LLMRequest( - model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + model: new AnthropicClaude46Sonnet(), conversation: $conversation, ) ); echo "AI: " . $response->getLastText() . "\n"; // "4" -// Add AI response to conversation (returns new instance) -$conversation = $conversation->withMessage($response->getLastMessage()); - +// The response's conversation already includes the assistant reply +// (and any tool use/result messages), so continue from there. // Add user's follow-up question (returns new instance) -$conversation = $conversation->withMessage( +$conversation = $response->getConversation()->withMessage( LLMMessage::createFromUserString('What about 2 * 2?') ); @@ -109,7 +108,7 @@ $conversation = $conversation->withMessage( $response = $agentClient->run( client: $anthropic, request: new LLMRequest( - model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + model: new AnthropicClaude46Sonnet(), conversation: $conversation, ) ); @@ -126,14 +125,14 @@ PHP LLM provides a unified interface across multiple LLM providers. 
Simply swap ```php run( client: $openai, request: new LLMRequest( - model: new GPT5(GPT5::VERSION_2025_08_07), + model: new GPT54(GPT54::VERSION_2026_03_05), conversation: $conversation, ) ); @@ -172,7 +171,7 @@ $client = new OpenAICompatibleClient( $response = $agentClient->run( client: $client, request: new LLMRequest( - model: new LocalModel('anthropic/claude-3.5-sonnet'), + model: new LocalModel('anthropic/claude-haiku-4.5'), conversation: $conversation, ) ); diff --git a/docs/examples/state-management.md b/docs/examples/state-management.md index 7aa296e..5b134fa 100644 --- a/docs/examples/state-management.md +++ b/docs/examples/state-management.md @@ -8,6 +8,8 @@ Save and resume AI agent conversations using JSON serialization. Conversation st **Immutability**: Remember that `LLMConversation` is immutable - use `withMessage()` to add messages, which returns a new instance. +**Thread ID**: Every `LLMConversation` has a thread ID - an auto-generated UUID, or a value you pass as the second constructor argument (`new LLMConversation([], 'my-thread-id')`). It is preserved by `withMessage()` and JSON serialization, and `getThreadId()` makes it a convenient key for persisting conversations. + ## Saving Conversations ```php @@ -165,7 +167,7 @@ $_SESSION['conversation'] = json_encode($conversation); // Load from session $conversation = isset($_SESSION['conversation']) ? 
LLMConversation::fromJson(json_decode($_SESSION['conversation'], true)) - : new LLMConversation(); + : new LLMConversation([]); ``` ## Conversation History Management @@ -188,10 +190,10 @@ function trimConversation(LLMConversation $conversation, int $maxMessages): LLMC return $conversation; } - // Keep most recent messages + // Keep most recent messages (preserve the thread ID) $trimmedMessages = array_slice($messages, -$maxMessages); - return new LLMConversation($trimmedMessages); + return new LLMConversation($trimmedMessages, $conversation->getThreadId()); } $conversation = trimConversation($conversation, 20); // Keep last 20 messages @@ -259,7 +261,7 @@ class ChatService { } // Create new conversation - $conversation = new LLMConversation(); + $conversation = new LLMConversation([]); $this->saveConversation($userId, $conversation); return $conversation; @@ -281,9 +283,9 @@ class ChatService { ) ); - // Add AI response (immutable - returns new instance) - $conversation = $conversation->withMessage($response->getLastMessage()); - $this->saveConversation($userId, $conversation); + // The response's conversation already includes the assistant reply + // (and any tool use/result messages) - persist that + $this->saveConversation($userId, $response->getConversation()); return $response->getLastText(); } diff --git a/docs/examples/tools-and-function-calling.md b/docs/examples/tools-and-function-calling.md index 1440404..89e6afe 100644 --- a/docs/examples/tools-and-function-calling.md +++ b/docs/examples/tools-and-function-calling.md @@ -16,10 +16,12 @@ The LLM decides when to use tools based on your prompts and the tool description ```php // weather logic + inputSchema: [ + 'type' => 'object', + 'properties' => [ + 'city' => [ + 'type' => 'string', + 'description' => 'City name (e.g., "London", "New York")', + ], + ], + 'required' => ['city'], + ], + handler: function (array $input): LLMMessageContents { + // ... call your weather API here ... 
+ return LLMMessageContents::fromArrayData([ + 'city' => $input['city'], + 'temperature' => 18, + 'description' => 'partly cloudy', + ]); + } ), // Stock price tool @@ -214,7 +232,14 @@ $tools = [ ], 'required' => ['ticker'], ], - handler: fn($input) => // stock API logic + handler: function (array $input): LLMMessageContents { + // ... call your stock API here ... + return LLMMessageContents::fromArrayData([ + 'ticker' => $input['ticker'], + 'price' => 187.42, + 'currency' => 'USD', + ]); + } ), // Calculator @@ -237,60 +262,40 @@ $response = $agentClient->run($client, $request); ## Multi-Step Tool Usage -Handle conversations where the LLM uses tools multiple times: +You don't need to handle the tool-use loop yourself. `LLMAgentClient::run()` does it automatically: whenever the LLM stops to call a tool (`StopReason::TOOL_USE`), the agent client executes the matching tool handler, appends the tool result to the conversation, and sends a follow-up request - repeating until the LLM produces a final answer. Calls to unknown tools are answered with an error result so the LLM can recover. ```php run( client: $client, request: new LLMRequest( model: $model, - conversation: $conversation, + conversation: new LLMConversation([ + LLMMessage::createFromUserString('Calculate 50 * 30, then add 100 to the result') + ]), tools: [$calculator] ) ); -// Check if the LLM used a tool -if ($response->getLastMessage()->hasToolUse()) { - // Add the assistant's response (including tool use) to conversation - $conversation = $conversation->withMessage($response->getLastMessage()); - - // Execute the tool and add result - foreach ($response->getLastMessage()->getContents()->getToolUses() as $toolUse) { - $tool = $tools[$toolUse->getName()] ?? 
null; - - if ($tool) { - $result = $tool->handle($toolUse->getInput()); - $conversation = $conversation->withMessage( - LLMMessage::createFromUser( - new LLMMessageContents([ - new LLMMessageToolResult( - toolUseId: $toolUse->getId(), - content: $result, - isError: false - ) - ]) - ) - ); - } - } +// The final answer, after any number of intermediate tool calls +echo $response->getLastText(); +``` - // Continue the conversation - $finalResponse = $agentClient->run( - client: $client, - request: new LLMRequest( - model: $model, - conversation: $conversation, - tools: [$calculator] - ) - ); +If you need to inspect the intermediate steps, the conversation returned with the response contains every tool call and tool result: - echo $finalResponse->getLastText(); +```php +getConversation()->getMessages() as $message) { + foreach ($message->getContents() as $content) { + if ($content instanceof LLMMessageToolUse) { + echo "Tool call: " . $content->getName() . " " . json_encode($content->getInput()) . "\n"; + } elseif ($content instanceof LLMMessageToolResult) { + echo "Tool result for call " . $content->getId() . 
"\n"; + } + } } ``` @@ -424,11 +429,13 @@ Test tools independently before using with LLMs: // Unit test a tool $calculator = new CallbackToolDefinition(...); +// The handler returns LLMMessageContents, which supports array access; +// the first item is an LLMMessageArrayData holding the returned array $result = $calculator->handle(['expression' => '2 + 2']); -assert($result->toArray()['result'] === 4); +assert($result[0]->getData()['result'] === 4); $result = $calculator->handle(['expression' => 'invalid']); -assert(isset($result->toArray()['error'])); +assert(isset($result[0]->getData()['error'])); ``` ## See Also diff --git a/docs/guides/batch-processing.md b/docs/guides/batch-processing.md index 6d83204..b2fd8f7 100644 --- a/docs/guides/batch-processing.md +++ b/docs/guides/batch-processing.md @@ -11,7 +11,7 @@ Batch processing allows you to: - Save costs (often 50% cheaper than real-time) - Handle large-scale operations -**Note:** Batch processing support varies by provider. Check provider-specific documentation. +**Note:** Batch processing support varies by provider. Both `AnthropicClient` and `OpenAIClient` implement batches; Gemini does not. ## LLMBatchClient Interface @@ -28,6 +28,9 @@ interface LLMBatchClient { } ``` +- `createBatch()` takes an array of `LLMRequest` objects **keyed by your custom ID** and returns the provider's batch ID. +- `retrieveBatch()` returns `null` while the batch is still in progress. Once finished, it returns an array mapping each custom ID to the response text content. 
+ ## Basic Usage ### Submit Batch @@ -43,10 +46,10 @@ use Soukicz\Llm\Message\LLMMessage; /** @var LLMBatchClient $client */ $client = new OpenAIClient('sk-xxxxx', 'org-xxxxx'); -// Prepare multiple requests +// Prepare multiple requests, keyed by a custom ID of your choice $requests = []; for ($i = 0; $i < 1000; $i++) { - $requests[] = new LLMRequest( + $requests["document-$i"] = new LLMRequest( model: new GPT5(GPT5::VERSION_2025_08_07), conversation: new LLMConversation([ LLMMessage::createFromUserString("Summarize document $i") @@ -63,13 +66,14 @@ echo "Batch created: $batchId\n"; ```php retrieveBatch($batchId); +// Returns null while the batch is in progress, +// or an array of [custom ID => response text] when finished +$results = $client->retrieveBatch($batchId); -if ($batch !== null) { - // Batch information available - // Check provider-specific documentation for exact response format - var_dump($batch); +if ($results !== null) { + foreach ($results as $customId => $text) { + echo "$customId: $text\n"; + } } ``` @@ -87,9 +91,9 @@ $client = new OpenAIClient('sk-xxxxx', 'org-xxxxx'); // Prepare batch of classification tasks $texts = [ - 'This product is amazing!', - 'Terrible service, would not recommend.', - 'It\'s okay, nothing special.', + 'review-1' => 'This product is amazing!', + 'review-2' => 'Terrible service, would not recommend.', + 'review-3' => 'It\'s okay, nothing special.', // ... 
1000s more ]; @@ -101,7 +105,7 @@ $requests = array_map( ]) ), $texts -); +); // array_map preserves the string keys, which become custom IDs // Submit batch $batchId = $client->createBatch($requests); @@ -109,20 +113,17 @@ $batchId = $client->createBatch($requests); // Poll until complete do { sleep(60); // Wait 1 minute - $batch = $client->retrieveBatch($batchId); + $results = $client->retrieveBatch($batchId); +} while ($results === null); - if ($batch !== null) { - // Check provider-specific response format for status - echo "Batch retrieved\n"; - break; - } -} while (true); - -// Process batch results -// Note: Exact format depends on provider implementation -var_dump($batch); +// Process batch results: custom ID => response text +foreach ($results as $customId => $text) { + echo "$customId: $text\n"; +} ``` +The same code works with `AnthropicClient` — only the client and model classes change. + ## Async Polling Use async operations for efficient polling: @@ -232,27 +233,33 @@ $batchId = $client->createBatch($requests); ## Error Handling +There is no dedicated batch exception class. Batch creation fails with a Guzzle HTTP exception (e.g. `GuzzleHttp\Exception\ClientException`), and `retrieveBatch()` throws a `\RuntimeException` when the batch itself failed (e.g. OpenAI produced only an error file) or returned an unexpected status: + ```php createBatch($requests); -} catch (BatchCreationException $e) { - // Handle batch creation error +} catch (GuzzleException $e) { + // Handle batch creation error (invalid request, rate limit, ...) echo "Failed to create batch: " . 
$e->getMessage(); // Retry with smaller batch size - $smallerBatches = array_chunk($requests, 500); + $smallerBatches = array_chunk($requests, 500, preserve_keys: true); foreach ($smallerBatches as $batch) { - $batchId = $client->createBatch($batch); + $batchIds[] = $client->createBatch($batch); } } // Retrieve batch results -$batch = $client->retrieveBatch($batchId); -if ($batch !== null) { - // Process batch results according to provider-specific format - // Check provider documentation for exact structure - processResults($batch); +try { + $results = $client->retrieveBatch($batchId); + if ($results !== null) { + processResults($results); // custom ID => response text + } +} catch (\RuntimeException $e) { + echo "Batch failed: " . $e->getMessage(); } ``` @@ -273,10 +280,16 @@ echo "Savings: $" . ($realTimeCost - $batchCost); // $50 ## Provider Support -- ✅ **OpenAI** - Full batch API support -- ⚠️ **Anthropic** - Check current API documentation -- ⚠️ **Google Gemini** - Check current API documentation -- ❌ **OpenAI-compatible** - Varies by provider +- ✅ **OpenAI** - `OpenAIClient` implements `LLMBatchClient` (uploads a JSONL file via `/files` and creates a `/batches` job with a 24h completion window) +- ✅ **Anthropic** - `AnthropicClient` implements `LLMBatchClient` (uses the `/v1/messages/batches` API) +- ❌ **Google Gemini** - Not supported by `GeminiClient` +- ⚠️ **OpenAI-compatible** - `OpenAICompatibleClient` inherits the batch methods, but the provider must support the OpenAI files and batches endpoints + +## Implementation Notes + +- Custom IDs are the array keys you pass to `createBatch()` and they identify each result returned by `retrieveBatch()`. +- `retrieveBatch()` extracts only the **text content** of each response. Tool calls, structured output and other content types are not decoded. 
+- For OpenAI, when a completed batch produced only an error file, `retrieveBatch()` throws a `\RuntimeException` with the error details (or returns an empty array when the batch is older than 3 days). ## Limitations diff --git a/docs/guides/caching.md b/docs/guides/caching.md index 5efccaf..d4c956d 100644 --- a/docs/guides/caching.md +++ b/docs/guides/caching.md @@ -13,7 +13,7 @@ All PHP LLM clients support caching at the HTTP request level. When enabled: ## File Cache -The built-in `FileCache` stores responses on the filesystem: +The built-in `FileCache` stores responses on the filesystem. The directory must already exist — the constructor throws a `RuntimeException` otherwise: ```php redis->get($key); - return $value !== false ? $value : null; - } + public function fetch(RequestInterface $request): ?ResponseInterface { + $json = $this->redis->get($this->getCacheKey($request)); - public function set(string $key, string $value): void { - $this->redis->setex($key, $this->ttl, $value); + return $json !== false ? $this->responseFromJson($json) : null; } - public function has(string $key): bool { - return $this->redis->exists($key) > 0; + public function store(RequestInterface $request, ResponseInterface $response): void { + $this->redis->setex($this->getCacheKey($request), $this->ttl, $this->responseToJson($response)); } - public function delete(string $key): void { - $this->redis->del($key); + public function invalidate(RequestInterface $request): void { + $this->redis->del($this->getCacheKey($request)); } } ``` @@ -109,15 +124,17 @@ $client = new AnthropicClient('sk-xxxxx', $cache); ## Cache Keys -Cache keys are generated from: -- API endpoint -- Model name and version -- Request parameters (temperature, maxTokens, etc.) 
-- Conversation messages -- Tool definitions +Cache keys are a SHA-512 hash of the HTTP request: +- Request URL (API endpoint, including the model for Gemini) +- HTTP method +- Request body (model, temperature, maxTokens, conversation messages, tool definitions, ...) + +Any change to the request body produces a new cache key. **Important:** Always use exact model versions to prevent stale cached responses. +**Security caveat:** The cache key does **not** include request headers, so API keys are not part of the key. Identical requests share cache entries regardless of which credentials were used. The cache is intended for development, testing and request deduplication — do not rely on it for multi-tenant isolation. + ## Best Practices ### Use Exact Model Versions @@ -207,38 +224,45 @@ $client = new AnthropicClient('sk-xxxxx', null); ## Monitoring Cache Performance -Track cache hit rates: +Track cache hit rates by decorating another cache: ```php cache->get($key); - if ($value !== null) { + public function fetch(RequestInterface $request): ?ResponseInterface { + $response = $this->cache->fetch($request); + if ($response !== null) { $this->hits++; } else { $this->misses++; } - return $value; + + return $response; + } + + public function store(RequestInterface $request, ResponseInterface $response): void { + $this->cache->store($request, $response); } - public function set(string $key, string $value): void { - $this->cache->set($key, $value); + public function invalidate(RequestInterface $request): void { + $this->cache->invalidate($request); } public function getHitRate(): float { $total = $this->hits + $this->misses; return $total > 0 ? $this->hits / $total : 0; } - - // Implement other interface methods... } ``` @@ -255,31 +279,36 @@ echo "Cache hit rate: " . ($cache->getHitRate() * 100) . "%\n"; ### Manual Cleanup +`invalidate()` removes the entry for a specific PSR-7 HTTP request. 
Since you usually don't have the underlying HTTP request at hand, the simplest cleanup for `FileCache` is to delete the cache files: + ```php delete($cacheKey); - -// Clear all cache (FileCache example) -array_map('unlink', glob('/tmp/llm-cache/*')); +// Clear all cache (FileCache stores one .json file per entry) +array_map('unlink', glob('/tmp/llm-cache/*.json')); ``` ### Automatic Expiration -Implement TTL in custom cache: +Implement TTL in a custom cache by extending `AbstractCache`: ```php directory = $directory; - $this->ttl = $ttlSeconds; + private function getPath(RequestInterface $request): string { + return $this->directory . '/' . md5($this->getCacheKey($request)) . '.json'; } - public function get(string $key): ?string { - $file = $this->getFilePath($key); + public function fetch(RequestInterface $request): ?ResponseInterface { + $file = $this->getPath($request); if (!file_exists($file)) { return null; @@ -288,13 +317,20 @@ class TTLFileCache implements CacheInterface { // Check if expired if (time() - filemtime($file) > $this->ttl) { unlink($file); + return null; } - return file_get_contents($file); + return $this->responseFromJson(file_get_contents($file)); } - // Implement other methods... + public function store(RequestInterface $request, ResponseInterface $response): void { + file_put_contents($this->getPath($request), $this->responseToJson($response), LOCK_EX); + } + + public function invalidate(RequestInterface $request): void { + @unlink($this->getPath($request)); + } } ``` @@ -306,17 +342,18 @@ Example cost calculation: run($client, $request); -echo "Cost: $" . $response1->getTokenUsage()->getTotalCost() . "\n"; +echo "Cost: $" . ($response1->getInputPriceUsd() + $response1->getOutputPriceUsd()) . "\n"; -// Cached request - no cost ($0.00) +// Identical request - served from the cache, no API call is made $response2 = $agentClient->run($client, $request); -echo "Cost: $" . $response2->getTokenUsage()->getTotalCost() . 
"\n"; // 100% savings on repeated requests! ``` +Note that the reported price is calculated from the token counts in the response, so a cached response still reports the original cost — but no API call is made and nothing is billed. + ## See Also - [Configuration Guide](configuration.md) - Client configuration diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index dad7fa7..8b51d22 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -6,19 +6,20 @@ Configure your AI agent requests with various parameters to control behavior, ou ```php print($event->delta) ), @@ -71,8 +72,8 @@ $conversation = new LLMConversation([ Controls randomness in responses (0.0 to 1.0): -- **0.0** - Deterministic, focused responses -- **0.5** - Balanced (default for most models) +- **0.0** - Deterministic, focused responses (the library default) +- **0.5** - Balanced - **1.0** - Creative, varied responses ```php @@ -176,18 +177,44 @@ $request = new LLMRequest( Or implement `StreamListenerInterface` for a reusable class-based listener. See [Streaming Guide](streaming.md) for full documentation and practical examples. -**Note:** Streaming bypasses the response cache. When a listener is present, the request always goes to the API. +**Note:** Streaming works with the response cache. On a cache hit, the cached response is replayed through the stream listener, and a completed live stream is stored in the cache for future requests. + +### structuredOutputConfig + +Force the model to return JSON matching a schema: + +```php + 'object', + 'properties' => [ + 'name' => ['type' => 'string'], + ], + 'required' => ['name'], + ]), +); + +$data = $agentClient->run($client, $request)->getLastStructuredData(); +``` + +See [Structured Output Guide](structured-output.md) for full documentation. 
## Reasoning Parameters -For reasoning models (o3, o4): +### reasoningConfig -### reasoningEffort +The `reasoningConfig` parameter accepts either a `ReasoningEffort` enum case or a `ReasoningBudget` instance. When left at `null` (the default), the provider's default behavior is used. -Control computational effort: +Control computational effort with `ReasoningEffort` (works with Anthropic, OpenAI, and Gemini): ```php push(Middleware::log($logger, $formatter)); +$loggingMiddleware = function (callable $handler) { + return function (RequestInterface $request, array $options) use ($handler) { + return $handler($request, $options)->then( + function (ResponseInterface $response) use ($request) { + error_log($request->getMethod() . ' ' . $request->getUri() . ' - ' . $response->getStatusCode()); + + return $response; + } + ); + }; +}; $client = new AnthropicClient( apiKey: 'sk-xxxxx', cache: $cache, - handler: $stack + customHttpMiddleware: $loggingMiddleware ); ``` +See [Logging & Debugging](../examples/logging-debugging.md) for a complete middleware example. + ## Provider-Specific Configuration ### Gemini Safety Settings @@ -303,10 +343,10 @@ use Soukicz\Llm\Client\Universal\LocalModel; $client = new OpenAICompatibleClient( apiKey: 'your-api-key', - baseUrl: 'https://api.openrouter.ai/v1' + baseUrl: 'https://openrouter.ai/api/v1' ); -$model = new LocalModel('anthropic/claude-3.5-sonnet'); +$model = new LocalModel('anthropic/claude-haiku-4.5'); ``` ## Configuration Best Practices @@ -318,7 +358,7 @@ $model = new LocalModel('anthropic/claude-3.5-sonnet'); 5. **Use higher temperature** for creative tasks 6. **Set stopSequences** for structured outputs 7. **Configure safety settings** appropriately for your use case -8. **Use reasoning budgets** in production +8. 
**Limit reasoning costs in production** - `ReasoningBudget` on Anthropic, lower `ReasoningEffort` elsewhere ## Example: Complete Configuration @@ -358,6 +398,7 @@ $response = $agentClient->run($client, $request); ## See Also - [Reasoning Models](reasoning.md) - Reasoning-specific configuration +- [Structured Output](structured-output.md) - JSON Schema constrained responses - [Streaming](streaming.md) - Real-time response streaming - [Tools Guide](tools.md) - Tool configuration - [Caching Guide](caching.md) - Cache configuration diff --git a/docs/guides/feedback-loops.md b/docs/guides/feedback-loops.md index db4d525..fc9732b 100644 --- a/docs/guides/feedback-loops.md +++ b/docs/guides/feedback-loops.md @@ -259,10 +259,12 @@ feedbackCallback: function (LLMResponse $response) use (&$attempt): ?LLMMessage ### With Tools -Validate tool outputs in feedback loops: +Feedback loops and tools combine naturally: `LLMAgentClient` first runs the tool loop to completion, and **only then** invokes the feedback callback on the final response. 
The callback therefore never sees a pending tool call — but you can inspect the conversation history to verify which tools were actually used: ```php run( client: $anthropic, request: new LLMRequest( @@ -271,11 +273,16 @@ $response = $agentClient->run( tools: [$calculatorTool], ), feedbackCallback: function (LLMResponse $response): ?LLMMessage { - // Ensure the agent used the calculator tool - if (!$response->hasToolCalls()) { - return LLMMessage::createFromUserString('Please use the calculator tool for this calculation'); + // Ensure the agent used the calculator tool at some point in the conversation + foreach ($response->getConversation()->getMessages() as $message) { + foreach ($message->getContents() as $content) { + if ($content instanceof LLMMessageToolUse && $content->getName() === 'calculator') { + return null; // Tool was used - accept the response + } + } } - return null; + + return LLMMessage::createFromUserString('Please use the calculator tool for this calculation'); } ); ``` @@ -286,15 +293,15 @@ Validate reasoning model outputs: ```php run( client: $openai, request: new LLMRequest( - model: new OpenAIGPTo3(), + model: new GPTo3(GPTo3::VERSION_2025_04_16), conversation: $conversation, - reasoningEffort: ReasoningEffort::HIGH + reasoningConfig: ReasoningEffort::HIGH ), feedbackCallback: function (LLMResponse $response): ?LLMMessage { // Verify mathematical accuracy @@ -356,6 +363,7 @@ return LLMMessage::createFromUserString('The JSON is missing the required "email ## See Also - [Tools Guide](tools.md) - Validate tool usage in feedback loops +- [Structured Output](structured-output.md) - Guarantee JSON shape without re-prompting - [Reasoning Models](reasoning.md) - Combine reasoning with validation - [Examples](../examples/index.md) - More feedback loop examples - [Configuration](configuration.md) - Configure request behavior diff --git a/docs/guides/multimodal.md b/docs/guides/multimodal.md index 6b493d9..eddc509 100644 --- a/docs/guides/multimodal.md 
+++ b/docs/guides/multimodal.md @@ -151,7 +151,7 @@ $message = LLMMessage::createFromUser(new LLMMessageContents([ **PDF Support:** - ✅ Anthropic (Claude) - All models - ✅ OpenAI (GPT) - GPT-4o and later models -- ❌ Google Gemini - Not currently supported +- ✅ Google Gemini - Supported (sent as inline `application/pdf` data) - ⚠️ OpenAI-compatible - Depends on the underlying model ## Provider-Specific Notes diff --git a/docs/guides/reasoning.md b/docs/guides/reasoning.md index 5718a00..bdec8b6 100644 --- a/docs/guides/reasoning.md +++ b/docs/guides/reasoning.md @@ -1,6 +1,12 @@ # Reasoning Models -Reasoning models like OpenAI's o3 and o4 series spend additional computation time thinking through problems before responding. This makes them particularly effective for complex tasks requiring deep analysis, mathematics, coding, and logical reasoning. +Reasoning models spend additional computation time thinking through problems before responding. This makes them particularly effective for complex tasks requiring deep analysis, mathematics, coding, and logical reasoning. + +All three major providers support reasoning through this library: + +- **Anthropic** - Claude extended thinking (adaptive thinking with effort levels, or an explicit token budget) +- **OpenAI** - Reasoning effort on o-series and GPT-5.x models +- **Google Gemini** - Thinking levels on Gemini 2.5+ models ## Overview @@ -14,11 +20,11 @@ This results in more accurate responses for challenging tasks, at the cost of hi ## Configuring Reasoning -PHP LLM provides two ways to configure reasoning models: +PHP LLM provides two ways to configure reasoning via the `reasoningConfig` parameter of `LLMRequest`. When `reasoningConfig` is left at `null` (the default), the provider's default behavior is used. ### Reasoning Effort -Control how much computational effort the model spends reasoning: +Control how much computational effort the model spends reasoning. 
`ReasoningEffort` works with all three providers: ```php run( 'A farmer has 17 sheep. All but 9 die. How many sheep are left alive?' ) ]), - reasoningEffort: ReasoningEffort::HIGH + reasoningConfig: ReasoningEffort::HIGH ) ); echo $response->getLastText(); // "9 sheep are left alive" ``` +The same request works with Claude extended thinking: + +```php +run( + client: $anthropic, + request: new LLMRequest( + model: new AnthropicClaude46Sonnet(), + conversation: $conversation, + reasoningConfig: ReasoningEffort::HIGH + ) +); +``` + ## When to Use Reasoning Models **Ideal Use Cases:** @@ -105,18 +150,52 @@ echo $response->getLastText(); // "9 sheep are left alive" ## Supported Models +### Anthropic (Extended Thinking) + +```php +run($client, $request); -$usage = $response->getTokenUsage(); -echo "Input tokens: " . $usage->getInputTokens() . "\n"; -echo "Reasoning tokens: " . $usage->getReasoningTokens() . "\n"; -echo "Output tokens: " . $usage->getOutputTokens() . "\n"; -echo "Total cost: $" . $usage->getTotalCost() . "\n"; +echo "Input tokens: " . $response->getInputTokens() . "\n"; +echo "Output tokens: " . $response->getOutputTokens() . "\n"; +echo "Input cost: $" . $response->getInputPriceUsd() . "\n"; +echo "Output cost: $" . $response->getOutputPriceUsd() . "\n"; +echo "Time: " . $response->getTotalTimeMs() . " ms\n"; ``` +Reasoning tokens are included in the output token count reported by the providers. + ## Combining with Other Features ### With Tools @@ -195,17 +279,20 @@ $response = $agentClient->run( ## Best Practices 1. **Start with MEDIUM effort** - Only increase if needed -2. **Set budgets for production** - Prevent runaway costs +2. **Cap thinking tokens on Anthropic** - Use `ReasoningBudget` to prevent runaway costs 3. **Use for appropriate tasks** - Don't use reasoning models for simple queries -4. **Monitor costs closely** - Track token usage and adjust budgets -5. **Test with o4-mini first** - More cost-effective for development +4. 
**Monitor costs closely** - Track token usage via `getOutputTokens()` and `getOutputPriceUsd()` +5. **Test with cheaper models first** - o4-mini or Gemini Flash are more cost-effective for development ## Provider Support -- ✅ **OpenAI** - o3, o4-mini (native reasoning support) -- ❌ **Anthropic** - Not available (Claude uses different architecture) -- ❌ **Google Gemini** - Not available -- ⚠️ **OpenAI-compatible** - Depends on provider +| Feature | Anthropic | OpenAI | Gemini | +|---|---|---|---| +| `ReasoningEffort` | ✅ (adaptive extended thinking + effort) | ✅ (`reasoning_effort`) | ✅ (`thinkingLevel` on 3.x, `thinkingBudget` on 2.x) | +| `ReasoningBudget` | ✅ (`thinking.budget_tokens`) | ❌ throws `InvalidArgumentException` | ❌ throws `InvalidArgumentException` | +| Thinking visible in response | ✅ (`LLMMessageReasoning`) | ❌ | ✅ (streaming `THINKING_DELTA`) | + +For OpenAI-compatible providers, support depends on the underlying model. ## See Also diff --git a/docs/guides/streaming.md b/docs/guides/streaming.md index e8d90f0..def88a0 100644 --- a/docs/guides/streaming.md +++ b/docs/guides/streaming.md @@ -211,10 +211,9 @@ $response = $agentClient->run($client, new LLMRequest( $textDeltas = array_filter($events, fn($e) => $e->type === StreamEventType::TEXT_DELTA); assert(count($textDeltas) > 0, 'Expected text deltas'); -// The accumulated text matches the final response +// The accumulated text matches the final response on all providers $streamedText = implode('', array_map(fn($e) => $e->delta, $textDeltas)); -// Note: For Anthropic/OpenAI, this equals $response->getLastText() -// For Gemini, text parts are separate (each chunk is a distinct text part) +assert($streamedText === $response->getLastText()); ``` ### Logging Tool Calls with Timing diff --git a/docs/guides/structured-output.md b/docs/guides/structured-output.md new file mode 100644 index 0000000..b9fed18 --- /dev/null +++ b/docs/guides/structured-output.md @@ -0,0 +1,200 @@ +# Structured Output + +Force 
the model to respond with JSON matching a schema you define. Instead of parsing free-form text and hoping for the best, structured output guarantees machine-readable responses — ideal for data extraction, classification, and any workflow where the LLM response feeds directly into your application logic. + +## Overview + +Structured output works the same way across all three providers: + +1. Define a JSON Schema describing the response shape +2. Pass it to `LLMRequest` via the `structuredOutputConfig` parameter +3. Read the decoded result with `$response->getLastStructuredData()` + +The library translates your schema to each provider's native structured-output mechanism, so the same request code works with Anthropic, OpenAI, and Gemini. + +## Basic Usage + +Create a `StructuredOutputConfig` with a raw JSON Schema array: + +```php + 'object', + 'properties' => [ + 'name' => ['type' => 'string'], + 'email' => ['type' => 'string'], + ], + 'required' => ['name', 'email'], +]; + +$request = new LLMRequest( + model: $model, + conversation: $conversation, + structuredOutputConfig: new StructuredOutputConfig($schema), +); +``` + +## Reading the Result + +When a request has a `structuredOutputConfig`, the response text is parsed as JSON and stored as structured data. Read it with `getLastStructuredData()`, which returns the decoded array: + +```php +run($client, $request); + +$data = $response->getLastStructuredData(); +echo $data['name']; // "Jane" +echo $data['email']; // "jane@example.com" +``` + +**Note:** With structured output enabled, the assistant message contains structured data instead of plain text — `getLastText()` will throw a `RuntimeException`. Use `getLastStructuredData()` instead. + +The raw JSON string is preserved internally, so structured responses round-trip correctly when you continue the conversation in follow-up requests. 
+ +## Strict Mode + +`StructuredOutputConfig` takes an optional second parameter: + +```php + 'object', + 'properties' => [ + 'name' => ['type' => 'string', 'description' => 'Full name of the person'], + 'email' => ['type' => 'string', 'description' => 'Email address'], + 'phone' => ['type' => ['string', 'null'], 'description' => 'Phone number, null if not mentioned'], + 'topics' => [ + 'type' => 'array', + 'items' => ['type' => 'string'], + 'description' => 'Topics the person wants to discuss', + ], + ], + 'required' => ['name', 'email', 'phone', 'topics'], +]; + +$response = $agentClient->run( + client: $anthropic, + request: new LLMRequest( + model: new AnthropicClaude46Sonnet(), + conversation: new LLMConversation([ + LLMMessage::createFromUserString( + 'Extract the contact information from this email: ' . + '"Hi, this is Jane Novak (jane.novak@example.com). ' . + 'I would like to talk about pricing and the API integration next week."' + ) + ]), + structuredOutputConfig: new StructuredOutputConfig($schema), + ) +); + +$contact = $response->getLastStructuredData(); + +echo $contact['name'] . "\n"; // Jane Novak +echo $contact['email'] . "\n"; // jane.novak@example.com +var_dump($contact['phone']); // NULL +print_r($contact['topics']); // ['pricing', 'API integration'] +``` + +## Combining with Other Features + +### With Tools + +Structured output and tools can be combined in a single request with `LLMAgentClient`. 
The agent runs the tool loop as usual, and the final response is constrained to your schema: + +```php +run( + client: $anthropic, + request: new LLMRequest( + model: new AnthropicClaude46Sonnet(), + conversation: $conversation, + tools: [$currencyTool], + structuredOutputConfig: new StructuredOutputConfig($schema), + ) +); + +$data = $response->getLastStructuredData(); +``` + +### With Reasoning + +Structured output also works alongside reasoning configuration — for example Anthropic encodes both into the same `output_config`: + +```php + ['string', 'null']` instead of omitting fields +4. **Keep schemas flat where possible** - Deeply nested schemas are harder for models to fill correctly +5. **Use `getLastStructuredData()`** - Don't parse `getLastText()`; it throws for structured responses +6. **Prefer structured output over prompt-engineered JSON** - It removes the need for "respond only with JSON" instructions and feedback-loop re-parsing + +## See Also + +- [Configuration Guide](configuration.md) - All `LLMRequest` parameters +- [Tools Guide](tools.md) - Combine structured output with function calling +- [Feedback Loops](feedback-loops.md) - Validate response content beyond schema shape +- [Reasoning Models](reasoning.md) - Combine structured output with reasoning diff --git a/docs/guides/tools.md b/docs/guides/tools.md index 64beab8..29d08ab 100644 --- a/docs/guides/tools.md +++ b/docs/guides/tools.md @@ -126,44 +126,35 @@ handler: function (array $input): PromiseInterface { **Note:** Tool handlers cannot return plain arrays or scalar values. Always wrap your results in `LLMMessageContents::fromArrayData()`. 
-**Tip:** If you need to convert `LLMMessageContents` back to a plain array (e.g., for testing), use the `toArray()` method: +**Tip:** If you need to inspect the contents of an `LLMMessageContents` (e.g., for testing), iterate over it or call `getMessages()` — it returns the individual `LLMMessageContent` items: ```php handle(['input' => 'value']); -$array = $result->toArray(); // Converts to plain array +foreach ($result->getMessages() as $content) { + if ($content instanceof LLMMessageArrayData) { + $array = $content->getData(); // The plain array passed to fromArrayData() + } +} ``` ## Built-in Tools ### Text Editor Tool -For building file-manipulation agents with Anthropic models, you can use the `TextEditorTool`. This tool requires a custom storage implementation. +For building file-manipulation agents, use the `TextEditorTool`. The library ships with two ready-to-use storage backends, so no custom code is needed: -**Note:** This is an advanced feature that requires implementing the `TextEditorStorage` interface to handle file operations securely. +- `TextEditorStorageFilesystem` - Works on real files, sandboxed to a base directory (path traversal and symlink escapes are blocked) +- `TextEditorStorageMemory` - Keeps files in memory, ideal for tests or ephemeral workspaces ```php basePath . '/' . $path); - } - public function write(string $path, string $content): void { - // Implement secure file writing - file_put_contents($this->basePath . '/' . $path, $content); - } - - // Implement other required methods... 
-} - -$storage = new MyTextEditorStorage('/path/to/working/directory'); +// All file operations are restricted to this directory +$storage = new TextEditorStorageFilesystem('/path/to/working/directory'); $textEditorTool = new TextEditorTool($storage); $request = new LLMRequest( @@ -181,7 +172,9 @@ The text editor tool supports: - `str_replace` - Replace text in files - `insert` - Insert text at specific line numbers -**Security Considerations:** When implementing `TextEditorStorage`, ensure proper path validation, access controls, and sandboxing to prevent unauthorized file access. +When used with Anthropic models, the tool is automatically registered as Claude's native text editor tool; with other providers it works as a regular function-calling tool. + +**Custom storage:** For other backends (database, S3, ...), implement the `TextEditorStorage` interface. It defines file operations (`getFileContent`, `setFileContent`, `createFile`, `deleteFile`, `renameFile`, `isFile`) and directory operations (`getDirectoryContent`, `createDirectory`, `deleteDirectory`, `renameDirectory`, `isDirectory`) — see `Soukicz\Llm\Tool\TextEditor\TextEditorStorage` for the exact signatures. Ensure proper path validation and sandboxing to prevent unauthorized file access. ## Input Schema @@ -258,5 +251,6 @@ The `inputSchema` follows JSON Schema specification. Common patterns: ## See Also - [Feedback Loops](feedback-loops.md) - Validate tool outputs +- [Structured Output](structured-output.md) - Combine tools with schema-constrained responses - [Examples](../examples/index.md) - More tool examples - [Provider Documentation](../providers/README.md) - Provider-specific tool features diff --git a/docs/providers/README.md b/docs/providers/README.md index 17e907d..9755afd 100644 --- a/docs/providers/README.md +++ b/docs/providers/README.md @@ -6,10 +6,12 @@ This guide shows how to use each provider client in PHP LLM. 
All providers share | Client | Implements LLMClient | Implements LLMBatchClient | Constructor Parameters | |--------|---------------------|---------------------------|------------------------| -| `AnthropicClient` | ✅ | ✅ | `apiKey`, `cache`, `handler` | -| `OpenAIClient` | ✅ | ✅ | `apiKey`, `apiOrganization`, `cache`, `handler` | -| `GeminiClient` | ✅ | ❌ | `apiKey`, `cache`, `safetySettings`, `handler` | -| `OpenAICompatibleClient` | ✅ | Varies | `apiKey`, `baseUrl`, `cache`, `handler` | +| `AnthropicClient` | ✅ | ✅ | `apiKey`, `cache`, `customHttpMiddleware`, `betaFeatures` | +| `OpenAIClient` | ✅ | ✅ | `apiKey`, `apiOrganization`, `cache`, `customHttpMiddleware` | +| `GeminiClient` | ✅ | ❌ | `apiKey`, `cache`, `customHttpMiddleware`, `safetySettings` | +| `OpenAICompatibleClient` | ✅ | ✅ (if the endpoint supports it) | `apiKey`, `baseUrl`, `cache`, `customHttpMiddleware` | + +Structured output (`StructuredOutputConfig`) is supported by all three native providers (Anthropic, OpenAI, Gemini). 
## Anthropic (Claude) @@ -23,32 +25,47 @@ use Soukicz\Llm\Client\Anthropic\AnthropicClient; $cache = new FileCache(sys_get_temp_dir()); $client = new AnthropicClient( apiKey: 'sk-ant-xxxxx', - cache: $cache, // Optional: CacheInterface - handler: $handlerStack // Optional: Guzzle HandlerStack for middleware + cache: $cache, // Optional: CacheInterface + customHttpMiddleware: $middleware, // Optional: a single Guzzle middleware callable + betaFeatures: ['context-1m-2025-08-07'] // Optional: Anthropic beta feature flags ); ``` ### Model Classes -All Anthropic models require a version constant: +Most Anthropic models require a version constant; the newest models take no constructor arguments: ```php push(Middleware::log($logger, $messageFormatter)); +$middleware = Middleware::log($logger, $messageFormatter); $client = new AnthropicClient( apiKey: 'key', cache: $cache, - handler: $stack + customHttpMiddleware: $middleware ); ``` @@ -392,7 +442,7 @@ $anthropic = new AnthropicClient( $openai = new OpenAIClient( apiKey: getenv('OPENAI_API_KEY'), - apiOrganization: getenv('OPENAI_ORG_ID'), + apiOrganization: getenv('OPENAI_ORG_ID') ?: null, cache: $cache ); ``` @@ -401,5 +451,5 @@ $openai = new OpenAIClient( - [Configuration Guide](../guides/configuration.md) - Request configuration options - [Multimodal Guide](../guides/multimodal.md) - Using images and PDFs -- [Reasoning Guide](../guides/reasoning.md) - OpenAI reasoning models +- [Reasoning Guide](../guides/reasoning.md) - Reasoning configuration for OpenAI, Anthropic, and Gemini - [Batch Processing Guide](../guides/batch-processing.md) - Anthropic and OpenAI batch APIs diff --git a/mkdocs.yml b/mkdocs.yml index 990acb4..3163f10 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -38,6 +38,7 @@ nav: - Getting Started: - Quick Start: examples/quick-start.md - Examples: + - Examples Overview: examples/index.md - Quick Start: examples/quick-start.md - Best Practices: examples/best-practices.md - Tools & Function Calling: 
examples/tools-and-function-calling.md @@ -50,7 +51,9 @@ nav: - Caching: guides/caching.md - Batch Processing: guides/batch-processing.md - Multimodal: guides/multimodal.md + - Streaming: guides/streaming.md - Reasoning: guides/reasoning.md + - Structured Output: guides/structured-output.md - Feedback Loops: guides/feedback-loops.md - Providers: - Overview: providers/README.md diff --git a/src/Cache/FileCache.php b/src/Cache/FileCache.php index fba3d04..04b25e8 100644 --- a/src/Cache/FileCache.php +++ b/src/Cache/FileCache.php @@ -22,12 +22,22 @@ public function fetch(RequestInterface $request): ?ResponseInterface { return null; } - return $this->responseFromJson(file_get_contents($path)); + $contents = @file_get_contents($path); + if ($contents === false) { + return null; + } + + try { + return $this->responseFromJson($contents); + } catch (\JsonException) { + // Treat a corrupted cache file as a cache miss + return null; + } } public function store(RequestInterface $request, ResponseInterface $response): void { $key = $this->getCacheKey($request); - file_put_contents($this->getPath($key), $this->responseToJson($response)); + file_put_contents($this->getPath($key), $this->responseToJson($response), LOCK_EX); } public function invalidate(RequestInterface $request): void { diff --git a/src/Client/Anthropic/AnthropicEncoder.php b/src/Client/Anthropic/AnthropicEncoder.php index d4ff4eb..1c7c0c9 100644 --- a/src/Client/Anthropic/AnthropicEncoder.php +++ b/src/Client/Anthropic/AnthropicEncoder.php @@ -196,6 +196,8 @@ public function encodeRequest(LLMRequest $request): array { 'model' => $request->getModel()->getCode(), ]; + $outputConfig = []; + $reasoningConfig = $request->getReasoningConfig(); if ($reasoningConfig) { if ($reasoningConfig instanceof ReasoningBudget) { @@ -208,14 +210,12 @@ public function encodeRequest(LLMRequest $request): array { $options['thinking'] = [ 'type' => 'adaptive', ]; - $outputConfig = $options['output_config'] ?? 
[]; $outputConfig['effort'] = match ($reasoningConfig) { ReasoningEffort::MINIMAL, ReasoningEffort::LOW => 'low', ReasoningEffort::MEDIUM => 'medium', ReasoningEffort::HIGH => 'high', ReasoningEffort::EXTRA_HIGH => 'max', }; - $options['output_config'] = $outputConfig; } } else { throw new \InvalidArgumentException('Unsupported reasoning config type'); @@ -224,11 +224,13 @@ public function encodeRequest(LLMRequest $request): array { $structuredOutputConfig = $request->getStructuredOutputConfig(); if ($structuredOutputConfig !== null) { - $outputConfig = $options['output_config'] ?? []; $outputConfig['format'] = [ 'type' => 'json_schema', 'schema' => self::normalizeSchemaForStrictMode($structuredOutputConfig->getSchema()), ]; + } + + if (!empty($outputConfig)) { $options['output_config'] = $outputConfig; } @@ -298,7 +300,8 @@ public function decodeResponse(LLMRequest $request, ModelResponse $modelResponse $outputPrice = $response['usage']['output_tokens'] * ($request->getModel()->getOutputPricePerMillionTokens() / 1000 / 1000); $inputPrice += $cacheInputTokens * ($request->getModel()->getCachedInputPricePerMillionTokens() / 1000 / 1000); - $outputPrice += $cacheReadInputTokens * ($request->getModel()->getCachedOutputPricePerMillionTokens() / 1000 / 1000); + // Cache reads are input tokens, so their cost belongs to the input bucket + $inputPrice += $cacheReadInputTokens * ($request->getModel()->getCachedOutputPricePerMillionTokens() / 1000 / 1000); $request = $request ->withCost( diff --git a/src/Client/Gemini/GeminiEncoder.php b/src/Client/Gemini/GeminiEncoder.php index 4993004..b660b06 100644 --- a/src/Client/Gemini/GeminiEncoder.php +++ b/src/Client/Gemini/GeminiEncoder.php @@ -3,6 +3,7 @@ namespace Soukicz\Llm\Client\Gemini; use Soukicz\Llm\Client\Gemini\Model\GeminiImageModel; +use Soukicz\Llm\Client\Gemini\Model\GeminiModel; use Soukicz\Llm\Client\ModelEncoder; use Soukicz\Llm\Client\ModelResponse; use Soukicz\Llm\Client\StopReason; @@ -10,6 +11,7 @@ use 
Soukicz\Llm\LLMRequest; use Soukicz\Llm\LLMResponse; use Soukicz\Llm\Message\LLMMessage; +use Soukicz\Llm\Message\LLMMessageArrayData; use Soukicz\Llm\Message\LLMMessageContents; use Soukicz\Llm\Message\LLMMessageImage; use Soukicz\Llm\Message\LLMMessagePdf; @@ -25,6 +27,7 @@ class GeminiEncoder implements ModelEncoder { public function encodeRequest(LLMRequest $request): array { $contents = []; $systemInstruction = null; + $toolNamesById = []; foreach ($request->getConversation()->getMessages() as $message) { if ($message->isSystem()) { @@ -53,7 +56,9 @@ public function encodeRequest(LLMRequest $request): array { ], ]; } elseif ($messageContent instanceof LLMMessageToolUse) { - // Function call in Gemini format + // Function call in Gemini format - remember the name so the matching + // function response can reference it (Gemini correlates by name, not ID) + $toolNamesById[$messageContent->getId()] = $messageContent->getName(); $contents[] = [ 'role' => 'model', 'parts' => [ @@ -69,14 +74,12 @@ public function encodeRequest(LLMRequest $request): array { } elseif ($messageContent instanceof LLMMessageToolResult) { // Function response in Gemini format $contents[] = [ - 'role' => 'function', + 'role' => 'user', 'parts' => [ [ 'function_response' => [ - 'name' => 'function_' . $messageContent->getId(), // Create a name from ID - 'response' => [ - 'content' => $messageContent->getContent(), - ], + 'name' => $toolNamesById[$messageContent->getId()] ?? 
$messageContent->getId(), + 'response' => self::encodeToolResultResponse($messageContent), ], ], ], @@ -84,8 +87,15 @@ public function encodeRequest(LLMRequest $request): array { continue 2; } elseif ($messageContent instanceof LLMMessageStructuredData) { $parts[] = ['text' => $messageContent->getRawJson()]; + } elseif ($messageContent instanceof LLMMessageArrayData) { + $parts[] = ['text' => json_encode($messageContent->getData(), JSON_THROW_ON_ERROR)]; } elseif ($messageContent instanceof LLMMessagePdf) { - throw new \InvalidArgumentException('PDF content type not supported for Gemini'); + $parts[] = [ + 'inline_data' => [ + 'mime_type' => 'application/pdf', + 'data' => $messageContent->getData(), + ], + ]; } else { throw new \InvalidArgumentException('Unsupported message content type for Gemini'); } @@ -143,6 +153,16 @@ public function encodeRequest(LLMRequest $request): array { $requestData['generationConfig']['thinkingConfig'] = [ 'thinkingBudget' => 0, ]; + } elseif ($model instanceof GeminiModel && !$model->supportsThinkingLevel()) { + // Gemini 2.x models reject thinkingLevel and only accept a token budget + $requestData['generationConfig']['thinkingConfig'] = [ + 'thinkingBudget' => match ($reasoningConfig) { + ReasoningEffort::MINIMAL => 512, + ReasoningEffort::LOW => 1024, + ReasoningEffort::MEDIUM => 8192, + ReasoningEffort::HIGH, ReasoningEffort::EXTRA_HIGH => 24576, + }, + ]; } else { $requestData['generationConfig']['thinkingConfig'] = [ 'thinkingLevel' => match ($reasoningConfig) { @@ -159,18 +179,18 @@ public function encodeRequest(LLMRequest $request): array { } if (!empty($request->getTools())) { - $requestData['tools'] = []; + // Gemini expects all function declarations in a single tools entry + $functionDeclarations = []; foreach ($request->getTools() as $tool) { - $requestData['tools'][] = [ - 'functionDeclarations' => [ - [ - 'name' => $tool->getName(), - 'description' => $tool->getDescription(), - 'parameters' => $tool->getInputSchema(), - 
], - ], + $functionDeclarations[] = [ + 'name' => $tool->getName(), + 'description' => $tool->getDescription(), + 'parameters' => $tool->getInputSchema(), ]; } + $requestData['tools'] = [ + ['functionDeclarations' => $functionDeclarations], + ]; } return $requestData; @@ -228,11 +248,7 @@ public function decodeResponse(LLMRequest $request, ModelResponse $modelResponse if (isset($response['usageMetadata'])) { $promptTokenCount = $response['usageMetadata']['promptTokenCount']; - if ($stopReason === StopReason::SAFETY && !isset($response['usageMetadata']['candidatesTokenCount'])) { - $outputTokenCount = 0; - } else { - $outputTokenCount = $response['usageMetadata']['candidatesTokenCount']; - } + $outputTokenCount = $response['usageMetadata']['candidatesTokenCount'] ?? 0; $inputPrice = $promptTokenCount * ($model->getInputPricePerMillionTokens() / 1_000_000); $outputPrice = $outputTokenCount * ($model->getOutputPricePerMillionTokens() / 1_000_000); @@ -257,6 +273,27 @@ public function decodeResponse(LLMRequest $request, ModelResponse $modelResponse ); } + /** + * Convert tool result contents to a JSON-friendly response payload for Gemini. + */ + private static function encodeToolResultResponse(LLMMessageToolResult $toolResult): array { + $contents = $toolResult->getContent(); + if (count($contents) === 1 && $contents[0] instanceof LLMMessageArrayData) { + return $contents[0]->getData(); + } + + $texts = []; + foreach ($contents as $content) { + if ($content instanceof LLMMessageText) { + $texts[] = $content->getText(); + } elseif ($content instanceof LLMMessageArrayData) { + $texts[] = json_encode($content->getData(), JSON_THROW_ON_ERROR); + } + } + + return ['content' => implode("\n", $texts)]; + } + /** * Normalize a JSON Schema for Gemini by stripping unsupported properties. * Gemini does not support "additionalProperties" — it is silently removed. 
diff --git a/src/Client/Gemini/Model/Gemini31FlashImagePreview.php b/src/Client/Gemini/Model/Gemini31FlashImagePreview.php index dfcb5c0..d639e90 100644 --- a/src/Client/Gemini/Model/Gemini31FlashImagePreview.php +++ b/src/Client/Gemini/Model/Gemini31FlashImagePreview.php @@ -21,6 +21,10 @@ public function getImageSize(): ?string { return $this->imageSize; } + public function supportsThinkingLevel(): bool { + return true; + } + public function getCode(): string { return 'gemini-3.1-flash-image-preview'; } diff --git a/src/Client/Gemini/Model/Gemini3ProImagePreview.php b/src/Client/Gemini/Model/Gemini3ProImagePreview.php index 29cdda8..91d1544 100644 --- a/src/Client/Gemini/Model/Gemini3ProImagePreview.php +++ b/src/Client/Gemini/Model/Gemini3ProImagePreview.php @@ -20,6 +20,10 @@ public function getImageSize(): ?string { return $this->imageSize; } + public function supportsThinkingLevel(): bool { + return true; + } + public function getCode(): string { return 'gemini-3-pro-image-preview'; } diff --git a/src/Client/Gemini/Model/Gemini3ProPreview.php b/src/Client/Gemini/Model/Gemini3ProPreview.php index 61c28d4..867921b 100644 --- a/src/Client/Gemini/Model/Gemini3ProPreview.php +++ b/src/Client/Gemini/Model/Gemini3ProPreview.php @@ -6,6 +6,10 @@ * @see https://ai.google.dev/gemini-api/docs/pricing */ class Gemini3ProPreview extends GeminiModel { + public function supportsThinkingLevel(): bool { + return true; + } + public function getCode(): string { return 'gemini-3-pro-preview'; } diff --git a/src/Client/Gemini/Model/GeminiModel.php b/src/Client/Gemini/Model/GeminiModel.php index d2f311a..3f6df66 100644 --- a/src/Client/Gemini/Model/GeminiModel.php +++ b/src/Client/Gemini/Model/GeminiModel.php @@ -5,4 +5,11 @@ use Soukicz\Llm\Client\ModelInterface; abstract class GeminiModel implements ModelInterface { + /** + * Whether the model accepts thinkingConfig.thinkingLevel (Gemini 3.x and newer). + * Older models (2.x) only support thinkingConfig.thinkingBudget. 
+ */ + public function supportsThinkingLevel(): bool { + return false; + } } diff --git a/src/Client/LLMAgentClient.php b/src/Client/LLMAgentClient.php index faf1111..cb3103a 100644 --- a/src/Client/LLMAgentClient.php +++ b/src/Client/LLMAgentClient.php @@ -62,34 +62,48 @@ private function processToolUseResponse(LLMResponse $response, LLMClient $client $toolResponseContents = []; foreach ($response->getConversation()->getLastMessage()->getContents() as $content) { - if ($content instanceof LLMMessageToolUse) { - foreach ($request->getTools() as $tool) { - if ($tool->getName() === $content->getName()) { - $input = $content->getInput(); - $noContent = empty($input) && empty($tool->getInputSchema()['required']); - - if (!$noContent) { - try { - Schema::import(json_decode(json_encode($tool->getInputSchema())))->in(json_decode(json_encode($input))); - } catch (Exception $e) { - $toolResponseContents[] = Create::promiseFor(new LLMMessageToolResult( - $content->getId(), - LLMMessageContents::fromErrorString('ERROR: Input is not matching expected schema: ' . $e->getMessage()) - )); - continue; - } - } - - $toolResponse = $tool->handle($input); - if ($toolResponse instanceof LLMMessageContents) { - $toolResponse = Create::promiseFor($toolResponse); - } - $toolResponseContents[] = $toolResponse->then(function (LLMMessageContents $response) use ($content) { - return new LLMMessageToolResult($content->getId(), $response); - }); - } + if (!$content instanceof LLMMessageToolUse) { + continue; + } + + $tool = null; + foreach ($request->getTools() as $candidateTool) { + if ($candidateTool->getName() === $content->getName()) { + $tool = $candidateTool; + break; } } + + if ($tool === null) { + $toolResponseContents[] = Create::promiseFor(new LLMMessageToolResult( + $content->getId(), + LLMMessageContents::fromErrorString('ERROR: Tool "' . $content->getName() . 
'" is not available') + )); + continue; + } + + $input = $content->getInput(); + $noContent = empty($input) && empty($tool->getInputSchema()['required']); + + if (!$noContent) { + try { + Schema::import(json_decode(json_encode($tool->getInputSchema())))->in(json_decode(json_encode($input))); + } catch (Exception $e) { + $toolResponseContents[] = Create::promiseFor(new LLMMessageToolResult( + $content->getId(), + LLMMessageContents::fromErrorString('ERROR: Input is not matching expected schema: ' . $e->getMessage()) + )); + continue; + } + } + + $toolResponse = $tool->handle($input); + if ($toolResponse instanceof LLMMessageContents) { + $toolResponse = Create::promiseFor($toolResponse); + } + $toolResponseContents[] = $toolResponse->then(function (LLMMessageContents $response) use ($content) { + return new LLMMessageToolResult($content->getId(), $response); + }); } $newRequest = $response->getRequest()->withMessage(LLMMessage::createFromUser(new LLMMessageContents(Utils::unwrap($toolResponseContents)))); diff --git a/src/Client/OpenAI/AbstractOpenAIClient.php b/src/Client/OpenAI/AbstractOpenAIClient.php index 531019e..e6acf55 100644 --- a/src/Client/OpenAI/AbstractOpenAIClient.php +++ b/src/Client/OpenAI/AbstractOpenAIClient.php @@ -225,11 +225,15 @@ public function retrieveBatch(string $batchId): ?array { $result = json_decode($row, true, 512, JSON_THROW_ON_ERROR); $content = ''; foreach ($result['response']['body']['choices'] as $contentPart) { - $content = $contentPart['message']['content']; - if (is_string($content)) { - $content .= $content; - } elseif ($content['type'] === 'text') { - $content .= $content['text']; + $messageContent = $contentPart['message']['content']; + if (is_string($messageContent)) { + $content .= $messageContent; + } elseif (is_array($messageContent)) { + foreach ($messageContent as $part) { + if (($part['type'] ?? 
null) === 'text') { + $content .= $part['text']; + } + } } } $responses[$result['custom_id']] = $content; diff --git a/src/Client/OpenAI/OpenAIEncoder.php b/src/Client/OpenAI/OpenAIEncoder.php index b4512e3..c030b9e 100644 --- a/src/Client/OpenAI/OpenAIEncoder.php +++ b/src/Client/OpenAI/OpenAIEncoder.php @@ -148,7 +148,7 @@ public function encodeRequest(LLMRequest $request): array { $reasoningConfig = $request->getReasoningConfig(); if ($reasoningConfig) { if ($reasoningConfig instanceof ReasoningEffort) { - $reasoningValue = match ($reasoningConfig) { + $requestData['reasoning_effort'] = match ($reasoningConfig) { ReasoningEffort::NONE => 'none', ReasoningEffort::LOW => 'low', ReasoningEffort::MINIMAL => 'minimal', @@ -156,9 +156,6 @@ public function encodeRequest(LLMRequest $request): array { ReasoningEffort::HIGH => 'high', ReasoningEffort::EXTRA_HIGH => 'xhigh', }; - if ($reasoningValue !== null) { - $requestData['reasoning_effort'] = $reasoningValue; - } } else { throw new InvalidArgumentException('Unsupported reasoning config type'); } @@ -204,8 +201,10 @@ public function decodeResponse(LLMRequest $request, ModelResponse $modelResponse if (isset($response['usage'])) { $promptTokens = $response['usage']['prompt_tokens']; $completionTokens = $response['usage']['completion_tokens']; + $cachedTokens = $response['usage']['prompt_tokens_details']['cached_tokens'] ?? 
0; - $inputPrice = $promptTokens * ($model->getInputPricePerMillionTokens() / 1_000_000); + $inputPrice = ($promptTokens - $cachedTokens) * ($model->getInputPricePerMillionTokens() / 1_000_000) + + $cachedTokens * ($model->getCachedInputPricePerMillionTokens() / 1_000_000); $outputPrice = $completionTokens * ($model->getOutputPricePerMillionTokens() / 1_000_000); $request = $request->withCost($promptTokens, $completionTokens, $inputPrice, $outputPrice); diff --git a/src/Http/HttpClientFactory.php b/src/Http/HttpClientFactory.php index c3f16f3..3b3737b 100644 --- a/src/Http/HttpClientFactory.php +++ b/src/Http/HttpClientFactory.php @@ -3,6 +3,7 @@ namespace Soukicz\Llm\Http; use GuzzleHttp\Client; +use GuzzleHttp\Exception\ConnectException; use GuzzleHttp\HandlerStack; use GuzzleHttp\Middleware; use GuzzleHttp\Promise\Create; @@ -66,15 +67,21 @@ function (ResponseInterface $response) use ($request, $cache, $requestStart) { } private static function addRetryMiddleware(HandlerStack $handler): void { - $decider = static function (int $retries, RequestInterface $request, ?ResponseInterface $response = null): bool { + $decider = static function (int $retries, RequestInterface $request, ?ResponseInterface $response = null, ?\Throwable $exception = null): bool { + if ($retries >= self::MAX_RETRIES) { + return false; + } + if ($exception instanceof ConnectException) { + return true; + } + return - $retries < self::MAX_RETRIES - && null !== $response + null !== $response && in_array($response->getStatusCode(), [429, 529, 500, 502, 503, 504], true); }; - $delay = static function (int $retries, ResponseInterface $response): int { - if (!$response->hasHeader('Retry-After')) { + $delay = static function (int $retries, ?ResponseInterface $response = null): int { + if ($response === null || !$response->hasHeader('Retry-After')) { return RetryMiddleware::exponentialDelay($retries); } diff --git a/src/LLMConversation.php b/src/LLMConversation.php index c0032b1..cb95278 100644 --- 
a/src/LLMConversation.php +++ b/src/LLMConversation.php @@ -47,6 +47,10 @@ public static function fromJson(array $data): self { } public function getLastMessage(): LLMMessage { + if (empty($this->messages)) { + throw new \UnderflowException('Conversation has no messages'); + } + return $this->messages[array_key_last($this->messages)]; } } diff --git a/src/LLMRequest.php b/src/LLMRequest.php index e1c1937..06b4f2a 100644 --- a/src/LLMRequest.php +++ b/src/LLMRequest.php @@ -100,7 +100,7 @@ public function withCost(int $inputTokens, int $outputTokens, float $previousInp $clone->previousInputTokens += $inputTokens; $clone->previousOutputTokens += $outputTokens; - if ($outputTokens > $this->previousMaximumOutputTokens) { + if ($outputTokens > $clone->previousMaximumOutputTokens) { $clone->previousMaximumOutputTokens = $outputTokens; } $clone->previousInputCostUSD += $previousInputCostUSD; diff --git a/src/MarkdownFormatter.php b/src/MarkdownFormatter.php index 073fa4a..2e4def7 100644 --- a/src/MarkdownFormatter.php +++ b/src/MarkdownFormatter.php @@ -63,7 +63,7 @@ public function responseToMarkdown(LLMRequest|LLMResponse $requestOrResponse): s if ($message->isUser()) { $markdown .= '## User:' . "\n"; } elseif ($message->isSystem()) { - $markdown .= '## User:' . "\n"; + $markdown .= '## System:' . "\n"; } elseif ($message->isAssistant()) { $markdown .= '## Assistant:' . "\n"; } else { @@ -93,7 +93,7 @@ public function responseToMarkdown(LLMRequest|LLMResponse $requestOrResponse): s $markdown .= '----------------------'; $markdown .= "\n\n"; - $price = $response->getInputPriceUsd() + $response->getOutputPriceUsd(); + $price = ($response->getInputPriceUsd() ?? 0.0) + ($response->getOutputPriceUsd() ?? 0.0); $markdown .= "##### Total stats\n\n"; $markdown .= 'Finished in ' . number_format($response->getTotalTimeMs() / 1000, 3, '.') . 's' . ', prompt tokens: ' . $response->getInputTokens() . 
diff --git a/src/Message/LLMMessageContents.php b/src/Message/LLMMessageContents.php index b9a428e..f911097 100644 --- a/src/Message/LLMMessageContents.php +++ b/src/Message/LLMMessageContents.php @@ -29,9 +29,13 @@ public function getMessages(): array { } public static function fromJson(array $data): self { + // Current format wraps items to preserve the isError flag; legacy format was a plain list + $items = $data['items'] ?? $data; + $isError = $data['isError'] ?? false; + /** @var LLMMessageContent[] $content */ $content = []; - foreach ($data as $item) { + foreach ($items as $item) { $class = $item['class']; if (!is_subclass_of($class, LLMMessageContent::class)) { throw new InvalidArgumentException("Class $class does not implement LLMMessageContent"); @@ -46,11 +50,14 @@ public static function fromJson(array $data): self { $content[] = $result; } - return new self($content); + return new self($content, $isError); } public function jsonSerialize(): array { - return array_map(static fn(LLMMessageContent $content) => ['class' => $content::class, 'data' => $content], $this->messages); + return [ + 'isError' => $this->isError, + 'items' => array_map(static fn(LLMMessageContent $content) => ['class' => $content::class, 'data' => $content], $this->messages), + ]; } public static function fromString(string $content): self { diff --git a/src/Message/LLMMessageReasoning.php b/src/Message/LLMMessageReasoning.php index dd023d1..d5507a0 100644 --- a/src/Message/LLMMessageReasoning.php +++ b/src/Message/LLMMessageReasoning.php @@ -3,7 +3,7 @@ namespace Soukicz\Llm\Message; class LLMMessageReasoning implements LLMMessageContent { - public function __construct(private string $text, private readonly ?string $signature, private readonly bool $cached = false) { + public function __construct(private readonly string $text, private readonly ?string $signature, private readonly bool $cached = false) { } public function getText(): string { diff --git a/src/Message/LLMMessageText.php 
b/src/Message/LLMMessageText.php index 187d69d..0170448 100644 --- a/src/Message/LLMMessageText.php +++ b/src/Message/LLMMessageText.php @@ -3,7 +3,7 @@ namespace Soukicz\Llm\Message; class LLMMessageText implements LLMMessageContent { - public function __construct(private string $text, private readonly bool $cached = false) { + public function __construct(private readonly string $text, private readonly bool $cached = false) { } public function getText(): string { diff --git a/src/Stream/GeminiStreamAccumulator.php b/src/Stream/GeminiStreamAccumulator.php index 086e87b..e5c02ae 100644 --- a/src/Stream/GeminiStreamAccumulator.php +++ b/src/Stream/GeminiStreamAccumulator.php @@ -76,7 +76,6 @@ public static function consume(StreamInterface $stream, StreamListenerInterface $allParts = []; $finishReason = null; $usageMetadata = []; - $blockIndex = 0; $listener->onStreamEvent(new StreamEvent( type: StreamEventType::MESSAGE_START, @@ -102,48 +101,59 @@ public static function consume(StreamInterface $stream, StreamListenerInterface $finishReason = $candidate['finishReason']; } - // Process parts + // Process parts. Gemini streams text in many small parts that all belong to + // the same logical block - consecutive parts of the same kind are merged so + // the reconstructed response matches the non-streaming format (a single text + // part) and decodeResponse()/getLastText() see the full text. 
if (isset($candidate['content']['parts'])) { foreach ($candidate['content']['parts'] as $part) { - if (isset($part['text'])) { - $allParts[] = $part; + $lastIndex = count($allParts) - 1; + if (isset($part['text']) && !isset($part['thought'])) { + if ($lastIndex >= 0 && isset($allParts[$lastIndex]['text']) && !isset($allParts[$lastIndex]['thought'])) { + $allParts[$lastIndex]['text'] .= $part['text']; + } else { + $allParts[] = $part; + $lastIndex++; + } $listener->onStreamEvent(new StreamEvent( type: StreamEventType::TEXT_DELTA, - blockIndex: $blockIndex, + blockIndex: $lastIndex, delta: $part['text'], )); - $blockIndex++; } elseif (isset($part['thought'])) { - $allParts[] = $part; + if ($lastIndex >= 0 && isset($allParts[$lastIndex]['thought']) && is_string($allParts[$lastIndex]['thought']) && is_string($part['thought'])) { + $allParts[$lastIndex]['thought'] .= $part['thought']; + } else { + $allParts[] = $part; + $lastIndex++; + } $listener->onStreamEvent(new StreamEvent( type: StreamEventType::THINKING_DELTA, - blockIndex: $blockIndex, - delta: $part['thought'], + blockIndex: $lastIndex, + delta: is_string($part['thought']) ? $part['thought'] : ($part['text'] ?? ''), )); - $blockIndex++; } elseif (isset($part['functionCall'])) { $allParts[] = $part; + $lastIndex++; $listener->onStreamEvent(new StreamEvent( type: StreamEventType::TOOL_USE_START, - blockIndex: $blockIndex, + blockIndex: $lastIndex, toolName: $part['functionCall']['name'], )); // Emit the full input as a single delta since Gemini sends complete tool calls $inputJson = json_encode($part['functionCall']['args'] ?? 
[], JSON_THROW_ON_ERROR); $listener->onStreamEvent(new StreamEvent( type: StreamEventType::TOOL_INPUT_DELTA, - blockIndex: $blockIndex, + blockIndex: $lastIndex, delta: $inputJson, toolName: $part['functionCall']['name'], )); $listener->onStreamEvent(new StreamEvent( type: StreamEventType::CONTENT_BLOCK_STOP, - blockIndex: $blockIndex, + blockIndex: $lastIndex, )); - $blockIndex++; } elseif (isset($part['inlineData'])) { $allParts[] = $part; - $blockIndex++; } } } diff --git a/src/Tool/TextEditor/TextEditorTool.php b/src/Tool/TextEditor/TextEditorTool.php index 4ca0fa6..d54a007 100644 --- a/src/Tool/TextEditor/TextEditorTool.php +++ b/src/Tool/TextEditor/TextEditorTool.php @@ -236,14 +236,14 @@ protected function replaceInFile(string $path, string $oldString, string $newStr } if ($matchCount > 1) { - // Find line numbers where old_str appears (matches reference implementation) - $contentLines = explode("\n", $content); + // Find line numbers where old_str appears (works for multi-line strings too) $lineNumbers = []; - foreach ($contentLines as $idx => $line) { - if (str_contains($line, $oldString)) { - $lineNumbers[] = $idx + 1; - } + $offset = 0; + while (($pos = strpos($content, $oldString, $offset)) !== false) { + $lineNumbers[] = substr_count($content, "\n", 0, $pos) + 1; + $offset = $pos + 1; } + $lineNumbers = array_values(array_unique($lineNumbers)); return LLMMessageContents::fromErrorString( "No replacement was performed. Multiple occurrences of old_str `$oldString` in lines [" . implode(', ', $lineNumbers) . "]. Please ensure it is unique" diff --git a/tests/Cache/FileCacheTest.php b/tests/Cache/FileCacheTest.php new file mode 100644 index 0000000..4663d4c --- /dev/null +++ b/tests/Cache/FileCacheTest.php @@ -0,0 +1,95 @@ +cacheDir = sys_get_temp_dir() . '/llm-file-cache-test-' . uniqid(); + mkdir($this->cacheDir); + $this->cache = new FileCache($this->cacheDir); + } + + protected function tearDown(): void { + foreach (glob($this->cacheDir . 
'/*') ?: [] as $file) { + unlink($file); + } + rmdir($this->cacheDir); + } + + private function createRequest(string $body = '{"prompt":"hello"}'): Request { + return new Request('POST', 'https://api.example.com/v1/messages', [], $body); + } + + public function testConstructorRejectsMissingDirectory(): void { + $this->expectException(\RuntimeException::class); + + new FileCache($this->cacheDir . '/does-not-exist'); + } + + public function testFetchReturnsNullOnMiss(): void { + $this->assertNull($this->cache->fetch($this->createRequest())); + } + + public function testStoreFetchRoundTripPreservesResponse(): void { + $request = $this->createRequest(); + $response = new Response(200, ['Content-Type' => 'application/json', 'X-Custom' => 'abc'], '{"answer":42}'); + + $this->cache->store($request, $response); + $cached = $this->cache->fetch($request); + + $this->assertNotNull($cached); + $this->assertSame(200, $cached->getStatusCode()); + $this->assertSame('{"answer":42}', (string) $cached->getBody()); + $this->assertSame('application/json', $cached->getHeaderLine('Content-Type')); + $this->assertSame('abc', $cached->getHeaderLine('X-Custom')); + } + + public function testDifferentRequestBodiesGetDifferentEntries(): void { + $requestA = $this->createRequest('{"prompt":"a"}'); + $requestB = $this->createRequest('{"prompt":"b"}'); + + $this->cache->store($requestA, new Response(200, [], 'response A')); + $this->cache->store($requestB, new Response(200, [], 'response B')); + + $this->assertSame('response A', (string) $this->cache->fetch($requestA)->getBody()); + $this->assertSame('response B', (string) $this->cache->fetch($requestB)->getBody()); + } + + public function testInvalidateRemovesEntry(): void { + $request = $this->createRequest(); + $this->cache->store($request, new Response(200, [], 'data')); + $this->assertNotNull($this->cache->fetch($request)); + + $this->cache->invalidate($request); + + $this->assertNull($this->cache->fetch($request)); + } + + public function 
testInvalidateOnMissingEntryIsSilent(): void { + $this->cache->invalidate($this->createRequest()); + + $this->assertNull($this->cache->fetch($this->createRequest())); + } + + public function testCorruptedCacheFileIsTreatedAsMiss(): void { + $request = $this->createRequest(); + $this->cache->store($request, new Response(200, [], 'data')); + + // Corrupt the single stored file + $files = glob($this->cacheDir . '/*.json'); + $this->assertCount(1, $files); + file_put_contents($files[0], 'this is not json {'); + + $this->assertNull($this->cache->fetch($request)); + } +} diff --git a/tests/Client/Anthropic/AnthropicBatchTest.php b/tests/Client/Anthropic/AnthropicBatchTest.php new file mode 100644 index 0000000..9950cfc --- /dev/null +++ b/tests/Client/Anthropic/AnthropicBatchTest.php @@ -0,0 +1,127 @@ + */ + private array $sentRequests = []; + + private function createClientWithResponses(array $responses): AnthropicClient { + $mock = new MockHandler($responses); + $stack = HandlerStack::create($mock); + $this->sentRequests = []; + $history = Middleware::history($this->sentRequests); + $stack->push($history); + + $client = new AnthropicClient('test-api-key'); + + // Inject the mocked HTTP client into the lazily initialized private property + $reflection = new \ReflectionProperty(AnthropicClient::class, 'httpClient'); + $reflection->setValue($client, new Client(['handler' => $stack])); + + return $client; + } + + private function createRequest(string $prompt): LLMRequest { + return new LLMRequest( + model: new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001), + conversation: new LLMConversation([LLMMessage::createFromUserString($prompt)]), + ); + } + + public function testCreateBatchEncodesRequestsWithCustomIds(): void { + $client = $this->createClientWithResponses([ + new Response(200, [], json_encode(['id' => 'msgbatch_123'], JSON_THROW_ON_ERROR)), + ]); + + $batchId = $client->createBatch([ + 'first' => $this->createRequest('Hello'), + 'second' => 
$this->createRequest('World'), + ]); + + $this->assertSame('msgbatch_123', $batchId); + + $this->assertCount(1, $this->sentRequests); + $sent = $this->sentRequests[0]['request']; + $this->assertSame('POST', $sent->getMethod()); + $this->assertSame('https://api.anthropic.com/v1/messages/batches', (string) $sent->getUri()); + $this->assertSame('test-api-key', $sent->getHeaderLine('x-api-key')); + + $payload = json_decode((string) $sent->getBody(), true, 512, JSON_THROW_ON_ERROR); + $this->assertCount(2, $payload['requests']); + $this->assertSame('first', $payload['requests'][0]['custom_id']); + $this->assertSame('second', $payload['requests'][1]['custom_id']); + $this->assertSame('Hello', $payload['requests'][0]['params']['messages'][0]['content'][0]['text']); + $this->assertSame('claude-haiku-4-5-20251001', $payload['requests'][0]['params']['model']); + } + + public function testRetrieveBatchReturnsNullWhileInProgress(): void { + $client = $this->createClientWithResponses([ + new Response(200, [], json_encode(['processing_status' => 'in_progress'], JSON_THROW_ON_ERROR)), + ]); + + $this->assertNull($client->retrieveBatch('msgbatch_123')); + } + + public function testRetrieveBatchReturnsContentKeyedByCustomId(): void { + $statusResponse = json_encode([ + 'processing_status' => 'ended', + 'results_url' => 'https://api.anthropic.com/v1/messages/batches/msgbatch_123/results', + ], JSON_THROW_ON_ERROR); + + // JSONL results: multiple text blocks must be concatenated, non-text blocks skipped + $resultsJsonl = implode("\n", [ + json_encode([ + 'custom_id' => 'first', + 'result' => ['message' => ['content' => [ + ['type' => 'text', 'text' => 'Hello '], + ['type' => 'text', 'text' => 'world'], + ]]], + ], JSON_THROW_ON_ERROR), + json_encode([ + 'custom_id' => 'second', + 'result' => ['message' => ['content' => [ + ['type' => 'thinking', 'thinking' => 'hmm', 'signature' => 'sig'], + ['type' => 'text', 'text' => 'Second answer'], + ]]], + ], JSON_THROW_ON_ERROR), + ]); + + 
$client = $this->createClientWithResponses([ + new Response(200, [], $statusResponse), + new Response(200, [], $resultsJsonl), + ]); + + $results = $client->retrieveBatch('msgbatch_123'); + + $this->assertSame([ + 'first' => 'Hello world', + 'second' => 'Second answer', + ], $results); + } + + public function testRetrieveBatchThrowsOnUnexpectedStatus(): void { + $client = $this->createClientWithResponses([ + new Response(200, [], json_encode(['processing_status' => 'canceling', 'status' => 'canceling'], JSON_THROW_ON_ERROR)), + ]); + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Unexpected batch status'); + + $client->retrieveBatch('msgbatch_123'); + } +} diff --git a/tests/Client/Anthropic/AnthropicDecoderPricingTest.php b/tests/Client/Anthropic/AnthropicDecoderPricingTest.php new file mode 100644 index 0000000..a272a3e --- /dev/null +++ b/tests/Client/Anthropic/AnthropicDecoderPricingTest.php @@ -0,0 +1,74 @@ +encoder = new AnthropicEncoder(); + } + + private function createRequest(): LLMRequest { + return new LLMRequest( + // Sonnet 4.5: input $3/M, output $15/M, cache write $3.75/M, cache read $0.30/M + model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929), + conversation: new LLMConversation([LLMMessage::createFromUserString('Hello')]), + ); + } + + private function decode(array $usage): LLMResponse { + $response = $this->encoder->decodeResponse($this->createRequest(), new ModelResponse([ + 'content' => [['type' => 'text', 'text' => 'Hi there']], + 'usage' => $usage, + 'stop_reason' => 'end_turn', + ], 500)); + + $this->assertInstanceOf(LLMResponse::class, $response); + + return $response; + } + + public function testPricingWithoutCache(): void { + $response = $this->decode([ + 'input_tokens' => 1_000_000, + 'output_tokens' => 100_000, + ]); + + $this->assertEqualsWithDelta(3.0, $response->getInputPriceUsd(), 1e-9); + $this->assertEqualsWithDelta(1.5, $response->getOutputPriceUsd(), 1e-9); + 
$this->assertSame(1_000_000, $response->getInputTokens()); + $this->assertSame(100_000, $response->getOutputTokens()); + $this->assertSame(StopReason::FINISHED, $response->getStopReason()); + } + + /** + * Cache writes and cache reads are both input-side costs. Cache reads used to be + * misattributed to the output price bucket - this pins the corrected behavior. + */ + public function testCacheTokensAreChargedToInputBucket(): void { + $response = $this->decode([ + 'input_tokens' => 1_000_000, + 'output_tokens' => 100_000, + 'cache_creation_input_tokens' => 1_000_000, + 'cache_read_input_tokens' => 1_000_000, + ]); + + // 1M uncached input ($3.00) + 1M cache write ($3.75) + 1M cache read ($0.30) + $this->assertEqualsWithDelta(7.05, $response->getInputPriceUsd(), 1e-9); + // Output stays pure output: 100k * $15/M + $this->assertEqualsWithDelta(1.5, $response->getOutputPriceUsd(), 1e-9); + } +} diff --git a/tests/Client/Gemini/GeminiEncoderMediaTest.php b/tests/Client/Gemini/GeminiEncoderMediaTest.php index 910dd8d..0d01cd4 100644 --- a/tests/Client/Gemini/GeminiEncoderMediaTest.php +++ b/tests/Client/Gemini/GeminiEncoderMediaTest.php @@ -91,8 +91,7 @@ public function testMixedMediaRequest(): void { $this->assertEquals('What breed is it?', $encoded['contents'][0]['parts'][2]['text']); } - public function testPdfRequestShouldThrowException(): void { - // PDF is not supported by Gemini directly + public function testPdfRequest(): void { $conversation = new LLMConversation([ LLMMessage::createFromUser(new LLMMessageContents([ new LLMMessageText('Analyze this PDF:'), @@ -105,9 +104,11 @@ public function testPdfRequestShouldThrowException(): void { conversation: $conversation ); - $this->expectException(InvalidArgumentException::class); - $this->expectExceptionMessage('PDF content type not supported for Gemini'); + $encoded = $this->encoder->encodeRequest($request); - $this->encoder->encodeRequest($request); + $this->assertEquals('Analyze this PDF:', 
$encoded['contents'][0]['parts'][0]['text']); + $this->assertArrayHasKey('inline_data', $encoded['contents'][0]['parts'][1]); + $this->assertEquals('application/pdf', $encoded['contents'][0]['parts'][1]['inline_data']['mime_type']); + $this->assertEquals('base64encodedpdf', $encoded['contents'][0]['parts'][1]['inline_data']['data']); } } diff --git a/tests/Client/Gemini/GeminiEncoderTextTest.php b/tests/Client/Gemini/GeminiEncoderTextTest.php index bcfea80..607562b 100644 --- a/tests/Client/Gemini/GeminiEncoderTextTest.php +++ b/tests/Client/Gemini/GeminiEncoderTextTest.php @@ -7,6 +7,8 @@ use PHPUnit\Framework\TestCase; use Soukicz\Llm\Client\Gemini\GeminiEncoder; use Soukicz\Llm\Client\Gemini\Model\Gemini20Flash; +use Soukicz\Llm\Client\Gemini\Model\Gemini25FlashLite; +use Soukicz\Llm\Client\Gemini\Model\Gemini3ProPreview; use Soukicz\Llm\Config\ReasoningEffort; use Soukicz\Llm\LLMConversation; use Soukicz\Llm\LLMRequest; @@ -110,7 +112,7 @@ public function testReasoningEffortHigh(): void { ]); $request = new LLMRequest( - model: new Gemini20Flash(), + model: new Gemini3ProPreview(), conversation: $conversation, reasoningConfig: ReasoningEffort::HIGH, ); @@ -127,7 +129,7 @@ public function testReasoningEffortLow(): void { ]); $request = new LLMRequest( - model: new Gemini20Flash(), + model: new Gemini3ProPreview(), conversation: $conversation, reasoningConfig: ReasoningEffort::LOW, ); @@ -143,7 +145,7 @@ public function testReasoningEffortMedium(): void { ]); $request = new LLMRequest( - model: new Gemini20Flash(), + model: new Gemini3ProPreview(), conversation: $conversation, reasoningConfig: ReasoningEffort::MEDIUM, ); @@ -159,7 +161,7 @@ public function testReasoningEffortMinimal(): void { ]); $request = new LLMRequest( - model: new Gemini20Flash(), + model: new Gemini3ProPreview(), conversation: $conversation, reasoningConfig: ReasoningEffort::MINIMAL, ); @@ -175,7 +177,7 @@ public function testReasoningEffortExtraHigh(): void { ]); $request = new LLMRequest( 
- model: new Gemini20Flash(), + model: new Gemini3ProPreview(), conversation: $conversation, reasoningConfig: ReasoningEffort::EXTRA_HIGH, ); @@ -201,4 +203,32 @@ public function testReasoningEffortNone(): void { $this->assertArrayHasKey('thinkingConfig', $encoded['generationConfig']); $this->assertEquals(0, $encoded['generationConfig']['thinkingConfig']['thinkingBudget']); } + + /** + * Gemini 2.x models reject thinkingLevel ("Thinking level is not supported for this + * model") - reasoning effort must be translated to a token budget instead + */ + public function testReasoningEffortUsesThinkingBudgetOnGemini2x(): void { + $expectedBudgets = [ + ReasoningEffort::MINIMAL->value => 512, + ReasoningEffort::LOW->value => 1024, + ReasoningEffort::MEDIUM->value => 8192, + ReasoningEffort::HIGH->value => 24576, + ReasoningEffort::EXTRA_HIGH->value => 24576, + ]; + + foreach ($expectedBudgets as $effort => $expectedBudget) { + $request = new LLMRequest( + model: new Gemini25FlashLite(), + conversation: new LLMConversation([LLMMessage::createFromUserString('Question')]), + reasoningConfig: ReasoningEffort::from($effort), + ); + + $encoded = $this->encoder->encodeRequest($request); + + $thinkingConfig = $encoded['generationConfig']['thinkingConfig']; + $this->assertArrayNotHasKey('thinkingLevel', $thinkingConfig, "Effort $effort must not produce thinkingLevel on a 2.x model"); + $this->assertSame($expectedBudget, $thinkingConfig['thinkingBudget'], "Wrong budget for effort $effort"); + } + } } diff --git a/tests/Client/Gemini/GeminiEncoderToolsTest.php b/tests/Client/Gemini/GeminiEncoderToolsTest.php index 07639e7..78ff58f 100644 --- a/tests/Client/Gemini/GeminiEncoderToolsTest.php +++ b/tests/Client/Gemini/GeminiEncoderToolsTest.php @@ -133,9 +133,42 @@ public function testFunctionResultMessage(): void { // Check function result structure $functionResult = $encoded['contents'][2]; - $this->assertEquals('function', $functionResult['role']); + $this->assertEquals('user', 
$functionResult['role']); $this->assertCount(1, $functionResult['parts']); $this->assertArrayHasKey('function_response', $functionResult['parts'][0]); + // Gemini correlates function responses by name, which must match the original function call + $this->assertEquals('get_weather', $functionResult['parts'][0]['function_response']['name']); + $this->assertEquals( + ['temperature' => 22, 'condition' => 'sunny'], + $functionResult['parts'][0]['function_response']['response'] + ); + } + + public function testMultipleToolsInSingleDeclaration(): void { + $conversation = new LLMConversation([ + LLMMessage::createFromUserString('What is the weather like in Prague?'), + ]); + + $makeTool = fn(string $name) => new CallbackToolDefinition( + $name, + 'Description of ' . $name, + ['type' => 'object', 'properties' => [], 'required' => []], + fn(array $input) => [] + ); + + $request = new LLMRequest( + model: new Gemini20Flash(), + conversation: $conversation, + tools: [$makeTool('tool_one'), $makeTool('tool_two')] + ); + + $encoded = $this->encoder->encodeRequest($request); + + // Gemini requires all function declarations in a single tools entry + $this->assertCount(1, $encoded['tools']); + $this->assertCount(2, $encoded['tools'][0]['functionDeclarations']); + $this->assertEquals('tool_one', $encoded['tools'][0]['functionDeclarations'][0]['name']); + $this->assertEquals('tool_two', $encoded['tools'][0]['functionDeclarations'][1]['name']); } public function testCompleteFunctionFlow(): void { diff --git a/tests/Client/Gemini/GeminiStreamingTest.php b/tests/Client/Gemini/GeminiStreamingTest.php index 83f7700..be51e3b 100644 --- a/tests/Client/Gemini/GeminiStreamingTest.php +++ b/tests/Client/Gemini/GeminiStreamingTest.php @@ -78,8 +78,8 @@ public function testStreamingTextResponse(): void { $response = $client->sendRequestAsync($request)->wait(); - // Gemini produces separate text parts per chunk, getLastText() returns the last one - $this->assertEquals(' world!', 
$response->getLastText()); + // Streamed text chunks are merged so getLastText() returns the complete text + $this->assertEquals('Hello world!', $response->getLastText()); $this->assertEquals(StopReason::FINISHED, $response->getStopReason()); $this->assertEquals(10, $response->getInputTokens()); $this->assertEquals(5, $response->getOutputTokens()); diff --git a/tests/Client/LLMAgentClientTest.php b/tests/Client/LLMAgentClientTest.php index 92a931e..403a5f6 100644 --- a/tests/Client/LLMAgentClientTest.php +++ b/tests/Client/LLMAgentClientTest.php @@ -211,6 +211,157 @@ function (array $input): PromiseInterface { } + /** + * Test that a request for an unknown tool produces an error tool result instead of an empty message + */ + public function testUnknownToolReturnsErrorResult(): void { + $calculatorTool = new CallbackToolDefinition( + 'calculator', + 'Basic calculator for math operations', + [ + 'type' => 'object', + 'properties' => [ + 'expression' => [ + 'type' => 'string', + 'description' => 'Math expression to evaluate', + ], + ], + 'required' => ['expression'], + ], + function (array $input): PromiseInterface { + return Create::promiseFor(LLMMessageContents::fromArrayData(['result' => 4])); + } + ); + + $conversation = new LLMConversation([ + LLMMessage::createFromUserString('What is 2+2?'), + ]); + + $request = new LLMRequest( + model: new GPT41(GPT41::VERSION_2025_04_14), + conversation: $conversation, + tools: [$calculatorTool] + ); + + // Model hallucinates a tool that is not registered + $response1 = $this->createToolUseResponse($request, 'tool-123', 'nonexistent_tool', ['foo' => 'bar']); + + $request2 = $response1->getRequest()->withMessage( + LLMMessage::createFromUser(new LLMMessageContents([ + new LLMMessageToolResult('tool-123', LLMMessageContents::fromErrorString('ERROR: Tool "nonexistent_tool" is not available')), + ])) + ); + $response2 = $this->createFinalResponse($request2, 'I could not use that tool.'); + + $sentRequests = []; + $responseQueue = 
[$response1, $response2]; + $mockClient = $this->createMock(LLMClient::class); + $mockClient->method('sendRequestAsync') + ->willReturnCallback(function (LLMRequest $sentRequest) use (&$responseQueue, &$sentRequests) { + $sentRequests[] = $sentRequest; + + return Create::promiseFor(array_shift($responseQueue)); + }); + + $agentClient = new LLMAgentClient(); + $finalResponse = $agentClient->run($mockClient, $request); + + $this->assertEquals(StopReason::FINISHED, $finalResponse->getStopReason()); + + // The follow-up request built by the agent must contain an error tool result for the unknown tool + $this->assertCount(2, $sentRequests); + $toolResultMessage = $sentRequests[1]->getLastMessage(); + $contents = $toolResultMessage->getContents(); + $this->assertCount(1, $contents); + $this->assertInstanceOf(LLMMessageToolResult::class, $contents[0]); + $this->assertEquals('tool-123', $contents[0]->getId()); + $this->assertTrue($contents[0]->getContent()->isError()); + } + + /** + * Test that tool input failing schema validation produces an error result + * without executing the tool handler + */ + public function testSchemaValidationFailureSkipsToolExecution(): void { + $handlerCalled = false; + $calculatorTool = new CallbackToolDefinition( + 'calculator', + 'Basic calculator for math operations', + [ + 'type' => 'object', + 'properties' => [ + 'expression' => ['type' => 'string'], + ], + 'required' => ['expression'], + ], + function (array $input) use (&$handlerCalled): PromiseInterface { + $handlerCalled = true; + + return Create::promiseFor(LLMMessageContents::fromArrayData(['result' => 4])); + } + ); + + $request = new LLMRequest( + model: new GPT41(GPT41::VERSION_2025_04_14), + conversation: new LLMConversation([LLMMessage::createFromUserString('What is 2+2?')]), + tools: [$calculatorTool] + ); + + // Model sends an integer where the schema requires a string + $response1 = $this->createToolUseResponse($request, 'tool-123', 'calculator', ['expression' => 42]); + 
$request2 = $response1->getRequest()->withMessage( + LLMMessage::createFromUser(new LLMMessageContents([ + new LLMMessageToolResult('tool-123', LLMMessageContents::fromErrorString('ERROR: schema mismatch')), + ])) + ); + $response2 = $this->createFinalResponse($request2, 'Sorry, I sent invalid input.'); + + $sentRequests = []; + $responseQueue = [$response1, $response2]; + $mockClient = $this->createMock(LLMClient::class); + $mockClient->method('sendRequestAsync') + ->willReturnCallback(function (LLMRequest $sentRequest) use (&$responseQueue, &$sentRequests) { + $sentRequests[] = $sentRequest; + + return Create::promiseFor(array_shift($responseQueue)); + }); + + $agentClient = new LLMAgentClient(); + $agentClient->run($mockClient, $request); + + $this->assertFalse($handlerCalled, 'Tool handler must not run on schema validation failure'); + + $toolResultMessage = $sentRequests[1]->getLastMessage(); + $contents = $toolResultMessage->getContents(); + $this->assertCount(1, $contents); + $this->assertInstanceOf(LLMMessageToolResult::class, $contents[0]); + $this->assertTrue($contents[0]->getContent()->isError()); + $this->assertStringContainsString( + 'not matching expected schema', + $contents[0]->getContent()->getMessages()[0]->getText() + ); + } + + /** + * Test that a feedback callback returning anything but LLMMessage or null is rejected + */ + public function testFeedbackCallbackMustReturnLLMMessageOrNull(): void { + $request = new LLMRequest( + model: new GPT41(GPT41::VERSION_2025_04_14), + conversation: new LLMConversation([LLMMessage::createFromUserString('Hello')]), + ); + + $response = $this->createFinalResponse($request, 'Hi there'); + $mockClient = $this->createMockLLMClient([$response]); + + $agentClient = new LLMAgentClient(); + + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessage('Feedback callback must return an instance of LLMMessage'); + + $agentClient->run($mockClient, $request, fn(LLMResponse $r) => 'try 
again'); + } + /** * Create a mock LLM client that returns predefined responses * diff --git a/tests/Client/OpenAI/OpenAIBatchTest.php b/tests/Client/OpenAI/OpenAIBatchTest.php new file mode 100644 index 0000000..65c164e --- /dev/null +++ b/tests/Client/OpenAI/OpenAIBatchTest.php @@ -0,0 +1,159 @@ + */ + private array $sentRequests = []; + + private function createClientWithResponses(array $responses): OpenAIClient { + $mock = new MockHandler($responses); + $stack = HandlerStack::create($mock); + $this->sentRequests = []; + $stack->push(Middleware::history($this->sentRequests)); + + $client = new OpenAIClient('test-api-key', null); + + $reflection = new \ReflectionProperty(AbstractOpenAIClient::class, 'httpClient'); + $reflection->setValue($client, new Client(['handler' => $stack])); + + return $client; + } + + private function createRequest(string $prompt): LLMRequest { + return new LLMRequest( + model: new GPT4oMini(GPT4oMini::VERSION_2024_07_18), + conversation: new LLMConversation([LLMMessage::createFromUserString($prompt)]), + ); + } + + public function testCreateBatchUploadsJsonlAndCreatesBatch(): void { + $client = $this->createClientWithResponses([ + new Response(200, [], json_encode(['id' => 'file-abc'], JSON_THROW_ON_ERROR)), + new Response(200, [], json_encode(['id' => 'batch-xyz'], JSON_THROW_ON_ERROR)), + ]); + + $batchId = $client->createBatch([ + 'first' => $this->createRequest('Hello'), + 'second' => $this->createRequest('World'), + ]); + + $this->assertSame('batch-xyz', $batchId); + $this->assertCount(2, $this->sentRequests); + + // First request uploads the JSONL file + $fileUpload = $this->sentRequests[0]['request']; + $this->assertStringEndsWith('/files', (string) $fileUpload->getUri()); + $uploadBody = (string) $fileUpload->getBody(); + $this->assertStringContainsString('"custom_id":"first"', $uploadBody); + $this->assertStringContainsString('"custom_id":"second"', $uploadBody); + 
$this->assertStringContainsString('"url":"\/v1\/chat\/completions"', $uploadBody); + + // Second request creates the batch from the uploaded file + $batchCreate = $this->sentRequests[1]['request']; + $this->assertStringEndsWith('/batches', (string) $batchCreate->getUri()); + $batchPayload = json_decode((string) $batchCreate->getBody(), true, 512, JSON_THROW_ON_ERROR); + $this->assertSame('file-abc', $batchPayload['input_file_id']); + $this->assertSame('24h', $batchPayload['completion_window']); + } + + public function testRetrieveBatchReturnsNullWhileNotCompleted(): void { + $client = $this->createClientWithResponses([ + new Response(200, [], json_encode(['status' => 'in_progress'], JSON_THROW_ON_ERROR)), + ]); + + $this->assertNull($client->retrieveBatch('batch-xyz')); + } + + /** + * Regression test: content used to be doubled ($content .= $content) instead of accumulated + */ + public function testRetrieveBatchReturnsContentKeyedByCustomId(): void { + $statusResponse = json_encode([ + 'status' => 'completed', + 'output_file_id' => 'file-out', + 'error_file_id' => null, + 'completed_at' => time(), + ], JSON_THROW_ON_ERROR); + + $resultsJsonl = implode("\n", [ + json_encode([ + 'custom_id' => 'first', + 'response' => ['body' => ['choices' => [ + ['message' => ['content' => 'Hello world']], + ]]], + ], JSON_THROW_ON_ERROR), + json_encode([ + 'custom_id' => 'second', + 'response' => ['body' => ['choices' => [ + // Content may also arrive as a list of typed parts + ['message' => ['content' => [ + ['type' => 'text', 'text' => 'Second '], + ['type' => 'text', 'text' => 'answer'], + ]]], + ]]], + ], JSON_THROW_ON_ERROR), + ]); + + $client = $this->createClientWithResponses([ + new Response(200, [], $statusResponse), + new Response(200, [], $resultsJsonl), + ]); + + $results = $client->retrieveBatch('batch-xyz'); + + $this->assertSame([ + 'first' => 'Hello world', + 'second' => 'Second answer', + ], $results); + } + + public function 
testRetrieveBatchThrowsOnRecentFailure(): void { + $client = $this->createClientWithResponses([ + new Response(200, [], json_encode([ + 'status' => 'completed', + 'output_file_id' => null, + 'error_file_id' => 'file-err', + 'completed_at' => time(), + ], JSON_THROW_ON_ERROR)), + new Response(200, [], '{"error": "something went wrong"}'), + ]); + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Batch failed'); + + $client->retrieveBatch('batch-xyz'); + } + + /** + * Documents current behavior: failures older than three days are swallowed and + * reported as an empty result set (OpenAI error files expire) + */ + public function testRetrieveBatchReturnsEmptyArrayForExpiredFailure(): void { + $client = $this->createClientWithResponses([ + new Response(200, [], json_encode([ + 'status' => 'completed', + 'output_file_id' => null, + 'error_file_id' => 'file-err', + 'completed_at' => time() - 4 * 24 * 60 * 60, + ], JSON_THROW_ON_ERROR)), + ]); + + $this->assertSame([], $client->retrieveBatch('batch-xyz')); + } +} diff --git a/tests/Client/OpenAI/OpenAIDecoderPricingTest.php b/tests/Client/OpenAI/OpenAIDecoderPricingTest.php new file mode 100644 index 0000000..d9410ef --- /dev/null +++ b/tests/Client/OpenAI/OpenAIDecoderPricingTest.php @@ -0,0 +1,76 @@ +encoder = new OpenAIEncoder(); + } + + private function createRequest(): LLMRequest { + return new LLMRequest( + // GPT-5.4: input $2.50/M, output $15/M, cached input $0.25/M + model: new GPT54(GPT54::VERSION_2026_03_05), + conversation: new LLMConversation([LLMMessage::createFromUserString('Hello')]), + ); + } + + private function decode(array $usage): LLMResponse { + $response = $this->encoder->decodeResponse($this->createRequest(), new ModelResponse([ + 'choices' => [ + [ + 'message' => ['content' => 'Hi there'], + 'finish_reason' => 'stop', + ], + ], + 'usage' => $usage, + ], 500)); + + $this->assertInstanceOf(LLMResponse::class, $response); + + return $response; + } + + public 
function testPricingWithoutCachedTokens(): void { + $response = $this->decode([ + 'prompt_tokens' => 1_000_000, + 'completion_tokens' => 100_000, + ]); + + $this->assertEqualsWithDelta(2.5, $response->getInputPriceUsd(), 1e-9); + $this->assertEqualsWithDelta(1.5, $response->getOutputPriceUsd(), 1e-9); + $this->assertSame(StopReason::FINISHED, $response->getStopReason()); + } + + /** + * OpenAI reports cached prompt tokens as a subset of prompt_tokens; they are billed + * at the cached input rate. The discount used to be ignored entirely. + */ + public function testCachedPromptTokensGetDiscountedRate(): void { + $response = $this->decode([ + 'prompt_tokens' => 1_000_000, + 'completion_tokens' => 100_000, + 'prompt_tokens_details' => ['cached_tokens' => 600_000], + ]); + + // 400k uncached * $2.50/M + 600k cached * $0.25/M = 1.00 + 0.15 + $this->assertEqualsWithDelta(1.15, $response->getInputPriceUsd(), 1e-9); + $this->assertEqualsWithDelta(1.5, $response->getOutputPriceUsd(), 1e-9); + // Token counts still report the full prompt size + $this->assertSame(1_000_000, $response->getInputTokens()); + } +} diff --git a/tests/Client/OpenAI/OpenAIEmbeddingsTest.php b/tests/Client/OpenAI/OpenAIEmbeddingsTest.php new file mode 100644 index 0000000..e378192 --- /dev/null +++ b/tests/Client/OpenAI/OpenAIEmbeddingsTest.php @@ -0,0 +1,101 @@ + */ + private array $sentRequests = []; + + private function createClientWithResponses(array $responses): OpenAIClient { + $mock = new MockHandler($responses); + $stack = HandlerStack::create($mock); + $this->sentRequests = []; + $stack->push(Middleware::history($this->sentRequests)); + + $client = new OpenAIClient('test-api-key', null); + + $reflection = new \ReflectionProperty(AbstractOpenAIClient::class, 'httpClient'); + $reflection->setValue($client, new Client(['handler' => $stack])); + + return $client; + } + + /** + * Build an embeddings API response for the given input count. 
Embeddings are returned + * deliberately out of order to verify the client maps them back via the index field. + */ + private function embeddingsResponse(int $count, int $startValue): Response { + $data = []; + for ($i = $count - 1; $i >= 0; $i--) { + $data[] = [ + 'index' => $i, + 'embedding' => [(float) ($startValue + $i)], + ]; + } + + return new Response(200, [], json_encode([ + 'data' => $data, + 'usage' => ['total_tokens' => $count], + ], JSON_THROW_ON_ERROR)); + } + + /** + * Regression test for parallel batching: results must come back keyed and ordered + * by the original input position even with multiple chunks and out-of-order + * embeddings within each response + */ + public function testResultsPreserveInputOrderAcrossChunks(): void { + // 250 inputs → 3 chunks (100 + 100 + 50) + $texts = []; + for ($i = 0; $i < 250; $i++) { + $texts[] = 'text ' . $i; + } + + $client = $this->createClientWithResponses([ + $this->embeddingsResponse(100, 0), + $this->embeddingsResponse(100, 100), + $this->embeddingsResponse(50, 200), + ]); + + $results = $client->getBatchEmbeddings($texts); + + $this->assertCount(250, $results); + $this->assertSame(range(0, 249), array_keys($results)); + // Each embedding value encodes its global input position + foreach ($results as $position => $embedding) { + $this->assertEquals([$position], $embedding, "Embedding at position $position is misaligned"); + } + } + + public function testRequestPayloadAndChunking(): void { + $texts = array_fill(0, 150, 'hello'); + + $client = $this->createClientWithResponses([ + $this->embeddingsResponse(100, 0), + $this->embeddingsResponse(50, 100), + ]); + + $client->getBatchEmbeddings($texts, 'text-embedding-3-large', 1024); + + $this->assertCount(2, $this->sentRequests); + + $firstPayload = json_decode((string) $this->sentRequests[0]['request']->getBody(), true, 512, JSON_THROW_ON_ERROR); + $this->assertSame('text-embedding-3-large', $firstPayload['model']); + $this->assertSame(1024, 
$firstPayload['dimensions']); + $this->assertCount(100, $firstPayload['input']); + + $secondPayload = json_decode((string) $this->sentRequests[1]['request']->getBody(), true, 512, JSON_THROW_ON_ERROR); + $this->assertCount(50, $secondPayload['input']); + } +} diff --git a/tests/Http/HttpClientFactoryTest.php b/tests/Http/HttpClientFactoryTest.php new file mode 100644 index 0000000..c41556f --- /dev/null +++ b/tests/Http/HttpClientFactoryTest.php @@ -0,0 +1,192 @@ +mockHandler = new MockHandler(); + $client = HttpClientFactory::createClient($customMiddleware, $cache); + + /** @var HandlerStack $stack */ + $stack = $client->getConfig('handler'); + $stack->setHandler($this->mockHandler); + + return $client; + } + + public function testRetriesRetryableStatusCodesUntilSuccess(): void { + $client = $this->createClient(); + $this->mockHandler->append( + new Response(429, ['Retry-After' => '0']), + new Response(503, ['Retry-After' => '0']), + new Response(200, [], 'ok'), + ); + + $response = $client->get('https://example.com/api'); + + $this->assertSame(200, $response->getStatusCode()); + $this->assertSame('ok', (string) $response->getBody()); + $this->assertSame(0, $this->mockHandler->count(), 'All queued responses should have been consumed'); + } + + public function testGivesUpAfterMaxRetries(): void { + $client = $this->createClient(); + // MAX_RETRIES is 3, so the 4th consecutive error is returned to the caller + $this->mockHandler->append( + new Response(500, ['Retry-After' => '0']), + new Response(500, ['Retry-After' => '0']), + new Response(500, ['Retry-After' => '0']), + new Response(500, ['Retry-After' => '0']), + ); + + $this->expectException(ServerException::class); + + try { + $client->get('https://example.com/api'); + } finally { + $this->assertSame(0, $this->mockHandler->count(), 'Expected exactly 4 attempts (1 + 3 retries)'); + } + } + + public function testDoesNotRetryNonRetryableClientErrors(): void { + $client = $this->createClient(); + 
$this->mockHandler->append( + new Response(404), + new Response(200), + ); + + $this->expectException(ClientException::class); + + try { + $client->get('https://example.com/api'); + } finally { + $this->assertSame(1, $this->mockHandler->count(), 'A 404 must not be retried'); + } + } + + public function testHonorsNumericRetryAfterHeader(): void { + $client = $this->createClient(); + $this->mockHandler->append( + new Response(429, ['Retry-After' => '1']), + new Response(200), + ); + + $start = microtime(true); + $response = $client->get('https://example.com/api'); + $elapsed = microtime(true) - $start; + + $this->assertSame(200, $response->getStatusCode()); + $this->assertGreaterThan(0.9, $elapsed, 'Retry should have waited for the Retry-After interval'); + } + + public function testHonorsHttpDateRetryAfterHeader(): void { + $client = $this->createClient(); + $this->mockHandler->append( + new Response(429, ['Retry-After' => gmdate('D, d M Y H:i:s \G\M\T', time() + 1)]), + new Response(200), + ); + + $response = $client->get('https://example.com/api'); + + $this->assertSame(200, $response->getStatusCode()); + $this->assertSame(0, $this->mockHandler->count()); + } + + /** + * Regression test: network-level failures (connection reset, DNS, timeout) used to + * propagate immediately without any retry + */ + public function testRetriesConnectExceptions(): void { + $client = $this->createClient(); + $request = new Request('GET', 'https://example.com/api'); + $this->mockHandler->append( + new ConnectException('Connection refused', $request), + new Response(200, [], 'ok'), + ); + + $response = $client->get('https://example.com/api'); + + $this->assertSame(200, $response->getStatusCode()); + $this->assertSame('ok', (string) $response->getBody()); + } + + public function testSuccessfulResponsesAreCachedAndReplayed(): void { + $cache = new InMemoryCache(); + $client = $this->createClient($cache); + $this->mockHandler->append(new Response(200, [], 'fresh')); + + $first = 
$client->get('https://example.com/api'); + $this->assertSame('fresh', (string) $first->getBody()); + $this->assertSame(1, $cache->count()); + + // Second identical request must be served from cache - the mock queue is empty, + // so hitting the transport again would throw + $second = $client->get('https://example.com/api'); + $this->assertSame('fresh', (string) $second->getBody()); + $this->assertSame(0, $this->mockHandler->count()); + } + + public function testErrorResponsesAreNotCached(): void { + $cache = new InMemoryCache(); + $client = $this->createClient($cache); + $this->mockHandler->append(new Response(404)); + + try { + $client->get('https://example.com/api'); + $this->fail('Expected ClientException'); + } catch (ClientException) { + } + + $this->assertSame(0, $cache->count(), 'Non-2xx responses must not be cached'); + } + + public function testRequestDurationHeaderIsAddedWhenCacheIsActive(): void { + $cache = new InMemoryCache(); + $client = $this->createClient($cache); + $this->mockHandler->append(new Response(200, [], 'ok')); + + $response = $client->get('https://example.com/api'); + + $this->assertTrue($response->hasHeader('X-Request-Duration-ms')); + $this->assertIsNumeric($response->getHeaderLine('X-Request-Duration-ms')); + } + + public function testCustomMiddlewareSeesRequestsAndResponses(): void { + $seen = []; + $middleware = function (callable $handler) use (&$seen): callable { + return function ($request, array $options) use ($handler, &$seen) { + $seen[] = $request->getMethod() . ' ' . 
$request->getUri(); + + return $handler($request, $options); + }; + }; + + $client = $this->createClient(null, $middleware); + $this->mockHandler->append(new Response(200)); + + $client->get('https://example.com/api'); + + $this->assertSame(['GET https://example.com/api'], $seen); + } +} diff --git a/tests/Integration/IntegrationTestBase.php b/tests/Integration/IntegrationTestBase.php index a3a2874..2b505d2 100644 --- a/tests/Integration/IntegrationTestBase.php +++ b/tests/Integration/IntegrationTestBase.php @@ -8,9 +8,9 @@ use Soukicz\Llm\Cache\CacheInterface; use Soukicz\Llm\Cache\FileCache; use Soukicz\Llm\Client\Anthropic\AnthropicClient; -use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude35Haiku; +use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Haiku; use Soukicz\Llm\Client\Gemini\GeminiClient; -use Soukicz\Llm\Client\Gemini\Model\Gemini20Flash; +use Soukicz\Llm\Client\Gemini\Model\Gemini25FlashLite; use Soukicz\Llm\Client\LLMClient; use Soukicz\Llm\Client\ModelInterface; use Soukicz\Llm\Client\OpenAI\Model\GPT4oMini; @@ -134,8 +134,8 @@ protected function getAllClients(): array { if (!empty($_ENV['ANTHROPIC_API_KEY'])) { $clients[] = [ 'client' => new AnthropicClient($_ENV['ANTHROPIC_API_KEY'], $this->cache), - 'model' => new AnthropicClaude35Haiku(AnthropicClaude35Haiku::VERSION_20241022), - 'name' => 'Anthropic Claude 3.5 Haiku', + 'model' => new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001), + 'name' => 'Anthropic Claude 4.5 Haiku', ]; } @@ -150,15 +150,15 @@ protected function getAllClients(): array { if (!empty($_ENV['GEMINI_API_KEY'])) { $clients[] = [ 'client' => new GeminiClient($_ENV['GEMINI_API_KEY'], $this->cache), - 'model' => new Gemini20Flash(), - 'name' => 'Google Gemini 2.0 Flash', + 'model' => new Gemini25FlashLite(), + 'name' => 'Google Gemini 2.5 Flash Lite', ]; } if (!empty($_ENV['OPENROUTER_API_KEY'])) { $clients[] = [ 'client' => new OpenAICompatibleClient($_ENV['OPENROUTER_API_KEY'], 
'https://openrouter.ai/api/v1', $this->cache), - 'model' => new LocalModel('openrouter/horizon-beta'), + 'model' => new LocalModel('openai/gpt-4o-mini'), 'name' => 'OpenRouter', ]; } diff --git a/tests/Integration/PdfIntegrationTest.php b/tests/Integration/PdfIntegrationTest.php new file mode 100644 index 0000000..7c70326 --- /dev/null +++ b/tests/Integration/PdfIntegrationTest.php @@ -0,0 +1,67 @@ +getAllClients() as $clientData) { + // PDF input is only supported by the three native providers; the + // OpenAI-compatible endpoints (OpenRouter, Scaleway) vary by backing model + if (!in_array($clientData['name'], ['OpenRouter', 'Scaleway Mistral Small'], true)) { + $clients[$clientData['name']] = [$clientData['client'], $clientData['model'], $clientData['name']]; + } + } + + return $clients; + } + + /** + * @dataProvider clientProvider + */ + public function testPdfDocumentUnderstanding($client, $model, $name): void { + $pdfData = base64_encode(file_get_contents(__DIR__ . '/fixtures/test-document.pdf')); + + $request = new LLMRequest( + model: $model, + conversation: new LLMConversation([ + LLMMessage::createFromUser(new LLMMessageContents([ + new LLMMessageText('What is the secret word in this document? Reply with just the word.'), + new LLMMessagePdf('base64', $pdfData), + ])), + ]), + maxTokens: 1000, + ); + + $response = (new LLMAgentClient())->run($client, $request); + + $this->trackCost(($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ?? 0)); + + $this->assertEquals(StopReason::FINISHED, $response->getStopReason(), "$name did not finish cleanly"); + $this->assertContainsIgnoreCase('PINEAPPLE', $response->getLastText(), "$name failed to read the PDF content"); + + if ($this->verbose) { + echo "\n[$name] PDF response: " . 
$response->getLastText(); + } + } +} diff --git a/tests/Integration/ReasoningIntegrationTest.php b/tests/Integration/ReasoningIntegrationTest.php new file mode 100644 index 0000000..2d092f3 --- /dev/null +++ b/tests/Integration/ReasoningIntegrationTest.php @@ -0,0 +1,126 @@ +markTestSkipped("$envVar is not configured"); + } + + return $_ENV[$envVar]; + } + + private function runReasoningRequest(LLMClient $client, LLMRequest $request): LLMResponse { + $response = (new LLMAgentClient())->run($client, $request); + $this->trackCost(($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ?? 0)); + + $this->assertEquals(StopReason::FINISHED, $response->getStopReason()); + $this->assertStringContainsString('39', $response->getLastText(), 'Expected the correct arithmetic result'); + + return $response; + } + + private function createConversation(): LLMConversation { + // Deliberately simple: this test verifies the reasoning configuration is accepted + // by the API, not the model's problem-solving ability + return new LLMConversation([ + LLMMessage::createFromUserString('What is 17 + 24 - 2? 
Reply with just the number.'), + ]); + } + + public function testAnthropicReasoningBudgetReturnsThinkingBlocks(): void { + $client = new AnthropicClient($this->requireKey('ANTHROPIC_API_KEY'), $this->cache); + + $response = $this->runReasoningRequest($client, new LLMRequest( + model: new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001), + conversation: $this->createConversation(), + temperature: 1.0, + maxTokens: 6000, + reasoningConfig: new ReasoningBudget(2048), + )); + + // Extended thinking must surface as reasoning content in the conversation + $reasoningFound = false; + foreach ($response->getConversation()->getMessages() as $message) { + foreach ($message->getContents() as $content) { + if ($content instanceof LLMMessageReasoning) { + $reasoningFound = true; + $this->assertNotSame('', $content->getText()); + } + } + } + $this->assertTrue($reasoningFound, 'Expected at least one reasoning block in the conversation'); + } + + public function testAnthropicReasoningEffort(): void { + $client = new AnthropicClient($this->requireKey('ANTHROPIC_API_KEY'), $this->cache); + + $this->runReasoningRequest($client, new LLMRequest( + model: new AnthropicClaude46Sonnet(), + conversation: $this->createConversation(), + temperature: 1.0, + maxTokens: 6000, + reasoningConfig: ReasoningEffort::LOW, + )); + } + + public function testOpenAIReasoningEffort(): void { + $client = new OpenAIClient($this->requireKey('OPENAI_API_KEY'), null, $this->cache); + + $this->runReasoningRequest($client, new LLMRequest( + model: new GPT54Nano(GPT54Nano::VERSION_2026_03_17), + conversation: $this->createConversation(), + temperature: 1.0, + maxTokens: 6000, + reasoningConfig: ReasoningEffort::LOW, + )); + } + + public function testGeminiReasoningEffort(): void { + $client = new GeminiClient($this->requireKey('GEMINI_API_KEY'), $this->cache); + + $this->runReasoningRequest($client, new LLMRequest( + model: new Gemini25FlashLite(), + conversation: $this->createConversation(), + 
maxTokens: 6000, + reasoningConfig: ReasoningEffort::LOW, + )); + } +} diff --git a/tests/Integration/StreamingIntegrationTest.php b/tests/Integration/StreamingIntegrationTest.php new file mode 100644 index 0000000..ac0045c --- /dev/null +++ b/tests/Integration/StreamingIntegrationTest.php @@ -0,0 +1,76 @@ +getAllClients() as $clientData) { + $clients[$clientData['name']] = [$clientData['client'], $clientData['model'], $clientData['name']]; + } + + return $clients; + } + + /** + * @dataProvider clientProvider + */ + public function testTextStreamingMatchesFinalText($client, $model, $name): void { + $streamedText = ''; + $eventTypes = []; + $listener = new CallableStreamListener(function (StreamEvent $event) use (&$streamedText, &$eventTypes): void { + $eventTypes[] = $event->type; + if ($event->type === StreamEventType::TEXT_DELTA) { + $streamedText .= $event->delta; + } + }); + + $request = new LLMRequest( + model: $model, + conversation: new LLMConversation([ + LLMMessage::createFromUserString('Reply with one short sentence about the sun.'), + ]), + maxTokens: 1000, + streamListener: $listener, + ); + + $response = (new LLMAgentClient())->run($client, $request); + + $this->trackCost(($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ?? 
0)); + + $this->assertEquals(StopReason::FINISHED, $response->getStopReason(), "$name did not finish cleanly"); + $this->assertNotSame('', $streamedText, "$name emitted no TEXT_DELTA events"); + $this->assertSame( + $response->getLastText(), + $streamedText, + "$name: accumulated stream deltas differ from the final response text" + ); + $this->assertContains(StreamEventType::MESSAGE_COMPLETE, $eventTypes, "$name never emitted MESSAGE_COMPLETE"); + + if ($this->verbose) { + echo "\n[$name] Streamed: $streamedText"; + } + } +} diff --git a/tests/Integration/StructuredOutputIntegrationTest.php b/tests/Integration/StructuredOutputIntegrationTest.php index f3c248f..7370a1b 100644 --- a/tests/Integration/StructuredOutputIntegrationTest.php +++ b/tests/Integration/StructuredOutputIntegrationTest.php @@ -7,7 +7,7 @@ use Soukicz\Llm\Client\Anthropic\AnthropicClient; use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Haiku; use Soukicz\Llm\Client\Gemini\GeminiClient; -use Soukicz\Llm\Client\Gemini\Model\Gemini20Flash; +use Soukicz\Llm\Client\Gemini\Model\Gemini25FlashLite; use Soukicz\Llm\Client\LLMAgentClient; use Soukicz\Llm\Client\LLMClient; use Soukicz\Llm\Client\ModelInterface; @@ -177,8 +177,8 @@ protected function getStructuredOutputClients(): array { if (!empty($_ENV['GEMINI_API_KEY'])) { $clients[] = [ 'client' => new GeminiClient($_ENV['GEMINI_API_KEY'], $this->cache), - 'model' => new Gemini20Flash(), - 'name' => 'Google Gemini 2.0 Flash', + 'model' => new Gemini25FlashLite(), + 'name' => 'Google Gemini 2.5 Flash Lite', ]; } diff --git a/tests/Integration/UniversalLLMIntegrationTest.php b/tests/Integration/UniversalLLMIntegrationTest.php index 3815c7a..77b4142 100644 --- a/tests/Integration/UniversalLLMIntegrationTest.php +++ b/tests/Integration/UniversalLLMIntegrationTest.php @@ -377,7 +377,7 @@ public function testSystemPrompt($client, $model, $name): void { public function testStopSequence($client, $model, $name): void { $conversation = new 
LLMConversation([ LLMMessage::createFromUserString( - 'Count from 1 to 10 with "STOP" after 5. Like this: 1 2 3 4 5 STOP' + 'Output the numbers 1 to 10 separated by spaces, with no other text. Like this: 1 2 3 ...' ), ]); @@ -386,7 +386,7 @@ public function testStopSequence($client, $model, $name): void { conversation: $conversation, temperature: 0.1, maxTokens: 200, - stopSequences: ['STOP'] + stopSequences: ['8'] ); $response = $this->agentClient->run($client, $request); @@ -401,12 +401,13 @@ public function testStopSequence($client, $model, $name): void { $this->assertEquals(StopReason::FINISHED, $response->getStopReason(), "Expected stop reason to be FINISHED for $name, but got: " . $response->getStopReason()->value); - // Should contain numbers 1-5 + // Should contain numbers up to the stop sequence $this->assertContainsAny(['1'], $responseText); $this->assertContainsAny(['5'], $responseText); - // Should not contain numbers after 5 (allowing some flexibility) - $this->assertStringNotContainsString('10', $responseText); + // Should not contain numbers after the stop sequence ("10" is excluded because + // models often echo the "1 to 10" instruction in a preamble) + $this->assertStringNotContainsString('9', $responseText); if ($this->verbose) { echo "\n[$name] Stop sequence response: " . 
$responseText; diff --git a/tests/Integration/fixtures/test-document.pdf b/tests/Integration/fixtures/test-document.pdf new file mode 100644 index 0000000..e2f39b7 Binary files /dev/null and b/tests/Integration/fixtures/test-document.pdf differ diff --git a/tests/LLMConversationTest.php b/tests/LLMConversationTest.php index eaf8388..4d6046d 100644 --- a/tests/LLMConversationTest.php +++ b/tests/LLMConversationTest.php @@ -132,4 +132,33 @@ public function testWithMessage(): void { $this->assertCount(2, $deserializedConversation->getMessages()); } + + public function testErrorFlagSurvivesSerializationRoundTrip(): void { + $conversation = new LLMConversation([ + LLMMessage::createFromUser(new LLMMessageContents([ + new LLMMessageToolResult('tool-1', LLMMessageContents::fromErrorString('ERROR: something failed')), + ])), + ]); + + $data = json_decode(json_encode($conversation, JSON_THROW_ON_ERROR), true, 512, JSON_THROW_ON_ERROR); + $deserialized = LLMConversation::fromJson($data); + + $toolResult = $deserialized->getMessages()[0]->getContents()[0]; + $this->assertInstanceOf(LLMMessageToolResult::class, $toolResult); + $this->assertTrue($toolResult->getContent()->isError()); + } + + public function testFromJsonAcceptsLegacyContentFormat(): void { + // Before the isError flag was serialized, contents were stored as a plain list + $legacyContents = [ + ['class' => LLMMessageText::class, 'data' => ['text' => 'Hello', 'cached' => false]], + ]; + + $contents = LLMMessageContents::fromJson($legacyContents); + + $this->assertCount(1, $contents); + $this->assertInstanceOf(LLMMessageText::class, $contents[0]); + $this->assertSame('Hello', $contents[0]->getText()); + $this->assertFalse($contents->isError()); + } } diff --git a/tests/MarkdownFormatterTest.php b/tests/MarkdownFormatterTest.php new file mode 100644 index 0000000..c97b3c9 --- /dev/null +++ b/tests/MarkdownFormatterTest.php @@ -0,0 +1,79 @@ +formatter = new MarkdownFormatter(); + } + + private function 
createRequest(): LLMRequest { + return new LLMRequest( + model: new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001), + conversation: new LLMConversation([ + LLMMessage::createFromSystem(LLMMessageContents::fromString('You are a helpful assistant')), + LLMMessage::createFromUserString('What is 2+2?'), + LLMMessage::createFromAssistant(new LLMMessageContents([ + new LLMMessageToolUse('tool-1', 'calculator', ['expression' => '2+2']), + ])), + LLMMessage::createFromUser(new LLMMessageContents([ + new LLMMessageToolResult('tool-1', LLMMessageContents::fromArrayData(['result' => 4])), + ])), + LLMMessage::createFromAssistantString('The answer is 4'), + ]), + ); + } + + public function testRequestFormatting(): void { + $markdown = $this->formatter->responseToMarkdown($this->createRequest()); + + $this->assertStringContainsString(' - **Model:** claude-haiku-4-5-20251001', $markdown); + // Each role gets its own heading (system messages used to render as "## User:") + $this->assertStringContainsString('## System:', $markdown); + $this->assertStringContainsString('## User:', $markdown); + $this->assertStringContainsString('## Assistant:', $markdown); + $this->assertStringContainsString('You are a helpful assistant', $markdown); + $this->assertStringContainsString('**Tool use:** calculator (tool-1)', $markdown); + $this->assertStringContainsString('**Tool result:** tool-1', $markdown); + $this->assertStringContainsString('The answer is 4', $markdown); + } + + public function testResponseFormattingIncludesStats(): void { + $response = new LLMResponse($this->createRequest(), StopReason::FINISHED, 1000, 200, 200, 0.5, 0.25, 1500); + + $markdown = $this->formatter->responseToMarkdown($response); + + $this->assertStringContainsString('##### Total stats', $markdown); + $this->assertStringContainsString('prompt tokens: 1000', $markdown); + $this->assertStringContainsString('completion tokens: 200', $markdown); + $this->assertStringContainsString('price: $0.750', 
$markdown); + $this->assertStringContainsString('Finished in 1.500s', $markdown); + } + + /** + * Models without configured pricing produce null prices - formatting must not fail + */ + public function testResponseFormattingWithNullPrices(): void { + $response = new LLMResponse($this->createRequest(), StopReason::FINISHED, 1000, 200, 200, null, null, 1500); + + $markdown = $this->formatter->responseToMarkdown($response); + + $this->assertStringContainsString('price: $0.000', $markdown); + } +} diff --git a/tests/Stream/GeminiStreamAccumulatorTest.php b/tests/Stream/GeminiStreamAccumulatorTest.php index f50c9d0..9310cfe 100644 --- a/tests/Stream/GeminiStreamAccumulatorTest.php +++ b/tests/Stream/GeminiStreamAccumulatorTest.php @@ -25,19 +25,21 @@ public function testTextOnlyResponse(): void { $result = GeminiStreamAccumulator::consume(Utils::streamFor($sse), $listener); - // Verify reconstructed response - $this->assertCount(2, $result['candidates'][0]['content']['parts']); - $this->assertEquals('Hello', $result['candidates'][0]['content']['parts'][0]['text']); - $this->assertEquals(' world', $result['candidates'][0]['content']['parts'][1]['text']); + // Consecutive text chunks must be merged into a single part so the reconstructed + // response matches the non-streaming format (getLastText() returns the full text) + $this->assertCount(1, $result['candidates'][0]['content']['parts']); + $this->assertEquals('Hello world', $result['candidates'][0]['content']['parts'][0]['text']); $this->assertEquals('STOP', $result['candidates'][0]['finishReason']); $this->assertEquals(10, $result['usageMetadata']['promptTokenCount']); $this->assertEquals(5, $result['usageMetadata']['candidatesTokenCount']); - // Verify listener events + // Listener still receives one delta per chunk, all within the same block $textDeltas = array_values(array_filter($events, fn(StreamEvent $e) => $e->type === StreamEventType::TEXT_DELTA)); $this->assertCount(2, $textDeltas); 
$this->assertEquals('Hello', $textDeltas[0]->delta); + $this->assertEquals(0, $textDeltas[0]->blockIndex); $this->assertEquals(' world', $textDeltas[1]->delta); + $this->assertEquals(0, $textDeltas[1]->blockIndex); $this->assertEquals(StreamEventType::MESSAGE_START, $events[0]->type); $this->assertEquals(StreamEventType::MESSAGE_COMPLETE, $events[array_key_last($events)]->type);