diff --git a/.env.example b/.env.example
index 6ec7ef8..14f0039 100644
--- a/.env.example
+++ b/.env.example
@@ -17,6 +17,10 @@ GEMINI_API_KEY=xxxxx
 # Get your key from: https://console.scaleway.com/
 SCALEWAY_API_KEY=xxxxx
 
+# OpenRouter API Key (OpenAI compatible)
+# Get your key from: https://openrouter.ai/keys
+OPENROUTER_API_KEY=sk-or-xxxxx
+
 # Optional: Set to 'true' to enable verbose output during tests
 INTEGRATION_TEST_VERBOSE=false
 
diff --git a/Readme.md b/Readme.md
index 71f661b..6ebbe1f 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,5 +1,8 @@
 # PHP LLM - Agentic AI Framework for PHP
 
+[![Latest Version](https://img.shields.io/packagist/v/soukicz/llm.svg)](https://packagist.org/packages/soukicz/llm)
+[![License](https://img.shields.io/packagist/l/soukicz/llm.svg)](https://packagist.org/packages/soukicz/llm)
+
 Build powerful **AI agents** that can use tools, self-correct, and take autonomous actions. A unified PHP framework for Large Language Models with support for Anthropic Claude, OpenAI GPT, Google Gemini, and more.
 
 > **What is Agentic AI?** Agents that can call functions, validate outputs, iterate on responses, and make decisions autonomously - not just generate text.
@@ -24,7 +27,8 @@ composer require soukicz/llm
 - 📝 **Built-in Tools** - TextEditorTool for file manipulation, embeddings API, and more
 - ✅ **Self-Correcting** - Validate and refine outputs with feedback loops
 - 📸 **Multimodal** - Process images and PDFs alongside text (with caching support)
-- 🧠 **Reasoning Models** - Advanced thinking with o3 and o4-mini reasoning models
+- 🧠 **Reasoning Models** - OpenAI reasoning models, Anthropic extended thinking, and Gemini thinking
+- 📐 **Structured Output** - JSON Schema enforced responses across Anthropic, OpenAI, and Gemini
 - 📡 **Streaming** - Real-time response streaming with optional listener for live progress updates
 - ⚡ **Async & Caching** - Fast, cost-effective operations with prompt caching
 - 💾 **State Persistence** - Save and resume conversations with thread IDs
@@ -44,13 +48,18 @@ All LLM clients in this library are **asynchronous by default** using Guzzle Pro
 - **Agent Client** (`LLMAgentClient`) - High-level orchestrator that handles multi-turn conversations, automatic tool calling, feedback loops, and retries. Use this for building agents that need to iterate or use tools.
 
 ### Model Versions
-Anthropic and OpenAI models require explicit version constants:
+Many Anthropic and OpenAI models require an explicit version constant:
+```php
+<?php
+new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001)
+new GPT54(GPT54::VERSION_2026_03_05)
+```
+The newest Anthropic models (e.g. Claude 4.6) and Google Gemini models do NOT require versions - just instantiate them directly:
 ```php
 <?php
-new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929)
-new GPTo3(GPTo3::VERSION_2025_04_16)
+new AnthropicClaude46Sonnet()
+new Gemini25Flash()
 ```
-Google Gemini models do NOT require versions - just instantiate them directly.
 
 ### Conversations & State
 `LLMConversation` manages the message history and can be serialized/deserialized for persistence. Each conversation has an optional `threadId` (UUID) for tracking across sessions.
@@ -63,7 +72,7 @@ require_once __DIR__ . '/vendor/autoload.php';
 
 use Soukicz\Llm\Cache\FileCache;
 use Soukicz\Llm\Client\Anthropic\AnthropicClient;
-use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Sonnet;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
 use Soukicz\Llm\Client\LLMAgentClient;
 use Soukicz\Llm\Message\LLMMessage;
 use Soukicz\Llm\LLMConversation;
@@ -82,7 +91,7 @@ $agentClient = new LLMAgentClient();
 $response = $agentClient->run(
     client: $client,
     request: new LLMRequest(
-        model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+        model: new AnthropicClaude46Sonnet(),
         conversation: new LLMConversation([
             LLMMessage::createFromUserString('What is PHP?')
         ]),
@@ -119,7 +128,7 @@ $client = new AnthropicClient(
     apiKey: 'sk-ant-xxxxx',
     cache: $cache,
     customHttpMiddleware: null,
-    betaFeatures: [] // e.g., ['text-editor-20250116'] for TextEditorTool
+    betaFeatures: [] // Optional Anthropic beta feature flags
 );
 
 // OpenAI (organization parameter is required)
@@ -254,19 +263,19 @@ Use advanced reasoning for complex problems:
 ```php
 use Soukicz\Llm\Config\ReasoningEffort;
 use Soukicz\Llm\Config\ReasoningBudget;
-use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Sonnet;
-use Soukicz\Llm\Client\OpenAI\Model\GPT5;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
+use Soukicz\Llm\Client\OpenAI\Model\GPT54;
 
-// Control reasoning with effort level (for supported models)
+// Control reasoning with effort level (OpenAI, Anthropic, and Gemini)
 $request = new LLMRequest(
-    model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+    model: new GPT54(GPT54::VERSION_2026_03_05),
     conversation: $conversation,
-    reasoningConfig: ReasoningEffort::HIGH // LOW, MEDIUM, or HIGH
+    reasoningConfig: ReasoningEffort::HIGH // NONE, MINIMAL, LOW, MEDIUM, HIGH, or EXTRA_HIGH
 );
 
-// Or use token-based budget control (for supported models)
+// Or use token-based budget control (Anthropic only)
 $request = new LLMRequest(
-    model: new GPT5(GPT5::VERSION_2025_08_07),
+    model: new AnthropicClaude46Sonnet(),
     conversation: $conversation,
     reasoningConfig: new ReasoningBudget(10000) // Max reasoning tokens
 );
@@ -274,6 +283,37 @@ $request = new LLMRequest(
 
 **→ [Reasoning Models Documentation](docs/guides/reasoning.md)**
 
+### 📐 Structured Output
+
+Force responses to match a JSON Schema and get them back as a PHP array - supported by Anthropic, OpenAI, and Gemini:
+
+```php
+use Soukicz\Llm\Config\StructuredOutputConfig;
+
+$response = $agentClient->run($client, new LLMRequest(
+    model: new AnthropicClaude46Sonnet(),
+    conversation: new LLMConversation([
+        LLMMessage::createFromUserString('Extract user data: John Doe, age 30, email john@example.com')
+    ]),
+    structuredOutputConfig: new StructuredOutputConfig([
+        'type' => 'object',
+        'properties' => [
+            'name' => ['type' => 'string'],
+            'age' => ['type' => 'integer'],
+            'email' => ['type' => 'string'],
+        ],
+        'required' => ['name', 'age', 'email'],
+        'additionalProperties' => false,
+    ]),
+));
+
+$data = $response->getLastStructuredData(); // ['name' => 'John Doe', 'age' => 30, 'email' => 'john@example.com']
+```
+
+> **Tip:** Strict schema validation is enabled by default - pass `strict: false` to relax it.
+
+**→ [Structured Output Documentation](docs/guides/structured-output.md)**
+
 ## Advanced Features
 
 ### 📝 TextEditorTool - Built-in File Manipulation
@@ -281,22 +321,21 @@ $request = new LLMRequest(
 Empower agents to read, write, and manage files with the built-in TextEditorTool:
 
 ```php
-use Soukicz\Llm\Tool\TextEditorTool;
-use Soukicz\Llm\Tool\TextEditorStorageFilesystem;
+use Soukicz\Llm\Tool\TextEditor\TextEditorTool;
+use Soukicz\Llm\Tool\TextEditor\TextEditorStorageFilesystem;
 
 // Create filesystem storage with sandboxing
 $storage = new TextEditorStorageFilesystem('/safe/workspace/path');
 $textEditorTool = new TextEditorTool($storage);
 
-// Enable for Anthropic Claude with beta features
+// Works out of the box with Anthropic Claude - no beta flags needed on modern models
 $client = new AnthropicClient(
     apiKey: 'sk-ant-xxxxx',
-    cache: $cache,
-    betaFeatures: ['text-editor-20250116'] // Required for TextEditorTool
+    cache: $cache
 );
 
 $response = $agentClient->run($client, new LLMRequest(
-    model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+    model: new AnthropicClaude46Sonnet(),
     conversation: new LLMConversation([
         LLMMessage::createFromUserString('Create a PHP file with a hello world function')
     ]),
@@ -333,11 +372,17 @@ Built-in interfaces for logging and monitoring:
 
 ```php
 use Soukicz\Llm\Log\LLMLogger;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\LLMResponse;
 
 // Implement custom logger
 class MyLogger implements LLMLogger {
-    public function log(LLMRequest $request, LLMResponse $response): void {
-        // Log requests, responses, costs, tokens, etc.
+    public function requestStarted(LLMRequest $request): void {
+        echo "Request started\n";
+    }
+
+    public function requestFinished(LLMResponse $response): void {
+        // Log responses, costs, tokens, etc.
         $cost = ($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ?? 0);
         echo "Cost: $" . $cost . "\n";
         echo "Tokens: {$response->getInputTokens()} in, {$response->getOutputTokens()} out\n";
@@ -365,10 +410,10 @@ $request = new LLMRequest(
     // Custom stop sequences to halt generation
     stopSequences: ['END', '---'],
 
-    // Reasoning configuration (for o3/o4-mini models)
+    // Reasoning configuration (OpenAI reasoning models, Anthropic extended thinking, Gemini thinking)
     reasoningConfig: ReasoningEffort::HIGH,
-    // OR
-    reasoningConfig: new ReasoningBudget(10000),
+    // OR token-based budget (Anthropic only):
+    // reasoningConfig: new ReasoningBudget(10000),
 
     // Optional: Stream responses for real-time progress
     // streamListener: new CallableStreamListener(fn($e) => print($e->delta)),
@@ -380,14 +425,14 @@ $cost = ($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ?
 echo "Cost: $" . $cost . "\n";
 echo "Input tokens: " . $response->getInputTokens() . "\n";
 echo "Output tokens: " . $response->getOutputTokens() . "\n";
-echo "Stop reason: " . $response->getStopReason()->value . "\n"; // END_TURN, TOOL_USE, MAX_TOKENS, STOP_SEQUENCE
+echo "Stop reason: " . $response->getStopReason()->value . "\n"; // FINISHED, TOOL_USE, LENGTH, SAFETY
 ```
 
 ## Supported Providers
 
-- **Anthropic (Claude)** - Claude 3.5, 3.7, 4.0, 4.1, and 4.5 series models
-- **OpenAI (GPT)** - GPT-4o, GPT-4.1, o3 and o4-mini (reasoning), and GPT-5 series models
-- **Google Gemini** - Gemini 2.0 and 2.5 series models
+- **Anthropic (Claude)** - Claude 3.5 through 4.6 series models
+- **OpenAI (GPT)** - GPT-4o, GPT-4.1, o3 and o4-mini (reasoning), and GPT-5 through GPT-5.4 series models
+- **Google Gemini** - Gemini 2.0 through 3.x series models
 - **OpenAI-Compatible** - OpenRouter, local servers (Ollama, llama-server), and more
 - **AWS Bedrock** - Via separate package ([`soukicz/llm-aws-bedrock`](https://github.com/soukicz/llm-aws-bedrock))
 
@@ -406,7 +451,8 @@ echo "Stop reason: " . $response->getStopReason()->value . "\n"; // END_TURN, TO
 - [Feedback Loops](docs/guides/feedback-loops.md) - Self-correcting agents and validation
 - [Multimodal Support](docs/guides/multimodal.md) - Images, PDFs, and caching
 - [Streaming](docs/guides/streaming.md) - Real-time response streaming with progress listeners
-- [Reasoning Models](docs/guides/reasoning.md) - o3/o4-mini with effort and budget control
+- [Reasoning Models](docs/guides/reasoning.md) - Reasoning and extended thinking with effort and budget control
+- [Structured Output](docs/guides/structured-output.md) - JSON Schema enforced responses
 
 ### Advanced Features
 - [Caching](docs/guides/caching.md) - Prompt caching and cost reduction
@@ -465,6 +511,8 @@ $response = $agentClient->run($client, new LLMRequest(
 ### Self-Correcting JSON Parser
 ```php
 // Agent that validates and corrects its own output
+$iterations = 0;
+
 $response = $agentClient->run(
     client: $client,
     request: new LLMRequest(
@@ -473,7 +521,11 @@ $response = $agentClient->run(
             LLMMessage::createFromUserString('Extract user data as JSON: John Doe, age 30, email john@example.com')
         ])
     ),
-    feedbackCallback: function ($response) {
+    feedbackCallback: function ($response) use (&$iterations) {
+        if (++$iterations >= 3) {
+            return null; // Stop after the 3rd response, even if it is still invalid JSON
+        }
+
         $text = $response->getLastText();
         json_decode($text);
 
@@ -484,8 +536,7 @@ $response = $agentClient->run(
         }
 
         return null; // Valid JSON, stop iteration
-    },
-    maxIterations: 3 // Limit retry attempts
+    }
 );
 ```
 
@@ -498,7 +549,7 @@ $chartData = base64_encode(file_get_contents('/sales-chart.png'));
 $reportData = base64_encode(file_get_contents('/quarterly-report.pdf'));
 
 $response = $agentClient->run($client, new LLMRequest(
-    model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+    model: new AnthropicClaude46Sonnet(),
     conversation: new LLMConversation([
         LLMMessage::createFromUser(new LLMMessageContents([
             new LLMMessageText('Analyze these documents and summarize the key insights'),
@@ -582,7 +633,7 @@ This project is open-sourced software licensed under the BSD-3-Clause license.
 
 ## Links
 
-- [Documentation](docs/) - Full documentation
+- [Documentation](https://soukicz.github.io/php-llm/) - Full documentation
 - [GitHub](https://github.com/soukicz/llm) - Source code
 - [Packagist](https://packagist.org/packages/soukicz/llm) - Composer package
 
diff --git a/docs/examples/best-practices.md b/docs/examples/best-practices.md
index b6f0bd2..b0adb8a 100644
--- a/docs/examples/best-practices.md
+++ b/docs/examples/best-practices.md
@@ -135,6 +135,8 @@ For example, if an agent needs to fetch data from three different sources, runni
 
 ```php
 <?php
+use GuzzleHttp\Promise\Utils;
+
 // Process multiple requests concurrently
 $promises = [];
 
@@ -151,7 +153,7 @@ foreach ($items as $item) {
 }
 
 // Wait for all to complete
-$responses = Promise\Utils::all($promises)->wait();
+$responses = Utils::all($promises)->wait();
 
 // Process results
 foreach ($responses as $response) {
diff --git a/docs/examples/index.md b/docs/examples/index.md
index 8a43b7c..c52c24c 100644
--- a/docs/examples/index.md
+++ b/docs/examples/index.md
@@ -4,7 +4,7 @@ Practical, copy-paste ready examples to help you get started with PHP LLM and bu
 
 ## Getting Started
 
-- **[Quick Start](quick-start.md)** - Get up and running in minutes with basic examples for simple synchronous requests, conversation management, and streaming responses.
+- **[Quick Start](quick-start.md)** - Get up and running in minutes with basic examples for simple synchronous requests, async requests, and conversation management.
 
 ## Core Functionality
 
@@ -24,7 +24,7 @@ Practical, copy-paste ready examples to help you get started with PHP LLM and bu
 
 These examples cover:
 - **Basic usage**: Simple requests, conversations
-- **Advanced features**: Tools, multimodal, [streaming](../guides/streaming.md), caching, reasoning models
+- **Advanced features**: Tools, multimodal, caching, reasoning models (see also the [streaming guide](../guides/streaming.md))
 - **Production patterns**: Error handling, logging, retries, resilience
 - **Best practices**: Security, performance, cost optimization
 - **Real-world scenarios**: Practical code you can adapt to your needs
diff --git a/docs/examples/logging-debugging.md b/docs/examples/logging-debugging.md
index 82069d5..0c3151f 100644
--- a/docs/examples/logging-debugging.md
+++ b/docs/examples/logging-debugging.md
@@ -12,36 +12,32 @@ use Soukicz\Llm\MarkdownFormatter;
 
 $formatter = new MarkdownFormatter();
 
-// Format response
+// Format a response (includes the request parameters, the full conversation and stats)
 $markdown = $formatter->responseToMarkdown($response);
 echo $markdown;
 
-// Format request
-$markdown = $formatter->requestToMarkdown($request);
+// The same method also accepts a request (e.g. before a response is available)
+$markdown = $formatter->responseToMarkdown($request);
 echo $markdown;
 ```
 
 **Sample Output:**
 
 ```markdown
-## Request
-**Model:** claude-sonnet-4-5-20250929
-**Temperature:** 1.0
-**Messages:** 2
-
-### User
+ - **Model:** claude-sonnet-4-6
+ - **Temperature:** 0
+ - **Max tokens:** 4096
+## User:
 What is the capital of France?
 
----
+## Assistant:
+The capital of France is Paris.
+
+----------------------
 
-## Response
-**Stop Reason:** end_turn
-**Input Tokens:** 15
-**Output Tokens:** 8
-**Cost:** $0.000345
+##### Total stats
 
-### Assistant
-The capital of France is Paris.
+Finished in 1.823s, prompt tokens: 15, completion tokens: 8, maximum completion tokens: 4096, total tokens: 23, price: $0.000
 ```
 
 ## Custom Logger
@@ -64,7 +60,7 @@ readonly class LLMFileLogger implements LLMLogger {
     }
 
     public function requestStarted(LLMRequest $request): void {
-        $markdown = $this->formatter->requestToMarkdown($request);
+        $markdown = $this->formatter->responseToMarkdown($request);
         file_put_contents($this->logPath, $markdown . "\n\n", FILE_APPEND);
     }
 
@@ -143,8 +139,8 @@ $agentClient = new LLMAgentClient($logger);
 **Sample Log Output:**
 
 ```
-[2025-01-15 10:23:45] llm.INFO: LLM Request Started {"model":"claude-sonnet-4-5-20250929","messages":1}
-[2025-01-15 10:23:47] llm.INFO: LLM Request Finished {"model":"claude-sonnet-4-5-20250929","input_tokens":15,"output_tokens":8,"cost":0.000345,"response_time_ms":1823}
+[2026-06-12 10:23:45] llm.INFO: LLM Request Started {"model":"claude-sonnet-4-6","messages":1}
+[2026-06-12 10:23:47] llm.INFO: LLM Request Finished {"model":"claude-sonnet-4-6","input_tokens":15,"output_tokens":8,"cost":0.000345,"response_time_ms":1823}
 ```
 
 ## HTTP Middleware Logging
@@ -190,7 +186,7 @@ try {
 } catch (LLMClientException $e) {
     // Log error details
     error_log("LLM Error: " . $e->getMessage());
-    error_log("Request: " . $formatter->requestToMarkdown($request));
+    error_log("Request: " . $formatter->responseToMarkdown($request));
 
     // Check if it's a rate limit
     if ($e->getCode() === 429) {
@@ -210,13 +206,16 @@ class PerformanceLogger implements LLMLogger {
     private array $timings = [];
 
     public function requestStarted(LLMRequest $request): void {
-        $this->timings[spl_object_id($request)] = microtime(true);
+        // Key by the conversation thread ID: the LLMRequest available in
+        // requestFinished() is a different (cloned) object, so spl_object_id()
+        // would not match between the two callbacks
+        $this->timings[$request->getConversation()->getThreadId()] = microtime(true);
     }
 
     public function requestFinished(LLMResponse $response): void {
-        $requestId = spl_object_id($response->getRequest());
-        $duration = isset($this->timings[$requestId])
-            ? (microtime(true) - $this->timings[$requestId]) * 1000
+        $threadId = $response->getRequest()->getConversation()->getThreadId();
+        $duration = isset($this->timings[$threadId])
+            ? (microtime(true) - $this->timings[$threadId]) * 1000
             : $response->getTotalTimeMs();
 
         $totalTokens = $response->getInputTokens() + $response->getOutputTokens();
@@ -230,7 +229,7 @@ class PerformanceLogger implements LLMLogger {
             $totalCost
         );
 
-        unset($this->timings[$requestId]);
+        unset($this->timings[$threadId]);
     }
 }
 ```
@@ -238,8 +237,8 @@ class PerformanceLogger implements LLMLogger {
 **Sample Output:**
 
 ```
-Request claude-sonnet-4-5-20250929: 1823ms, 23 tokens, $0.000345
-Request gpt-5-2025-08-07: 956ms, 45 tokens, $0.000890
+Request claude-sonnet-4-6: 1823ms, 23 tokens, $0.000345
+Request gpt-5.4-2026-03-05: 956ms, 45 tokens, $0.000890
 Request gemini-2.5-pro: 1245ms, 31 tokens, $0.000520
 ```
 
@@ -253,15 +252,15 @@ class DebugLogger implements LLMLogger {
     public function requestStarted(LLMRequest $request): void {
         echo "=== REQUEST STARTED ===\n";
         echo "Model: " . $request->getModel()->getCode() . "\n";
-        echo "Temperature: " . ($request->getTemperature() ?? 'default') . "\n";
-        echo "Max Tokens: " . ($request->getMaxTokens() ?? 'default') . "\n";
+        echo "Temperature: " . $request->getTemperature() . "\n";
+        echo "Max Tokens: " . $request->getMaxTokens() . "\n";
         echo "Messages: " . count($request->getConversation()->getMessages()) . "\n";
         echo "Tools: " . count($request->getTools()) . "\n\n";
     }
 
     public function requestFinished(LLMResponse $response): void {
         echo "=== REQUEST FINISHED ===\n";
-        echo "Stop Reason: " . $response->getStopReason() . "\n";
+        echo "Stop Reason: " . $response->getStopReason()->value . "\n";
         echo "Response Time: " . $response->getTotalTimeMs() . "ms\n";
         echo "Input Tokens: " . $response->getInputTokens() . "\n";
         echo "Output Tokens: " . $response->getOutputTokens() . "\n";
@@ -279,14 +278,14 @@ class DebugLogger implements LLMLogger {
 
 ```
 === REQUEST STARTED ===
-Model: claude-sonnet-4-5-20250929
-Temperature: 1.0
-Max Tokens: 2048
+Model: claude-sonnet-4-6
+Temperature: 0
+Max Tokens: 4096
 Messages: 1
 Tools: 0
 
 === REQUEST FINISHED ===
-Stop Reason: end_turn
+Stop Reason: finished
 Response Time: 1823ms
 Input Tokens: 15
 Output Tokens: 8
@@ -321,7 +320,7 @@ class JSONLogger implements LLMLogger {
             'output_cost' => $outputCost,
             'total_cost' => $inputCost + $outputCost,
             'response_time_ms' => $response->getTotalTimeMs(),
-            'stop_reason' => $response->getStopReason(),
+            'stop_reason' => $response->getStopReason()->value,
         ];
 
         file_put_contents(
@@ -336,9 +335,9 @@ class JSONLogger implements LLMLogger {
 **Sample Log Output (llm.json):**
 
 ```json
-{"timestamp":"2025-01-15T10:23:47+00:00","model":"claude-sonnet-4-5-20250929","input_tokens":15,"output_tokens":8,"total_tokens":23,"input_cost":0.000045,"output_cost":0.0003,"total_cost":0.000345,"response_time_ms":1823,"stop_reason":"end_turn"}
-{"timestamp":"2025-01-15T10:24:12+00:00","model":"gpt-5-2025-08-07","input_tokens":22,"output_tokens":45,"total_tokens":67,"input_cost":0.00011,"output_cost":0.00078,"total_cost":0.00089,"response_time_ms":956,"stop_reason":"stop"}
-{"timestamp":"2025-01-15T10:25:03+00:00","model":"gemini-2.5-pro","input_tokens":18,"output_tokens":31,"total_tokens":49,"input_cost":0.00009,"output_cost":0.00043,"total_cost":0.00052,"response_time_ms":1245,"stop_reason":"STOP"}
+{"timestamp":"2026-06-12T10:23:47+00:00","model":"claude-sonnet-4-6","input_tokens":15,"output_tokens":8,"total_tokens":23,"input_cost":0.000045,"output_cost":0.0003,"total_cost":0.000345,"response_time_ms":1823,"stop_reason":"finished"}
+{"timestamp":"2026-06-12T10:24:12+00:00","model":"gpt-5.4-2026-03-05","input_tokens":22,"output_tokens":45,"total_tokens":67,"input_cost":0.00011,"output_cost":0.00078,"total_cost":0.00089,"response_time_ms":956,"stop_reason":"finished"}
+{"timestamp":"2026-06-12T10:25:03+00:00","model":"gemini-2.5-pro","input_tokens":18,"output_tokens":31,"total_tokens":49,"input_cost":0.00009,"output_cost":0.00043,"total_cost":0.00052,"response_time_ms":1245,"stop_reason":"finished"}
 ```
 
 This format is ideal for log aggregation tools like ELK stack, Splunk, or DataDog.
diff --git a/docs/examples/multimodal.md b/docs/examples/multimodal.md
index 9493519..dad686a 100644
--- a/docs/examples/multimodal.md
+++ b/docs/examples/multimodal.md
@@ -73,12 +73,12 @@ function analyzeUIScreenshot(string $screenshotPath): array {
             conversation: new LLMConversation([
                 LLMMessage::createFromUser(new LLMMessageContents([
                     new LLMMessageText(
-                        'Analyze this UI screenshot and provide:\n' .
-                        '1. Accessibility issues (contrast, font sizes, etc.)\n' .
-                        '2. Layout problems (alignment, spacing, overlapping)\n' .
-                        '3. Responsive design concerns\n' .
-                        '4. UX improvement suggestions\n\n' .
-                        'Format as a structured list with severity levels.'
+                        "Analyze this UI screenshot and provide:\n" .
+                        "1. Accessibility issues (contrast, font sizes, etc.)\n" .
+                        "2. Layout problems (alignment, spacing, overlapping)\n" .
+                        "3. Responsive design concerns\n" .
+                        "4. UX improvement suggestions\n\n" .
+                        "Format as a structured list with severity levels."
                     ),
                     new LLMMessageImage('base64', 'image/png', $imageData)
                 ]))
@@ -194,13 +194,13 @@ function reviewContract(string $contractPdfPath): array {
             conversation: new LLMConversation([
                 LLMMessage::createFromUser(new LLMMessageContents([
                     new LLMMessageText(
-                        'Review this contract and provide:\n' .
-                        '1. Key terms (parties, dates, amounts)\n' .
-                        '2. Obligations and responsibilities\n' .
-                        '3. Termination clauses\n' .
-                        '4. Potential red flags or unusual clauses\n' .
-                        '5. Missing standard clauses\n\n' .
-                        'Format as a structured report.'
+                        "Review this contract and provide:\n" .
+                        "1. Key terms (parties, dates, amounts)\n" .
+                        "2. Obligations and responsibilities\n" .
+                        "3. Termination clauses\n" .
+                        "4. Potential red flags or unusual clauses\n" .
+                        "5. Missing standard clauses\n\n" .
+                        "Format as a structured report."
                     ),
                     new LLMMessagePdf('base64', $pdfData)
                 ]))
@@ -233,13 +233,13 @@ function summarizeResearchPaper(string $paperPdfPath): string {
             conversation: new LLMConversation([
                 LLMMessage::createFromUser(new LLMMessageContents([
                     new LLMMessageText(
-                        'Summarize this research paper. Include:\n' .
-                        '- Research question/hypothesis\n' .
-                        '- Methodology\n' .
-                        '- Key findings\n' .
-                        '- Conclusions\n' .
-                        '- Limitations\n\n' .
-                        'Write for a technical but non-specialist audience (max 500 words).'
+                        "Summarize this research paper. Include:\n" .
+                        "- Research question/hypothesis\n" .
+                        "- Methodology\n" .
+                        "- Key findings\n" .
+                        "- Conclusions\n" .
+                        "- Limitations\n\n" .
+                        "Write for a technical but non-specialist audience (max 500 words)."
                     ),
                     new LLMMessagePdf('base64', $pdfData)
                 ]))
@@ -476,8 +476,9 @@ try {
 <?php
 function supportsMultimodal($model): bool {
     // Check if model supports images/PDFs
-    return $model instanceof AnthropicClaude45Sonnet ||
-           $model instanceof GPT5 ||
+    // (PDF input is supported by Anthropic, OpenAI and Gemini models)
+    return $model instanceof AnthropicClaude46Sonnet ||
+           $model instanceof GPT54 ||
            $model instanceof Gemini25Pro;
 }
 
diff --git a/docs/examples/quick-start.md b/docs/examples/quick-start.md
index ba9226a..1f7e9ba 100644
--- a/docs/examples/quick-start.md
+++ b/docs/examples/quick-start.md
@@ -18,7 +18,7 @@ require_once __DIR__ . '/vendor/autoload.php';
 
 use Soukicz\Llm\Cache\FileCache;
 use Soukicz\Llm\Client\Anthropic\AnthropicClient;
-use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Sonnet;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
 use Soukicz\Llm\Client\LLMAgentClient;
 use Soukicz\Llm\Message\LLMMessage;
 use Soukicz\Llm\LLMConversation;
@@ -37,7 +37,7 @@ $agentClient = new LLMAgentClient();
 $response = $agentClient->run(
     client: $anthropic,
     request: new LLMRequest(
-        model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+        model: new AnthropicClaude46Sonnet(),
         conversation: new LLMConversation([
             LLMMessage::createFromUserString('What is PHP?')
         ]),
@@ -60,7 +60,7 @@ use Soukicz\Llm\LLMResponse;
 $promise = $agentClient->runAsync(
     client: $anthropic,
     request: new LLMRequest(
-        model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+        model: new AnthropicClaude46Sonnet(),
         conversation: new LLMConversation([
             LLMMessage::createFromUserString('Explain async programming')
         ]),
@@ -90,18 +90,17 @@ $conversation = new LLMConversation([
 $response = $agentClient->run(
     client: $anthropic,
     request: new LLMRequest(
-        model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+        model: new AnthropicClaude46Sonnet(),
         conversation: $conversation,
     )
 );
 
 echo "AI: " . $response->getLastText() . "\n"; // "4"
 
-// Add AI response to conversation (returns new instance)
-$conversation = $conversation->withMessage($response->getLastMessage());
-
+// The response's conversation already includes the assistant reply
+// (and any tool use/result messages), so continue from there.
 // Add user's follow-up question (returns new instance)
-$conversation = $conversation->withMessage(
+$conversation = $response->getConversation()->withMessage(
     LLMMessage::createFromUserString('What about 2 * 2?')
 );
 
@@ -109,7 +108,7 @@ $conversation = $conversation->withMessage(
 $response = $agentClient->run(
     client: $anthropic,
     request: new LLMRequest(
-        model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+        model: new AnthropicClaude46Sonnet(),
         conversation: $conversation,
     )
 );
@@ -126,14 +125,14 @@ PHP LLM provides a unified interface across multiple LLM providers. Simply swap
 ```php
 <?php
 use Soukicz\Llm\Client\OpenAI\OpenAIClient;
-use Soukicz\Llm\Client\OpenAI\Model\GPT5;
+use Soukicz\Llm\Client\OpenAI\Model\GPT54;
 
 $openai = new OpenAIClient('sk-xxxxx', 'org-xxxxx', $cache);
 
 $response = $agentClient->run(
     client: $openai,
     request: new LLMRequest(
-        model: new GPT5(GPT5::VERSION_2025_08_07),
+        model: new GPT54(GPT54::VERSION_2026_03_05),
         conversation: $conversation,
     )
 );
@@ -172,7 +171,7 @@ $client = new OpenAICompatibleClient(
 $response = $agentClient->run(
     client: $client,
     request: new LLMRequest(
-        model: new LocalModel('anthropic/claude-3.5-sonnet'),
+        model: new LocalModel('anthropic/claude-haiku-4.5'),
         conversation: $conversation,
     )
 );
diff --git a/docs/examples/state-management.md b/docs/examples/state-management.md
index 7aa296e..5b134fa 100644
--- a/docs/examples/state-management.md
+++ b/docs/examples/state-management.md
@@ -8,6 +8,8 @@ Save and resume AI agent conversations using JSON serialization. Conversation st
 
 **Immutability**: Remember that `LLMConversation` is immutable - use `withMessage()` to add messages, which returns a new instance.
 
+**Thread ID**: Every `LLMConversation` has a thread ID - an auto-generated UUID, or a value you pass as the second constructor argument (`new LLMConversation([], 'my-thread-id')`). It is preserved by `withMessage()` and JSON serialization, and `getThreadId()` makes it a convenient key for persisting conversations.
+
 ## Saving Conversations
 
 ```php
@@ -165,7 +167,7 @@ $_SESSION['conversation'] = json_encode($conversation);
 // Load from session
 $conversation = isset($_SESSION['conversation'])
     ? LLMConversation::fromJson(json_decode($_SESSION['conversation'], true))
-    : new LLMConversation();
+    : new LLMConversation([]);
 ```
 
 ## Conversation History Management
@@ -188,10 +190,10 @@ function trimConversation(LLMConversation $conversation, int $maxMessages): LLMC
         return $conversation;
     }
 
-    // Keep most recent messages
+    // Keep most recent messages (preserve the thread ID)
     $trimmedMessages = array_slice($messages, -$maxMessages);
 
-    return new LLMConversation($trimmedMessages);
+    return new LLMConversation($trimmedMessages, $conversation->getThreadId());
 }
 
 $conversation = trimConversation($conversation, 20); // Keep last 20 messages
@@ -259,7 +261,7 @@ class ChatService {
         }
 
         // Create new conversation
-        $conversation = new LLMConversation();
+        $conversation = new LLMConversation([]);
         $this->saveConversation($userId, $conversation);
 
         return $conversation;
@@ -281,9 +283,9 @@ class ChatService {
             )
         );
 
-        // Add AI response (immutable - returns new instance)
-        $conversation = $conversation->withMessage($response->getLastMessage());
-        $this->saveConversation($userId, $conversation);
+        // The response's conversation already includes the assistant reply
+        // (and any tool use/result messages) - persist that
+        $this->saveConversation($userId, $response->getConversation());
 
         return $response->getLastText();
     }
diff --git a/docs/examples/tools-and-function-calling.md b/docs/examples/tools-and-function-calling.md
index 1440404..89e6afe 100644
--- a/docs/examples/tools-and-function-calling.md
+++ b/docs/examples/tools-and-function-calling.md
@@ -16,10 +16,12 @@ The LLM decides when to use tools based on your prompts and the tool description
 
 ```php
 <?php
-use Soukicz\Llm\Tool\CallbackToolDefinition;
-use Soukicz\Llm\Message\LLMMessageContents;
-use Soukicz\Llm\Message\LLMMessageText;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
+use Soukicz\Llm\LLMConversation;
 use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\Message\LLMMessage;
+use Soukicz\Llm\Message\LLMMessageContents;
+use Soukicz\Llm\Tool\CallbackToolDefinition;
 
 // Define a simple calculator tool
 $calculator = new CallbackToolDefinition(
@@ -61,7 +63,7 @@ $calculator = new CallbackToolDefinition(
 
 // Use the tool in a request
 $request = new LLMRequest(
-    model: new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929),
+    model: new AnthropicClaude46Sonnet(),
     conversation: new LLMConversation([
         LLMMessage::createFromUserString('What is 157 * 832?')
     ]),
@@ -196,8 +198,24 @@ $tools = [
     new CallbackToolDefinition(
         name: 'get_weather',
         description: 'Get current weather for a location',
-        inputSchema: [...],
-        handler: fn($input) => // weather logic
+        inputSchema: [
+            'type' => 'object',
+            'properties' => [
+                'city' => [
+                    'type' => 'string',
+                    'description' => 'City name (e.g., "London", "New York")',
+                ],
+            ],
+            'required' => ['city'],
+        ],
+        handler: function (array $input): LLMMessageContents {
+            // ... call your weather API here ...
+            return LLMMessageContents::fromArrayData([
+                'city' => $input['city'],
+                'temperature' => 18,
+                'description' => 'partly cloudy',
+            ]);
+        }
     ),
 
     // Stock price tool
@@ -214,7 +232,14 @@ $tools = [
             ],
             'required' => ['ticker'],
         ],
-        handler: fn($input) => // stock API logic
+        handler: function (array $input): LLMMessageContents {
+            // ... call your stock API here ...
+            return LLMMessageContents::fromArrayData([
+                'ticker' => $input['ticker'],
+                'price' => 187.42,
+                'currency' => 'USD',
+            ]);
+        }
     ),
 
     // Calculator
@@ -237,60 +262,40 @@ $response = $agentClient->run($client, $request);
 
 ## Multi-Step Tool Usage
 
-Handle conversations where the LLM uses tools multiple times:
+You don't need to handle the tool-use loop yourself. `LLMAgentClient::run()` does it automatically: whenever the LLM stops to call a tool (`StopReason::TOOL_USE`), the agent client executes the matching tool handler, appends the tool result to the conversation, and sends a follow-up request - repeating until the LLM produces a final answer. Calls to unknown tools are answered with an error result so the LLM can recover.
 
 ```php
 <?php
-$conversation = new LLMConversation([
-    LLMMessage::createFromUserString('Calculate 50 * 30, then add 100 to the result')
-]);
-
-// First request
 $response = $agentClient->run(
     client: $client,
     request: new LLMRequest(
         model: $model,
-        conversation: $conversation,
+        conversation: new LLMConversation([
+            LLMMessage::createFromUserString('Calculate 50 * 30, then add 100 to the result')
+        ]),
         tools: [$calculator]
     )
 );
 
-// Check if the LLM used a tool
-if ($response->getLastMessage()->hasToolUse()) {
-    // Add the assistant's response (including tool use) to conversation
-    $conversation = $conversation->withMessage($response->getLastMessage());
-
-    // Execute the tool and add result
-    foreach ($response->getLastMessage()->getContents()->getToolUses() as $toolUse) {
-        $tool = $tools[$toolUse->getName()] ?? null;
-
-        if ($tool) {
-            $result = $tool->handle($toolUse->getInput());
-            $conversation = $conversation->withMessage(
-                LLMMessage::createFromUser(
-                    new LLMMessageContents([
-                        new LLMMessageToolResult(
-                            toolUseId: $toolUse->getId(),
-                            content: $result,
-                            isError: false
-                        )
-                    ])
-                )
-            );
-        }
-    }
+// The final answer, after any number of intermediate tool calls
+echo $response->getLastText();
+```
 
-    // Continue the conversation
-    $finalResponse = $agentClient->run(
-        client: $client,
-        request: new LLMRequest(
-            model: $model,
-            conversation: $conversation,
-            tools: [$calculator]
-        )
-    );
+If you need to inspect the intermediate steps, the conversation returned with the response contains every tool call and tool result:
 
-    echo $finalResponse->getLastText();
+```php
+<?php
+use Soukicz\Llm\Message\LLMMessageToolResult;
+use Soukicz\Llm\Message\LLMMessageToolUse;
+
+foreach ($response->getConversation()->getMessages() as $message) {
+    foreach ($message->getContents() as $content) {
+        if ($content instanceof LLMMessageToolUse) {
+            echo "Tool call: " . $content->getName() . " " . json_encode($content->getInput()) . "\n";
+        } elseif ($content instanceof LLMMessageToolResult) {
+            echo "Tool result for call " . $content->getId() . "\n";
+        }
+    }
 }
 ```
 
@@ -424,11 +429,13 @@ Test tools independently before using with LLMs:
 // Unit test a tool
 $calculator = new CallbackToolDefinition(...);
 
+// The handler returns LLMMessageContents, which supports array access;
+// the first item is an LLMMessageArrayData holding the returned array
 $result = $calculator->handle(['expression' => '2 + 2']);
-assert($result->toArray()['result'] === 4);
+assert($result[0]->getData()['result'] === 4);
 
 $result = $calculator->handle(['expression' => 'invalid']);
-assert(isset($result->toArray()['error']));
+assert(isset($result[0]->getData()['error']));
 ```
 
 ## See Also
diff --git a/docs/guides/batch-processing.md b/docs/guides/batch-processing.md
index 6d83204..b2fd8f7 100644
--- a/docs/guides/batch-processing.md
+++ b/docs/guides/batch-processing.md
@@ -11,7 +11,7 @@ Batch processing allows you to:
 - Save costs (often 50% cheaper than real-time)
 - Handle large-scale operations
 
-**Note:** Batch processing support varies by provider. Check provider-specific documentation.
+**Note:** Batch processing support varies by provider. Both `AnthropicClient` and `OpenAIClient` implement batches; Gemini does not.
 
 ## LLMBatchClient Interface
 
@@ -28,6 +28,9 @@ interface LLMBatchClient {
 }
 ```
 
+- `createBatch()` takes an array of `LLMRequest` objects **keyed by your custom ID** and returns the provider's batch ID.
+- `retrieveBatch()` returns `null` while the batch is still in progress. Once finished, it returns an array mapping each custom ID to the response text content.
+
 ## Basic Usage
 
 ### Submit Batch
@@ -43,10 +46,10 @@ use Soukicz\Llm\Message\LLMMessage;
 /** @var LLMBatchClient $client */
 $client = new OpenAIClient('sk-xxxxx', 'org-xxxxx');
 
-// Prepare multiple requests
+// Prepare multiple requests, keyed by a custom ID of your choice
 $requests = [];
 for ($i = 0; $i < 1000; $i++) {
-    $requests[] = new LLMRequest(
+    $requests["document-$i"] = new LLMRequest(
         model: new GPT5(GPT5::VERSION_2025_08_07),
         conversation: new LLMConversation([
             LLMMessage::createFromUserString("Summarize document $i")
@@ -63,13 +66,14 @@ echo "Batch created: $batchId\n";
 
 ```php
 <?php
-// Retrieve batch information (returns null if not ready, array with status and results when complete)
-$batch = $client->retrieveBatch($batchId);
+// Returns null while the batch is in progress,
+// or an array of [custom ID => response text] when finished
+$results = $client->retrieveBatch($batchId);
 
-if ($batch !== null) {
-    // Batch information available
-    // Check provider-specific documentation for exact response format
-    var_dump($batch);
+if ($results !== null) {
+    foreach ($results as $customId => $text) {
+        echo "$customId: $text\n";
+    }
 }
 ```
 
@@ -87,9 +91,9 @@ $client = new OpenAIClient('sk-xxxxx', 'org-xxxxx');
 
 // Prepare batch of classification tasks
 $texts = [
-    'This product is amazing!',
-    'Terrible service, would not recommend.',
-    'It\'s okay, nothing special.',
+    'review-1' => 'This product is amazing!',
+    'review-2' => 'Terrible service, would not recommend.',
+    'review-3' => 'It\'s okay, nothing special.',
     // ... 1000s more
 ];
 
@@ -101,7 +105,7 @@ $requests = array_map(
         ])
     ),
     $texts
-);
+); // array_map preserves the string keys, which become custom IDs
 
 // Submit batch
 $batchId = $client->createBatch($requests);
@@ -109,20 +113,17 @@ $batchId = $client->createBatch($requests);
 // Poll until complete
 do {
     sleep(60); // Wait 1 minute
-    $batch = $client->retrieveBatch($batchId);
+    $results = $client->retrieveBatch($batchId);
+} while ($results === null);
 
-    if ($batch !== null) {
-        // Check provider-specific response format for status
-        echo "Batch retrieved\n";
-        break;
-    }
-} while (true);
-
-// Process batch results
-// Note: Exact format depends on provider implementation
-var_dump($batch);
+// Process batch results: custom ID => response text
+foreach ($results as $customId => $text) {
+    echo "$customId: $text\n";
+}
 ```
 
+The same code works with `AnthropicClient` — only the client and model classes change.
+
 ## Async Polling
 
 Use async operations for efficient polling:
@@ -232,27 +233,33 @@ $batchId = $client->createBatch($requests);
 
 ## Error Handling
 
+There is no dedicated batch exception class. Batch creation fails with a Guzzle HTTP exception (e.g. `GuzzleHttp\Exception\ClientException`), and `retrieveBatch()` throws a `\RuntimeException` when the batch itself failed (e.g. OpenAI produced only an error file) or returned an unexpected status:
+
 ```php
 <?php
+use GuzzleHttp\Exception\GuzzleException;
+
 try {
     $batchId = $client->createBatch($requests);
-} catch (BatchCreationException $e) {
-    // Handle batch creation error
+} catch (GuzzleException $e) {
+    // Handle batch creation error (invalid request, rate limit, ...)
     echo "Failed to create batch: " . $e->getMessage();
 
     // Retry with smaller batch size
-    $smallerBatches = array_chunk($requests, 500);
+    $smallerBatches = array_chunk($requests, 500, preserve_keys: true);
     foreach ($smallerBatches as $batch) {
-        $batchId = $client->createBatch($batch);
+        $batchIds[] = $client->createBatch($batch); // one batch ID per chunk - retrieve each of them separately
     }
 }
 
 // Retrieve batch results
-$batch = $client->retrieveBatch($batchId);
-if ($batch !== null) {
-    // Process batch results according to provider-specific format
-    // Check provider documentation for exact structure
-    processResults($batch);
+try {
+    $results = $client->retrieveBatch($batchId);
+    if ($results !== null) {
+        processResults($results); // custom ID => response text
+    }
+} catch (\RuntimeException $e) {
+    echo "Batch failed: " . $e->getMessage();
 }
 ```
 
@@ -273,10 +280,16 @@ echo "Savings: $" . ($realTimeCost - $batchCost); // $50
 
 ## Provider Support
 
-- ✅ **OpenAI** - Full batch API support
-- ⚠️ **Anthropic** - Check current API documentation
-- ⚠️ **Google Gemini** - Check current API documentation
-- ❌ **OpenAI-compatible** - Varies by provider
+- ✅ **OpenAI** - `OpenAIClient` implements `LLMBatchClient` (uploads a JSONL file via `/files` and creates a `/batches` job with a 24h completion window)
+- ✅ **Anthropic** - `AnthropicClient` implements `LLMBatchClient` (uses the `/v1/messages/batches` API)
+- ❌ **Google Gemini** - Not supported by `GeminiClient`
+- ⚠️ **OpenAI-compatible** - `OpenAICompatibleClient` inherits the batch methods, but the provider must support the OpenAI files and batches endpoints
+
+## Implementation Notes
+
+- Custom IDs are the array keys you pass to `createBatch()` and they identify each result returned by `retrieveBatch()`.
+- `retrieveBatch()` extracts only the **text content** of each response. Tool calls, structured output and other content types are not decoded.
+- For OpenAI, when a completed batch produced only an error file, `retrieveBatch()` throws a `\RuntimeException` with the error details (or returns an empty array when the batch is older than 3 days).
 
 ## Limitations
 
diff --git a/docs/guides/caching.md b/docs/guides/caching.md
index 5efccaf..d4c956d 100644
--- a/docs/guides/caching.md
+++ b/docs/guides/caching.md
@@ -13,7 +13,7 @@ All PHP LLM clients support caching at the HTTP request level. When enabled:
 
 ## File Cache
 
-The built-in `FileCache` stores responses on the filesystem:
+The built-in `FileCache` stores responses on the filesystem. The directory must already exist — the constructor throws a `RuntimeException` otherwise:
 
 ```php
 <?php
@@ -70,33 +70,48 @@ $client = new AnthropicClient('sk-xxxxx', $cache);
 
 ## Custom Cache Implementation
 
-Implement the `CacheInterface` for custom caching:
+The cache operates on PSR-7 HTTP messages. The `CacheInterface` has three methods:
 
 ```php
 <?php
-use Soukicz\Llm\Cache\CacheInterface;
+use Psr\Http\Message\RequestInterface;
+use Psr\Http\Message\ResponseInterface;
+
+interface CacheInterface {
+    public function fetch(RequestInterface $request): ?ResponseInterface;
+
+    public function store(RequestInterface $request, ResponseInterface $response): void;
+
+    public function invalidate(RequestInterface $request): void;
+}
+```
 
-class RedisCache implements CacheInterface {
+For custom backends, extend `AbstractCache` — it provides `getCacheKey(RequestInterface): string` (a SHA-512 hash of URL, method and body) plus `responseToJson()`/`responseFromJson()` helpers for serializing PSR-7 responses:
+
+```php
+<?php
+use Psr\Http\Message\RequestInterface;
+use Psr\Http\Message\ResponseInterface;
+use Soukicz\Llm\Cache\AbstractCache;
+
+class RedisCache extends AbstractCache {
     public function __construct(
-        private Redis $redis,
-        private int $ttl = 3600
+        private readonly Redis $redis,
+        private readonly int $ttl = 3600
     ) {}
 
-    public function get(string $key): ?string {
-        $value = $this->redis->get($key);
-        return $value !== false ? $value : null;
-    }
+    public function fetch(RequestInterface $request): ?ResponseInterface {
+        $json = $this->redis->get($this->getCacheKey($request));
 
-    public function set(string $key, string $value): void {
-        $this->redis->setex($key, $this->ttl, $value);
+        return $json !== false ? $this->responseFromJson($json) : null;
     }
 
-    public function has(string $key): bool {
-        return $this->redis->exists($key) > 0;
+    public function store(RequestInterface $request, ResponseInterface $response): void {
+        $this->redis->setex($this->getCacheKey($request), $this->ttl, $this->responseToJson($response));
     }
 
-    public function delete(string $key): void {
-        $this->redis->del($key);
+    public function invalidate(RequestInterface $request): void {
+        $this->redis->del($this->getCacheKey($request));
     }
 }
 ```
@@ -109,15 +124,17 @@ $client = new AnthropicClient('sk-xxxxx', $cache);
 
 ## Cache Keys
 
-Cache keys are generated from:
-- API endpoint
-- Model name and version
-- Request parameters (temperature, maxTokens, etc.)
-- Conversation messages
-- Tool definitions
+Cache keys are a SHA-512 hash of the HTTP request:
+- Request URL (API endpoint, including the model for Gemini)
+- HTTP method
+- Request body (model, temperature, maxTokens, conversation messages, tool definitions, ...)
+
+Any change to the request body produces a new cache key.
 
 **Important:** Always use exact model versions to prevent stale cached responses.
 
+**Security caveat:** The cache key does **not** include request headers, so API keys are not part of the key. Identical requests share cache entries regardless of which credentials were used. The cache is intended for development, testing and request deduplication — do not rely on it for multi-tenant isolation.
+
 ## Best Practices
 
 ### Use Exact Model Versions
@@ -207,38 +224,45 @@ $client = new AnthropicClient('sk-xxxxx', null);
 
 ## Monitoring Cache Performance
 
-Track cache hit rates:
+Track cache hit rates by decorating another cache:
 
 ```php
 <?php
+use Psr\Http\Message\RequestInterface;
+use Psr\Http\Message\ResponseInterface;
+use Soukicz\Llm\Cache\CacheInterface;
+
 class CacheMonitor implements CacheInterface {
     private int $hits = 0;
     private int $misses = 0;
 
     public function __construct(
-        private CacheInterface $cache
+        private readonly CacheInterface $cache
     ) {}
 
-    public function get(string $key): ?string {
-        $value = $this->cache->get($key);
-        if ($value !== null) {
+    public function fetch(RequestInterface $request): ?ResponseInterface {
+        $response = $this->cache->fetch($request);
+        if ($response !== null) {
             $this->hits++;
         } else {
             $this->misses++;
         }
-        return $value;
+
+        return $response;
+    }
+
+    public function store(RequestInterface $request, ResponseInterface $response): void {
+        $this->cache->store($request, $response);
     }
 
-    public function set(string $key, string $value): void {
-        $this->cache->set($key, $value);
+    public function invalidate(RequestInterface $request): void {
+        $this->cache->invalidate($request);
     }
 
     public function getHitRate(): float {
         $total = $this->hits + $this->misses;
         return $total > 0 ? $this->hits / $total : 0;
     }
-
-    // Implement other interface methods...
 }
 ```
 
@@ -255,31 +279,36 @@ echo "Cache hit rate: " . ($cache->getHitRate() * 100) . "%\n";
 
 ### Manual Cleanup
 
+`invalidate()` removes the entry for a specific PSR-7 HTTP request. Since you usually don't have the underlying HTTP request at hand, the simplest cleanup for `FileCache` is to delete the cache files:
+
 ```php
 <?php
-// Clear specific cache entry
-$cache->delete($cacheKey);
-
-// Clear all cache (FileCache example)
-array_map('unlink', glob('/tmp/llm-cache/*'));
+// Clear all cache (FileCache stores one .json file per entry)
+array_map('unlink', glob('/tmp/llm-cache/*.json') ?: []);
 ```
 
 ### Automatic Expiration
 
-Implement TTL in custom cache:
+Implement TTL in a custom cache by extending `AbstractCache`:
 
 ```php
 <?php
-class TTLFileCache implements CacheInterface {
-    private int $ttl;
+use Psr\Http\Message\RequestInterface;
+use Psr\Http\Message\ResponseInterface;
+use Soukicz\Llm\Cache\AbstractCache;
+
+class TTLFileCache extends AbstractCache {
+    public function __construct(
+        private readonly string $directory,
+        private readonly int $ttl = 3600
+    ) {}
 
-    public function __construct(string $directory, int $ttlSeconds = 3600) {
-        $this->directory = $directory;
-        $this->ttl = $ttlSeconds;
+    private function getPath(RequestInterface $request): string {
+        return $this->directory . '/' . md5($this->getCacheKey($request)) . '.json';
     }
 
-    public function get(string $key): ?string {
-        $file = $this->getFilePath($key);
+    public function fetch(RequestInterface $request): ?ResponseInterface {
+        $file = $this->getPath($request);
 
         if (!file_exists($file)) {
             return null;
@@ -288,13 +317,20 @@ class TTLFileCache implements CacheInterface {
         // Check if expired
         if (time() - filemtime($file) > $this->ttl) {
             unlink($file);
+
             return null;
         }
 
-        return file_get_contents($file);
+        return $this->responseFromJson(file_get_contents($file));
     }
 
-    // Implement other methods...
+    public function store(RequestInterface $request, ResponseInterface $response): void {
+        file_put_contents($this->getPath($request), $this->responseToJson($response), LOCK_EX);
+    }
+
+    public function invalidate(RequestInterface $request): void {
+        @unlink($this->getPath($request));
+    }
 }
 ```
 
@@ -306,17 +342,18 @@ Example cost calculation:
 <?php
 $request = new LLMRequest(/*...*/);
 
-// First request - hits API ($0.015)
+// First request - hits the API
 $response1 = $agentClient->run($client, $request);
-echo "Cost: $" . $response1->getTokenUsage()->getTotalCost() . "\n";
+echo "Cost: $" . ($response1->getInputPriceUsd() + $response1->getOutputPriceUsd()) . "\n";
 
-// Cached request - no cost ($0.00)
+// Identical request - served from the cache, no API call is made
 $response2 = $agentClient->run($client, $request);
-echo "Cost: $" . $response2->getTokenUsage()->getTotalCost() . "\n";
 
 // 100% savings on repeated requests!
 ```
 
+Note that the reported price is calculated from the token counts in the response, so a cached response still reports the original cost — but no API call is made and nothing is billed.
+
 ## See Also
 
 - [Configuration Guide](configuration.md) - Client configuration
diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md
index dad7fa7..8b51d22 100644
--- a/docs/guides/configuration.md
+++ b/docs/guides/configuration.md
@@ -6,19 +6,20 @@ Configure your AI agent requests with various parameters to control behavior, ou
 
 ```php
 <?php
-use Soukicz\Llm\Config\ReasoningBudget;
 use Soukicz\Llm\Config\ReasoningEffort;
+use Soukicz\Llm\Config\StructuredOutputConfig;
 use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\Stream\CallableStreamListener;
 
 $request = new LLMRequest(
     model: $model,                              // Required: Model instance
     conversation: $conversation,                // Required: LLMConversation
+    temperature: 0.7,                           // Optional: 0.0 to 1.0 (default 0.0)
+    maxTokens: 4096,                            // Optional: Maximum response tokens (default 4096)
     tools: $tools,                              // Optional: Array of tool definitions
-    temperature: 0.7,                           // Optional: 0.0 to 1.0
-    maxTokens: 4096,                            // Optional: Maximum response tokens
     stopSequences: ['###', 'END'],              // Optional: Stop generation strings
-    reasoningConfig: ReasoningEffort::HIGH,     // Optional: For reasoning models
-    reasoningConfig: new ReasoningBudget(10000),// Optional: Token budget for reasoning
+    reasoningConfig: ReasoningEffort::HIGH,     // Optional: ReasoningEffort or ReasoningBudget
+    structuredOutputConfig: new StructuredOutputConfig($schema), // Optional: JSON Schema output
     streamListener: new CallableStreamListener( // Optional: Real-time progress updates
         fn($event) => print($event->delta)
     ),
@@ -71,8 +72,8 @@ $conversation = new LLMConversation([
 
 Controls randomness in responses (0.0 to 1.0):
 
-- **0.0** - Deterministic, focused responses
-- **0.5** - Balanced (default for most models)
+- **0.0** - Deterministic, focused responses (the library default)
+- **0.5** - Balanced
 - **1.0** - Creative, varied responses
 
 ```php
@@ -176,18 +177,44 @@ $request = new LLMRequest(
 
 Or implement `StreamListenerInterface` for a reusable class-based listener. See [Streaming Guide](streaming.md) for full documentation and practical examples.
 
-**Note:** Streaming bypasses the response cache. When a listener is present, the request always goes to the API.
+**Note:** Streaming works with the response cache. On a cache hit, the cached response is replayed through the stream listener, and a completed live stream is stored in the cache for future requests.
+
+### structuredOutputConfig
+
+Force the model to return JSON matching a schema:
+
+```php
+<?php
+use Soukicz\Llm\Config\StructuredOutputConfig;
+
+$request = new LLMRequest(
+    model: $model,
+    conversation: $conversation,
+    structuredOutputConfig: new StructuredOutputConfig([
+        'type' => 'object',
+        'properties' => [
+            'name' => ['type' => 'string'],
+        ],
+        'required' => ['name'],
+    ]),
+);
+
+$data = $agentClient->run($client, $request)->getLastStructuredData();
+```
+
+See [Structured Output Guide](structured-output.md) for full documentation.
 
 ## Reasoning Parameters
 
-For reasoning models (o3, o4):
+### reasoningConfig
 
-### reasoningEffort
+The `reasoningConfig` parameter accepts either a `ReasoningEffort` enum case or a `ReasoningBudget` instance. When left at `null` (the default), the provider's default behavior is used.
 
-Control computational effort:
+Control computational effort with `ReasoningEffort` (works with Anthropic, OpenAI, and Gemini):
 
 ```php
 <?php
+use Soukicz\Llm\Client\OpenAI\Model\GPTo3;
 use Soukicz\Llm\Config\ReasoningEffort;
 
 $request = new LLMRequest(
@@ -198,22 +225,24 @@ $request = new LLMRequest(
 ```
 
 **Options:**
+- `ReasoningEffort::NONE` - Disable reasoning
+- `ReasoningEffort::MINIMAL` - Minimal reasoning
 - `ReasoningEffort::LOW` - Fast, less thorough
-- `ReasoningEffort::MEDIUM` - Balanced (default)
+- `ReasoningEffort::MEDIUM` - Balanced
 - `ReasoningEffort::HIGH` - Thorough, slower
+- `ReasoningEffort::EXTRA_HIGH` - Maximum effort
 
-### reasoningConfig
-
-Limit reasoning tokens for cost control using `ReasoningBudget`:
+Or limit reasoning tokens for cost control using `ReasoningBudget` (**Anthropic only** — the OpenAI and Gemini encoders throw `InvalidArgumentException`):
 
 ```php
 <?php
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
 use Soukicz\Llm\Config\ReasoningBudget;
 
 $request = new LLMRequest(
-    model: new GPTo3(GPTo3::VERSION_2025_04_16),
+    model: new AnthropicClaude46Sonnet(),
     conversation: $conversation,
-    reasoningConfig: new ReasoningBudget(5000)  // Max 5k reasoning tokens
+    reasoningConfig: new ReasoningBudget(5000)  // Max 5k thinking tokens
 );
 ```
 
@@ -238,23 +267,34 @@ See [Caching Guide](caching.md) for cache options.
 
 ### HTTP Middleware
 
-Add Guzzle middleware for logging or custom behavior:
+All clients accept a `customHttpMiddleware` parameter — a single Guzzle middleware callable that is pushed onto the client's internal handler stack. Use it for logging or custom behavior:
 
 ```php
 <?php
-use GuzzleHttp\HandlerStack;
-use GuzzleHttp\Middleware;
+use Psr\Http\Message\RequestInterface;
+use Psr\Http\Message\ResponseInterface;
 
-$stack = HandlerStack::create();
-$stack->push(Middleware::log($logger, $formatter));
+$loggingMiddleware = function (callable $handler) {
+    return function (RequestInterface $request, array $options) use ($handler) {
+        return $handler($request, $options)->then(
+            function (ResponseInterface $response) use ($request) {
+                error_log($request->getMethod() . ' ' . $request->getUri() . ' - ' . $response->getStatusCode());
+
+                return $response;
+            }
+        );
+    };
+};
 
 $client = new AnthropicClient(
     apiKey: 'sk-xxxxx',
     cache: $cache,
-    handler: $stack
+    customHttpMiddleware: $loggingMiddleware
 );
 ```
 
+See [Logging & Debugging](../examples/logging-debugging.md) for a complete middleware example.
+
 ## Provider-Specific Configuration
 
 ### Gemini Safety Settings
@@ -303,10 +343,10 @@ use Soukicz\Llm\Client\Universal\LocalModel;
 
 $client = new OpenAICompatibleClient(
     apiKey: 'your-api-key',
-    baseUrl: 'https://api.openrouter.ai/v1'
+    baseUrl: 'https://openrouter.ai/api/v1'
 );
 
-$model = new LocalModel('anthropic/claude-3.5-sonnet');
+$model = new LocalModel('anthropic/claude-haiku-4.5');
 ```
 
 ## Configuration Best Practices
@@ -318,7 +358,7 @@ $model = new LocalModel('anthropic/claude-3.5-sonnet');
 5. **Use higher temperature** for creative tasks
 6. **Set stopSequences** for structured outputs
 7. **Configure safety settings** appropriately for your use case
-8. **Use reasoning budgets** in production
+8. **Limit reasoning costs in production** - `ReasoningBudget` on Anthropic, lower `ReasoningEffort` elsewhere
 
 ## Example: Complete Configuration
 
@@ -358,6 +398,7 @@ $response = $agentClient->run($client, $request);
 ## See Also
 
 - [Reasoning Models](reasoning.md) - Reasoning-specific configuration
+- [Structured Output](structured-output.md) - JSON Schema constrained responses
 - [Streaming](streaming.md) - Real-time response streaming
 - [Tools Guide](tools.md) - Tool configuration
 - [Caching Guide](caching.md) - Cache configuration
diff --git a/docs/guides/feedback-loops.md b/docs/guides/feedback-loops.md
index db4d525..fc9732b 100644
--- a/docs/guides/feedback-loops.md
+++ b/docs/guides/feedback-loops.md
@@ -259,10 +259,12 @@ feedbackCallback: function (LLMResponse $response) use (&$attempt): ?LLMMessage
 
 ### With Tools
 
-Validate tool outputs in feedback loops:
+Feedback loops and tools combine naturally: `LLMAgentClient` first runs the tool loop to completion, and **only then** invokes the feedback callback on the final response. The callback therefore never sees a pending tool call — but you can inspect the conversation history to verify which tools were actually used:
 
 ```php
 <?php
+use Soukicz\Llm\Message\LLMMessageToolUse;
+
 $response = $agentClient->run(
     client: $anthropic,
     request: new LLMRequest(
@@ -271,11 +273,16 @@ $response = $agentClient->run(
         tools: [$calculatorTool],
     ),
     feedbackCallback: function (LLMResponse $response): ?LLMMessage {
-        // Ensure the agent used the calculator tool
-        if (!$response->hasToolCalls()) {
-            return LLMMessage::createFromUserString('Please use the calculator tool for this calculation');
+        // Ensure the agent used the calculator tool at some point in the conversation
+        foreach ($response->getConversation()->getMessages() as $message) {
+            foreach ($message->getContents() as $content) {
+                if ($content instanceof LLMMessageToolUse && $content->getName() === 'calculator') {
+                    return null; // Tool was used - accept the response
+                }
+            }
         }
-        return null;
+
+        return LLMMessage::createFromUserString('Please use the calculator tool for this calculation');
     }
 );
 ```
@@ -286,15 +293,15 @@ Validate reasoning model outputs:
 
 ```php
 <?php
-use Soukicz\Llm\Client\OpenAI\Model\OpenAIGPTo3;
+use Soukicz\Llm\Client\OpenAI\Model\GPTo3;
 use Soukicz\Llm\Config\ReasoningEffort;
 
 $response = $agentClient->run(
     client: $openai,
     request: new LLMRequest(
-        model: new OpenAIGPTo3(),
+        model: new GPTo3(GPTo3::VERSION_2025_04_16),
         conversation: $conversation,
-        reasoningEffort: ReasoningEffort::HIGH
+        reasoningConfig: ReasoningEffort::HIGH
     ),
     feedbackCallback: function (LLMResponse $response): ?LLMMessage {
         // Verify mathematical accuracy
@@ -356,6 +363,7 @@ return LLMMessage::createFromUserString('The JSON is missing the required "email
 ## See Also
 
 - [Tools Guide](tools.md) - Validate tool usage in feedback loops
+- [Structured Output](structured-output.md) - Guarantee JSON shape without re-prompting
 - [Reasoning Models](reasoning.md) - Combine reasoning with validation
 - [Examples](../examples/index.md) - More feedback loop examples
 - [Configuration](configuration.md) - Configure request behavior
diff --git a/docs/guides/multimodal.md b/docs/guides/multimodal.md
index 6b493d9..eddc509 100644
--- a/docs/guides/multimodal.md
+++ b/docs/guides/multimodal.md
@@ -151,7 +151,7 @@ $message = LLMMessage::createFromUser(new LLMMessageContents([
 **PDF Support:**
 - ✅ Anthropic (Claude) - All models
 - ✅ OpenAI (GPT) - GPT-4o and later models
-- ❌ Google Gemini - Not currently supported
+- ✅ Google Gemini - Supported (sent as inline `application/pdf` data)
 - ⚠️ OpenAI-compatible - Depends on the underlying model
 
 ## Provider-Specific Notes
diff --git a/docs/guides/reasoning.md b/docs/guides/reasoning.md
index 5718a00..bdec8b6 100644
--- a/docs/guides/reasoning.md
+++ b/docs/guides/reasoning.md
@@ -1,6 +1,12 @@
 # Reasoning Models
 
-Reasoning models like OpenAI's o3 and o4 series spend additional computation time thinking through problems before responding. This makes them particularly effective for complex tasks requiring deep analysis, mathematics, coding, and logical reasoning.
+Reasoning models spend additional computation time thinking through problems before responding. This makes them particularly effective for complex tasks requiring deep analysis, mathematics, coding, and logical reasoning.
+
+All three major providers support reasoning through this library:
+
+- **Anthropic** - Claude extended thinking (adaptive thinking with effort levels, or an explicit token budget)
+- **OpenAI** - Reasoning effort on o-series and GPT-5.x models
+- **Google Gemini** - Thinking on Gemini 2.5+ models (token budgets on 2.x, thinking levels on 3.x)
 
 ## Overview
 
@@ -14,11 +20,11 @@ This results in more accurate responses for challenging tasks, at the cost of hi
 
 ## Configuring Reasoning
 
-PHP LLM provides two ways to configure reasoning models:
+PHP LLM provides two ways to configure reasoning via the `reasoningConfig` parameter of `LLMRequest`. When `reasoningConfig` is left at `null` (the default), the provider's default behavior is used.
 
 ### Reasoning Effort
 
-Control how much computational effort the model spends reasoning:
+Control how much computational effort the model spends reasoning. `ReasoningEffort` works with all three providers:
 
 ```php
 <?php
@@ -34,22 +40,42 @@ $request = new LLMRequest(
 ```
 
 **Effort Levels:**
+- `ReasoningEffort::NONE` - Disable reasoning entirely
+- `ReasoningEffort::MINIMAL` - Minimal reasoning
 - `ReasoningEffort::LOW` - Fast, less thorough reasoning
-- `ReasoningEffort::MEDIUM` - Balanced reasoning (default)
+- `ReasoningEffort::MEDIUM` - Balanced reasoning
 - `ReasoningEffort::HIGH` - Thorough, slower reasoning
+- `ReasoningEffort::EXTRA_HIGH` - Maximum reasoning effort
+
+There is no default level — omitting `reasoningConfig` leaves the decision to the provider.
+
+**How effort maps to each provider:**
+
+| Effort | Anthropic (adaptive thinking + effort) | OpenAI (`reasoning_effort`) | Gemini 3.x (`thinkingLevel`) | Gemini 2.x (`thinkingBudget`) |
+|---|---|---|---|---|
+| `NONE` | thinking disabled | `none` | `thinkingBudget: 0` | `0` |
+| `MINIMAL` | `low` | `minimal` | `minimal` | `512` |
+| `LOW` | `low` | `low` | `low` | `1024` |
+| `MEDIUM` | `medium` | `medium` | `medium` | `8192` |
+| `HIGH` | `high` | `high` | `high` | `24576` |
+| `EXTRA_HIGH` | `max` | `xhigh` | `high` | `24576` |
+
+Gemini 2.x models do not accept `thinkingLevel` — the library automatically translates the effort level to a `thinkingBudget` token budget for them.
 
 ### Reasoning Budget
 
-Set a token limit for the model's internal reasoning:
+Set an explicit token limit for the model's internal reasoning. `ReasoningBudget` is **Anthropic-only** — it maps to Claude's `thinking.budget_tokens`. The OpenAI and Gemini encoders throw an `InvalidArgumentException` when given a `ReasoningBudget`.
 
 ```php
 <?php
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
 use Soukicz\Llm\Config\ReasoningBudget;
+use Soukicz\Llm\LLMRequest;
 
 $request = new LLMRequest(
-    model: new GPTo3(GPTo3::VERSION_2025_04_16),
+    model: new AnthropicClaude46Sonnet(),
     conversation: $conversation,
-    reasoningConfig: new ReasoningBudget(10000) // Max 10k tokens for reasoning
+    reasoningConfig: new ReasoningBudget(10000) // Max 10k tokens for thinking
 );
 ```
 
@@ -79,13 +105,32 @@ $response = $agentClient->run(
                 'A farmer has 17 sheep. All but 9 die. How many sheep are left alive?'
             )
         ]),
-        reasoningEffort: ReasoningEffort::HIGH
+        reasoningConfig: ReasoningEffort::HIGH
     )
 );
 
 echo $response->getLastText(); // "9 sheep are left alive"
 ```
 
+The same request works with Claude extended thinking:
+
+```php
+<?php
+use Soukicz\Llm\Client\Anthropic\AnthropicClient;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
+
+$anthropic = new AnthropicClient('sk-xxxxx', $cache);
+
+$response = $agentClient->run(
+    client: $anthropic,
+    request: new LLMRequest(
+        model: new AnthropicClaude46Sonnet(),
+        conversation: $conversation,
+        reasoningConfig: ReasoningEffort::HIGH
+    )
+);
+```
+
 ## When to Use Reasoning Models
 
 **Ideal Use Cases:**
@@ -105,18 +150,52 @@ echo $response->getLastText(); // "9 sheep are left alive"
 
 ## Supported Models
 
+### Anthropic (Extended Thinking)
+
+```php
+<?php
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Opus;
+
+// Supports ReasoningEffort (adaptive thinking) and ReasoningBudget (explicit token budget)
+$sonnet = new AnthropicClaude46Sonnet();
+$opus = new AnthropicClaude46Opus();
+```
+
+Claude's thinking blocks are returned as `LLMMessageReasoning` content in the conversation, so you can inspect what the model thought about.
+
 ### OpenAI Reasoning Models
 
 ```php
 <?php
 use Soukicz\Llm\Client\OpenAI\Model\GPTo3;
 use Soukicz\Llm\Client\OpenAI\Model\GPTo4Mini;
+use Soukicz\Llm\Client\OpenAI\Model\GPT54;
 
-// o3 - Most capable reasoning model
+// o3 - Dedicated reasoning model
 $o3 = new GPTo3(GPTo3::VERSION_2025_04_16);
 
 // o4-mini - Faster, more cost-effective reasoning
 $o4mini = new GPTo4Mini(GPTo4Mini::VERSION_2025_04_16);
+
+// GPT-5.x - General models with configurable reasoning effort
+$gpt54 = new GPT54(GPT54::VERSION_2026_03_05);
+```
+
+### Google Gemini (Thinking)
+
+```php
+<?php
+use Soukicz\Llm\Client\Gemini\Model\Gemini25Flash;
+use Soukicz\Llm\Client\Gemini\Model\Gemini25Pro;
+use Soukicz\Llm\Client\Gemini\Model\Gemini3ProPreview;
+
+// Gemini 2.5 models: effort is sent as a thinking token budget
+$pro = new Gemini25Pro();
+$flash = new Gemini25Flash();
+
+// Gemini 3.x models: effort is sent as a thinking level
+$gemini3 = new Gemini3ProPreview();
 ```
 
 ## Cost Considerations
@@ -124,38 +203,43 @@ $o4mini = new GPTo4Mini(GPTo4Mini::VERSION_2025_04_16);
 Reasoning models consume significantly more tokens due to their internal thinking process:
 
 1. **Input tokens** - Your prompt (standard pricing)
-2. **Reasoning tokens** - Internal thinking (usually discounted pricing)
+2. **Reasoning tokens** - Internal thinking (billed as output tokens)
 3. **Output tokens** - The response (standard pricing)
 
-Use `ReasoningBudget` to control costs:
+On Anthropic, use `ReasoningBudget` to cap thinking tokens:
 
 ```php
 <?php
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
 use Soukicz\Llm\Config\ReasoningBudget;
 
-// Limit reasoning to 5000 tokens for cost control
+// Limit thinking to 5000 tokens for cost control
 $request = new LLMRequest(
-    model: new GPTo3(GPTo3::VERSION_2025_04_16),
+    model: new AnthropicClaude46Sonnet(),
     conversation: $conversation,
     reasoningConfig: new ReasoningBudget(5000)
 );
 ```
 
-## Tracking Reasoning Usage
+On OpenAI and Gemini, use a lower `ReasoningEffort` level instead.
+
+## Tracking Usage
 
-Monitor token usage including reasoning tokens:
+Monitor token usage and cost directly on the response:
 
 ```php
 <?php
 $response = $agentClient->run($client, $request);
-$usage = $response->getTokenUsage();
 
-echo "Input tokens: " . $usage->getInputTokens() . "\n";
-echo "Reasoning tokens: " . $usage->getReasoningTokens() . "\n";
-echo "Output tokens: " . $usage->getOutputTokens() . "\n";
-echo "Total cost: $" . $usage->getTotalCost() . "\n";
+echo "Input tokens: " . $response->getInputTokens() . "\n";
+echo "Output tokens: " . $response->getOutputTokens() . "\n";
+echo "Input cost: $" . $response->getInputPriceUsd() . "\n";
+echo "Output cost: $" . $response->getOutputPriceUsd() . "\n";
+echo "Time: " . $response->getTotalTimeMs() . " ms\n";
 ```
 
+Reasoning tokens are included in the output token count reported by the providers.
+
 ## Combining with Other Features
 
 ### With Tools
@@ -195,17 +279,20 @@ $response = $agentClient->run(
 ## Best Practices
 
 1. **Start with MEDIUM effort** - Only increase if needed
-2. **Set budgets for production** - Prevent runaway costs
+2. **Cap thinking tokens on Anthropic** - Use `ReasoningBudget` to prevent runaway costs
 3. **Use for appropriate tasks** - Don't use reasoning models for simple queries
-4. **Monitor costs closely** - Track token usage and adjust budgets
-5. **Test with o4-mini first** - More cost-effective for development
+4. **Monitor costs closely** - Track token usage via `getOutputTokens()` and `getOutputPriceUsd()`
+5. **Test with cheaper models first** - o4-mini or Gemini Flash are more cost-effective for development
 
 ## Provider Support
 
-- ✅ **OpenAI** - o3, o4-mini (native reasoning support)
-- ❌ **Anthropic** - Not available (Claude uses different architecture)
-- ❌ **Google Gemini** - Not available
-- ⚠️ **OpenAI-compatible** - Depends on provider
+| Feature | Anthropic | OpenAI | Gemini |
+|---|---|---|---|
+| `ReasoningEffort` | ✅ (adaptive extended thinking + effort) | ✅ (`reasoning_effort`) | ✅ (`thinkingLevel` on 3.x, `thinkingBudget` on 2.x) |
+| `ReasoningBudget` | ✅ (`thinking.budget_tokens`) | ❌ throws `InvalidArgumentException` | ❌ throws `InvalidArgumentException` |
+| Thinking visible in response | ✅ (`LLMMessageReasoning`) | ❌ | ✅ (streaming `THINKING_DELTA`) |
+
+For OpenAI-compatible providers, support depends on the underlying model.
 
 ## See Also
 
diff --git a/docs/guides/streaming.md b/docs/guides/streaming.md
index e8d90f0..def88a0 100644
--- a/docs/guides/streaming.md
+++ b/docs/guides/streaming.md
@@ -211,10 +211,9 @@ $response = $agentClient->run($client, new LLMRequest(
 $textDeltas = array_filter($events, fn($e) => $e->type === StreamEventType::TEXT_DELTA);
 assert(count($textDeltas) > 0, 'Expected text deltas');
 
-// The accumulated text matches the final response
+// The accumulated text matches the final response on all providers
 $streamedText = implode('', array_map(fn($e) => $e->delta, $textDeltas));
-// Note: For Anthropic/OpenAI, this equals $response->getLastText()
-// For Gemini, text parts are separate (each chunk is a distinct text part)
+assert($streamedText === $response->getLastText());
 ```
 
 ### Logging Tool Calls with Timing
diff --git a/docs/guides/structured-output.md b/docs/guides/structured-output.md
new file mode 100644
index 0000000..b9fed18
--- /dev/null
+++ b/docs/guides/structured-output.md
@@ -0,0 +1,200 @@
+# Structured Output
+
+Force the model to respond with JSON matching a schema you define. Instead of parsing free-form text and hoping for the best, structured output guarantees machine-readable responses — ideal for data extraction, classification, and any workflow where the LLM response feeds directly into your application logic.
+
+## Overview
+
+Structured output works the same way across all three providers:
+
+1. Define a JSON Schema describing the response shape
+2. Pass it to `LLMRequest` via the `structuredOutputConfig` parameter
+3. Read the decoded result with `$response->getLastStructuredData()`
+
+The library translates your schema to each provider's native structured-output mechanism, so the same request code works with Anthropic, OpenAI, and Gemini.
+
+## Basic Usage
+
+Create a `StructuredOutputConfig` with a raw JSON Schema array:
+
+```php
+<?php
+use Soukicz\Llm\Config\StructuredOutputConfig;
+use Soukicz\Llm\LLMRequest;
+
+$schema = [
+    'type' => 'object',
+    'properties' => [
+        'name' => ['type' => 'string'],
+        'email' => ['type' => 'string'],
+    ],
+    'required' => ['name', 'email'],
+];
+
+$request = new LLMRequest(
+    model: $model,
+    conversation: $conversation,
+    structuredOutputConfig: new StructuredOutputConfig($schema),
+);
+```
+
+## Reading the Result
+
+When a request has a `structuredOutputConfig`, the response text is parsed as JSON and stored as structured data. Read it with `getLastStructuredData()`, which returns the decoded array:
+
+```php
+<?php
+$response = $agentClient->run($client, $request);
+
+$data = $response->getLastStructuredData();
+echo $data['name'];   // "Jane"
+echo $data['email'];  // "jane@example.com"
+```
+
+**Note:** With structured output enabled, the assistant message contains structured data instead of plain text — `getLastText()` will throw a `RuntimeException`. Use `getLastStructuredData()` instead.
+
+The raw JSON string is preserved internally, so structured responses round-trip correctly when you continue the conversation in follow-up requests.
+
+## Strict Mode
+
+`StructuredOutputConfig` takes an optional second parameter:
+
+```php
+<?php
+new StructuredOutputConfig($schema, strict: true);  // default
+new StructuredOutputConfig($schema, strict: false); // permissive
+```
+
+**`strict: true` (default)** — The schema is enforced exactly. On OpenAI this enables the provider-side strict schema mode (`"strict": true` in `response_format`), which constrains generation so the output is guaranteed to match the schema: all required fields present, no extra properties, correct types.
+
+**`strict: false`** — Permissive mode. The model is guided by the schema but the provider does not hard-enforce it, which can help when a schema uses features that strict mode rejects. The `strict` flag is currently forwarded only to OpenAI; Anthropic and Gemini requests are encoded the same way regardless of the flag.
+
+### Schema Normalization
+
+Each encoder automatically adjusts your schema to what the provider accepts:
+
+- **Anthropic and OpenAI** require `"additionalProperties": false` on every object in strict mode — the library adds it recursively wherever you didn't specify it.
+- **Anthropic** strict mode does not support the constraints `minItems`, `maxItems`, `minimum`, `maximum`, `minLength`, `maxLength`, and `pattern`. The library removes them and appends them to the property `description` so the model still sees them as guidance.
+- **Gemini** does not support `additionalProperties` at all — the library strips it recursively before sending the schema.
+
+You can write one portable schema and let the encoders handle the differences.
+
+## Complete Example
+
+Extract structured contact data from free-form text:
+
+```php
+<?php
+use Soukicz\Llm\Cache\FileCache;
+use Soukicz\Llm\Client\Anthropic\AnthropicClient;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
+use Soukicz\Llm\Client\LLMAgentClient;
+use Soukicz\Llm\Config\StructuredOutputConfig;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\Message\LLMMessage;
+
+require_once __DIR__ . '/vendor/autoload.php';
+
+$cache = new FileCache(sys_get_temp_dir());
+$anthropic = new AnthropicClient(getenv('ANTHROPIC_API_KEY'), $cache);
+$agentClient = new LLMAgentClient();
+
+$schema = [
+    'type' => 'object',
+    'properties' => [
+        'name' => ['type' => 'string', 'description' => 'Full name of the person'],
+        'email' => ['type' => 'string', 'description' => 'Email address'],
+        'phone' => ['type' => ['string', 'null'], 'description' => 'Phone number, null if not mentioned'],
+        'topics' => [
+            'type' => 'array',
+            'items' => ['type' => 'string'],
+            'description' => 'Topics the person wants to discuss',
+        ],
+    ],
+    'required' => ['name', 'email', 'phone', 'topics'],
+];
+
+$response = $agentClient->run(
+    client: $anthropic,
+    request: new LLMRequest(
+        model: new AnthropicClaude46Sonnet(),
+        conversation: new LLMConversation([
+            LLMMessage::createFromUserString(
+                'Extract the contact information from this email: ' .
+                '"Hi, this is Jane Novak (jane.novak@example.com). ' .
+                'I would like to talk about pricing and the API integration next week."'
+            )
+        ]),
+        structuredOutputConfig: new StructuredOutputConfig($schema),
+    )
+);
+
+$contact = $response->getLastStructuredData();
+
+echo $contact['name'] . "\n";          // Jane Novak
+echo $contact['email'] . "\n";         // jane.novak@example.com
+var_dump($contact['phone']);           // NULL
+print_r($contact['topics']);           // ['pricing', 'API integration']
+```
+
+## Combining with Other Features
+
+### With Tools
+
+Structured output and tools can be combined in a single request with `LLMAgentClient`. The agent runs the tool loop as usual, and the final response is constrained to your schema:
+
+```php
+<?php
+$response = $agentClient->run(
+    client: $anthropic,
+    request: new LLMRequest(
+        model: new AnthropicClaude46Sonnet(),
+        conversation: $conversation,
+        tools: [$currencyTool],
+        structuredOutputConfig: new StructuredOutputConfig($schema),
+    )
+);
+
+$data = $response->getLastStructuredData();
+```
+
+### With Reasoning
+
+Structured output also works alongside reasoning configuration — for example, Anthropic encodes both into the same `output_config`:
+
+```php
+<?php
+use Soukicz\Llm\Config\ReasoningEffort;
+
+$request = new LLMRequest(
+    model: new AnthropicClaude46Sonnet(),
+    conversation: $conversation,
+    reasoningConfig: ReasoningEffort::HIGH,
+    structuredOutputConfig: new StructuredOutputConfig($schema),
+);
+```
+
+## Provider Support
+
+| Provider | Mechanism |
+|---|---|
+| ✅ **Anthropic** | `output_config` with a `json_schema` format |
+| ✅ **OpenAI** | `response_format` of type `json_schema` (with `strict` flag) |
+| ✅ **Google Gemini** | `responseMimeType: application/json` + `responseSchema` (`additionalProperties` is stripped) |
+| ⚠️ **OpenAI-compatible** | Uses the OpenAI encoding; depends on the underlying provider/model |
+
+## Best Practices
+
+1. **Mark fields as required** - Combined with strict mode this guarantees field presence
+2. **Use descriptions** - Property descriptions guide the model just like prompt text
+3. **Allow null where data may be missing** - Use `'type' => ['string', 'null']` instead of omitting fields
+4. **Keep schemas flat where possible** - Deeply nested schemas are harder for models to fill correctly
+5. **Use `getLastStructuredData()`** - Don't parse `getLastText()`; it throws for structured responses
+6. **Prefer structured output over prompt-engineered JSON** - It removes the need for "respond only with JSON" instructions and feedback-loop re-parsing
+
+## See Also
+
+- [Configuration Guide](configuration.md) - All `LLMRequest` parameters
+- [Tools Guide](tools.md) - Combine structured output with function calling
+- [Feedback Loops](feedback-loops.md) - Validate response content beyond schema shape
+- [Reasoning Models](reasoning.md) - Combine structured output with reasoning
diff --git a/docs/guides/tools.md b/docs/guides/tools.md
index 64beab8..29d08ab 100644
--- a/docs/guides/tools.md
+++ b/docs/guides/tools.md
@@ -126,44 +126,35 @@ handler: function (array $input): PromiseInterface {
 
 **Note:** Tool handlers cannot return plain arrays or scalar values. Always wrap your results in `LLMMessageContents::fromArrayData()`.
 
-**Tip:** If you need to convert `LLMMessageContents` back to a plain array (e.g., for testing), use the `toArray()` method:
+**Tip:** If you need to inspect the contents of an `LLMMessageContents` (e.g., for testing), iterate over it or call `getMessages()` — it returns the individual `LLMMessageContent` items:
 ```php
 <?php
+use Soukicz\Llm\Message\LLMMessageArrayData;
+
 $result = $tool->handle(['input' => 'value']);
-$array = $result->toArray();  // Converts to plain array
+foreach ($result->getMessages() as $content) {
+    if ($content instanceof LLMMessageArrayData) {
+        $array = $content->getData();  // The plain array passed to fromArrayData()
+    }
+}
 ```
 
 ## Built-in Tools
 
 ### Text Editor Tool
 
-For building file-manipulation agents with Anthropic models, you can use the `TextEditorTool`. This tool requires a custom storage implementation.
+For building file-manipulation agents, use the `TextEditorTool`. The library ships with two ready-to-use storage backends, so no custom code is needed:
 
-**Note:** This is an advanced feature that requires implementing the `TextEditorStorage` interface to handle file operations securely.
+- `TextEditorStorageFilesystem` - Works on real files, sandboxed to a base directory (path traversal and symlink escapes are blocked)
+- `TextEditorStorageMemory` - Keeps files in memory, ideal for tests or ephemeral workspaces
 
 ```php
 <?php
+use Soukicz\Llm\Tool\TextEditor\TextEditorStorageFilesystem;
 use Soukicz\Llm\Tool\TextEditor\TextEditorTool;
-use Soukicz\Llm\Tool\TextEditor\TextEditorStorage;
-
-// You need to implement TextEditorStorage interface
-class MyTextEditorStorage implements TextEditorStorage {
-    public function __construct(private string $basePath) {}
-
-    public function read(string $path): string {
-        // Implement secure file reading
-        return file_get_contents($this->basePath . '/' . $path);
-    }
 
-    public function write(string $path, string $content): void {
-        // Implement secure file writing
-        file_put_contents($this->basePath . '/' . $path, $content);
-    }
-
-    // Implement other required methods...
-}
-
-$storage = new MyTextEditorStorage('/path/to/working/directory');
+// All file operations are restricted to this directory
+$storage = new TextEditorStorageFilesystem('/path/to/working/directory');
 $textEditorTool = new TextEditorTool($storage);
 
 $request = new LLMRequest(
@@ -181,7 +172,9 @@ The text editor tool supports:
 - `str_replace` - Replace text in files
 - `insert` - Insert text at specific line numbers
 
-**Security Considerations:** When implementing `TextEditorStorage`, ensure proper path validation, access controls, and sandboxing to prevent unauthorized file access.
+When used with Anthropic models, the tool is automatically registered as Claude's native text editor tool; with other providers it works as a regular function-calling tool.
+
+**Custom storage:** For other backends (database, S3, ...), implement the `TextEditorStorage` interface. It defines file operations (`getFileContent`, `setFileContent`, `createFile`, `deleteFile`, `renameFile`, `isFile`) and directory operations (`getDirectoryContent`, `createDirectory`, `deleteDirectory`, `renameDirectory`, `isDirectory`) — see `Soukicz\Llm\Tool\TextEditor\TextEditorStorage` for the exact signatures. Ensure proper path validation and sandboxing to prevent unauthorized file access.
 
 ## Input Schema
 
@@ -258,5 +251,6 @@ The `inputSchema` follows JSON Schema specification. Common patterns:
 ## See Also
 
 - [Feedback Loops](feedback-loops.md) - Validate tool outputs
+- [Structured Output](structured-output.md) - Combine tools with schema-constrained responses
 - [Examples](../examples/index.md) - More tool examples
 - [Provider Documentation](../providers/README.md) - Provider-specific tool features
diff --git a/docs/providers/README.md b/docs/providers/README.md
index 17e907d..9755afd 100644
--- a/docs/providers/README.md
+++ b/docs/providers/README.md
@@ -6,10 +6,12 @@ This guide shows how to use each provider client in PHP LLM. All providers share
 
 | Client | Implements LLMClient | Implements LLMBatchClient | Constructor Parameters |
 |--------|---------------------|---------------------------|------------------------|
-| `AnthropicClient` | ✅ | ✅ | `apiKey`, `cache`, `handler` |
-| `OpenAIClient` | ✅ | ✅ | `apiKey`, `apiOrganization`, `cache`, `handler` |
-| `GeminiClient` | ✅ | ❌ | `apiKey`, `cache`, `safetySettings`, `handler` |
-| `OpenAICompatibleClient` | ✅ | Varies | `apiKey`, `baseUrl`, `cache`, `handler` |
+| `AnthropicClient` | ✅ | ✅ | `apiKey`, `cache`, `customHttpMiddleware`, `betaFeatures` |
+| `OpenAIClient` | ✅ | ✅ | `apiKey`, `apiOrganization`, `cache`, `customHttpMiddleware` |
+| `GeminiClient` | ✅ | ❌ | `apiKey`, `cache`, `customHttpMiddleware`, `safetySettings` |
+| `OpenAICompatibleClient` | ✅ | ✅ (if the endpoint supports it) | `apiKey`, `baseUrl`, `cache`, `customHttpMiddleware` |
+
+Structured output (`StructuredOutputConfig`) is supported by all three native providers (Anthropic, OpenAI, Gemini).
 
 ## Anthropic (Claude)
 
@@ -23,32 +25,47 @@ use Soukicz\Llm\Client\Anthropic\AnthropicClient;
 $cache = new FileCache(sys_get_temp_dir());
 $client = new AnthropicClient(
     apiKey: 'sk-ant-xxxxx',
-    cache: $cache,           // Optional: CacheInterface
-    handler: $handlerStack   // Optional: Guzzle HandlerStack for middleware
+    cache: $cache,                         // Optional: CacheInterface
+    customHttpMiddleware: $middleware,     // Optional: a single Guzzle middleware callable
+    betaFeatures: ['context-1m-2025-08-07'] // Optional: Anthropic beta feature flags
 );
 ```
 
 ### Model Classes
 
-All Anthropic models require a version constant:
+Most Anthropic models require a version constant; the newest models take no constructor arguments:
 
 ```php
 <?php
 use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Sonnet;
 use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude35Haiku;
-use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude37Sonnet;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Opus;
 
-// Must specify version when instantiating
+// Versioned models require a version constant
 $model = new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929);
 $model = new AnthropicClaude35Haiku(AnthropicClaude35Haiku::VERSION_20241022);
+
+// Claude 4.6 models take no arguments
+$model = new AnthropicClaude46Sonnet();
+$model = new AnthropicClaude46Opus();
 ```
 
 **Available model classes:**
-- `AnthropicClaude35Sonnet`, `AnthropicClaude35Haiku`
-- `AnthropicClaude37Sonnet`
-- `AnthropicClaude4Sonnet`, `AnthropicClaude4Opus`
-- `AnthropicClaude41Opus`
-- `AnthropicClaude45Sonnet`
+
+| Class | Constructor |
+|-------|-------------|
+| `AnthropicClaude35Sonnet` | `new AnthropicClaude35Sonnet(AnthropicClaude35Sonnet::VERSION_20241022)` |
+| `AnthropicClaude35Haiku` | `new AnthropicClaude35Haiku(AnthropicClaude35Haiku::VERSION_20241022)` |
+| `AnthropicClaude37Sonnet` | `new AnthropicClaude37Sonnet(AnthropicClaude37Sonnet::VERSION_20250219)` |
+| `AnthropicClaude4Sonnet` | `new AnthropicClaude4Sonnet(AnthropicClaude4Sonnet::VERSION_20250514)` |
+| `AnthropicClaude4Opus` | `new AnthropicClaude4Opus(AnthropicClaude4Opus::VERSION_20250514)` |
+| `AnthropicClaude41Opus` | `new AnthropicClaude41Opus(AnthropicClaude41Opus::VERSION_20250805)` |
+| `AnthropicClaude45Sonnet` | `new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929)` |
+| `AnthropicClaude45Opus` | `new AnthropicClaude45Opus(AnthropicClaude45Opus::VERSION_20251101)` |
+| `AnthropicClaude45Haiku` | `new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001)` |
+| `AnthropicClaude46Sonnet` | `new AnthropicClaude46Sonnet()` |
+| `AnthropicClaude46Opus` | `new AnthropicClaude46Opus()` |
 
 ### Batch Processing Support
 
@@ -74,10 +91,13 @@ use Soukicz\Llm\Client\OpenAI\OpenAIClient;
 $cache = new FileCache(sys_get_temp_dir());
 $client = new OpenAIClient(
     apiKey: 'sk-xxxxx',
-    apiOrganization: 'org-xxxxx',  // Optional: for organization accounts
-    cache: $cache,                  // Optional: CacheInterface
-    handler: $handlerStack          // Optional: Guzzle HandlerStack
+    apiOrganization: 'org-xxxxx',       // Required but nullable - pass null if you have no organization
+    cache: $cache,                      // Optional: CacheInterface
+    customHttpMiddleware: $middleware   // Optional: a single Guzzle middleware callable
 );
+
+// Without an organization, pass null explicitly:
+$client = new OpenAIClient('sk-xxxxx', null, $cache);
 ```
 
 ### Model Classes
@@ -86,44 +106,63 @@ All OpenAI models require a version constant:
 
 ```php
 <?php
+use Soukicz\Llm\Client\OpenAI\Model\GPT54;
+use Soukicz\Llm\Client\OpenAI\Model\GPT52;
 use Soukicz\Llm\Client\OpenAI\Model\GPT5;
-use Soukicz\Llm\Client\OpenAI\Model\GPT4o;
 use Soukicz\Llm\Client\OpenAI\Model\GPTo3;
-use Soukicz\Llm\Client\OpenAI\Model\GPTo4Mini;
 
 // All models require version parameter
+$model = new GPT54(GPT54::VERSION_2026_03_05);
+$model = new GPT52(GPT52::VERSION_2025_12_11);
 $model = new GPT5(GPT5::VERSION_2025_08_07);
-$model = new GPT4o(GPT4o::VERSION_2024_11_20);
 $model = new GPTo3(GPTo3::VERSION_2025_04_16);
-$model = new GPTo4Mini(GPTo4Mini::VERSION_2025_04_16);
 ```
 
 **Available model classes:**
-- `GPT4o`, `GPT4oMini`
-- `GPT41`, `GPT41Mini`, `GPT41Nano`
-- `GPTo3`, `GPTo4Mini` (reasoning models)
-- `GPT5`, `GPT5Mini`, `GPT5Nano`
+
+| Class | Constructor |
+|-------|-------------|
+| `GPT4o` | `new GPT4o(GPT4o::VERSION_2024_11_20)` |
+| `GPT4oMini` | `new GPT4oMini(GPT4oMini::VERSION_2024_07_18)` |
+| `GPT41` | `new GPT41(GPT41::VERSION_2025_04_14)` |
+| `GPT41Mini` | `new GPT41Mini(GPT41Mini::VERSION_2025_04_14)` |
+| `GPT41Nano` | `new GPT41Nano(GPT41Nano::VERSION_2025_04_14)` |
+| `GPTo3` | `new GPTo3(GPTo3::VERSION_2025_04_16)` |
+| `GPTo4Mini` | `new GPTo4Mini(GPTo4Mini::VERSION_2025_04_16)` |
+| `GPT5` | `new GPT5(GPT5::VERSION_2025_08_07)` |
+| `GPT5Mini` | `new GPT5Mini(GPT5Mini::VERSION_2025_08_07)` |
+| `GPT5Nano` | `new GPT5Nano(GPT5Nano::VERSION_2025_08_07)` |
+| `GPT52` | `new GPT52(GPT52::VERSION_2025_12_11)` |
+| `GPT54` | `new GPT54(GPT54::VERSION_2026_03_05)` |
+| `GPT54Mini` | `new GPT54Mini(GPT54Mini::VERSION_2026_03_17)` |
+| `GPT54Nano` | `new GPT54Nano(GPT54Nano::VERSION_2026_03_17)` |
 
 **Note:** Each model class has version constants defined (e.g., `GPT5::VERSION_2025_08_07`). Check the class for available versions.
 
-### Reasoning Model Configuration
+### Reasoning Configuration
 
-Use `reasoningEffort` or `reasoningConfig` in `LLMRequest`:
+Use the `reasoningConfig` parameter of `LLMRequest`. The `ReasoningEffort` enum (`NONE`, `MINIMAL`, `LOW`, `MEDIUM`, `HIGH`, `EXTRA_HIGH`) works on OpenAI, Anthropic, and Gemini:
 
 ```php
 <?php
 use Soukicz\Llm\Config\ReasoningEffort;
-use Soukicz\Llm\Config\ReasoningBudget;
 
 $request = new LLMRequest(
     model: new GPTo3(GPTo3::VERSION_2025_04_16),
     conversation: $conversation,
-    reasoningEffort: ReasoningEffort::HIGH  // or ReasoningEffort::LOW, ::MEDIUM
+    reasoningConfig: ReasoningEffort::HIGH  // or ::LOW, ::MEDIUM, ...
 );
+```
+
+A token-based `ReasoningBudget` is supported by **Anthropic only** (the OpenAI and Gemini encoders throw an `InvalidArgumentException`):
+
+```php
+<?php
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
+use Soukicz\Llm\Config\ReasoningBudget;
 
-// Or use token budget
 $request = new LLMRequest(
-    model: new GPTo3(GPTo3::VERSION_2025_04_16),
+    model: new AnthropicClaude46Sonnet(),
     conversation: $conversation,
     reasoningConfig: new ReasoningBudget(10000)
 );
@@ -150,9 +189,9 @@ use Soukicz\Llm\Client\Gemini\GeminiClient;
 $cache = new FileCache(sys_get_temp_dir());
 $client = new GeminiClient(
     apiKey: 'your-api-key',
-    cache: $cache,              // Optional: CacheInterface
-    safetySettings: [],         // Optional: array of safety settings (see below)
-    handler: $handlerStack      // Optional: Guzzle HandlerStack
+    cache: $cache,                        // Optional: CacheInterface
+    customHttpMiddleware: $middleware,    // Optional: a single Guzzle middleware callable
+    safetySettings: []                    // Optional: array of safety settings (see below)
 );
 ```
 
@@ -162,19 +201,33 @@ $client = new GeminiClient(
 <?php
 use Soukicz\Llm\Client\Gemini\Model\Gemini25Pro;
 use Soukicz\Llm\Client\Gemini\Model\Gemini25Flash;
-use Soukicz\Llm\Client\Gemini\Model\Gemini20Flash;
+use Soukicz\Llm\Client\Gemini\Model\Gemini3ProPreview;
 
-// Models don't require version parameters
+// Most models don't require constructor arguments
 $model = new Gemini25Pro();
 $model = new Gemini25Flash();
+$model = new Gemini3ProPreview();
 ```
 
 **Available model classes:**
-- `Gemini20Flash`, `Gemini20FlashLite`
-- `Gemini25Flash`, `Gemini25FlashLite`
-- `Gemini25FlashImagePreview`, `Gemini25FlashImage` — [image generation](https://ai.google.dev/gemini-api/docs/image-generation)
-- `Gemini3ProImagePreview`, `Gemini31FlashImagePreview` — [image generation](https://ai.google.dev/gemini-api/docs/image-generation)
-- `Gemini25Pro`, `Gemini25ProPreview`
+
+| Class | Constructor |
+|-------|-------------|
+| `Gemini20Flash` | `new Gemini20Flash()` |
+| `Gemini20FlashLite` | `new Gemini20FlashLite()` |
+| `Gemini25Flash` | `new Gemini25Flash()` |
+| `Gemini25FlashLite` | `new Gemini25FlashLite()` |
+| `Gemini25Pro` | `new Gemini25Pro()` |
+| `Gemini25ProPreview` | `new Gemini25ProPreview(Gemini25ProPreview::VERSION_03_25)` |
+| `Gemini3ProPreview` | `new Gemini3ProPreview()` |
+| `Gemini25FlashImage` | `new Gemini25FlashImage()` — [image generation](https://ai.google.dev/gemini-api/docs/image-generation) |
+| `Gemini25FlashImagePreview` | `new Gemini25FlashImagePreview()` — [image generation](https://ai.google.dev/gemini-api/docs/image-generation) |
+| `Gemini3ProImagePreview` | `new Gemini3ProImagePreview()` — [image generation](https://ai.google.dev/gemini-api/docs/image-generation) |
+| `Gemini31FlashImagePreview` | `new Gemini31FlashImagePreview()` — [image generation](https://ai.google.dev/gemini-api/docs/image-generation) |
+
+Image generation models accept optional constructor arguments for output control, e.g. `new Gemini25FlashImage(imageAspectRatio: '16:9', imageSize: '2K')`.
+
+Gemini models accept PDF inputs (sent as inline `application/pdf` data) and image inputs through the same `LLMRequest` API as the other providers.
 
 ### Safety Settings Configuration
 
@@ -216,7 +269,6 @@ $client = new GeminiClient(
 
 ### Limitations
 
-- **No PDF support** - Gemini models don't accept PDF inputs through this library
 - **No batch processing** - Must process requests individually
 
 ## OpenAI-Compatible Providers
@@ -235,7 +287,7 @@ $client = new OpenAICompatibleClient(
     apiKey: 'your-api-key',
     baseUrl: 'https://api.provider.com/v1',  // Required: API endpoint
     cache: $cache,                            // Optional: CacheInterface
-    handler: $handlerStack                    // Optional: Guzzle HandlerStack
+    customHttpMiddleware: $middleware         // Optional: a single Guzzle middleware callable
 );
 ```
 
@@ -269,9 +321,9 @@ $client = new OpenAICompatibleClient(
 );
 
 // Use OpenRouter's model naming format
-$model = new LocalModel('anthropic/claude-3.5-sonnet');
-$model = new LocalModel('openai/gpt-4o');
-$model = new LocalModel('meta-llama/llama-3.2-8b-instruct');
+$model = new LocalModel('anthropic/claude-haiku-4.5');
+$model = new LocalModel('openai/gpt-5.2');
+$model = new LocalModel('meta-llama/llama-3.3-70b-instruct');
 ```
 
 #### Ollama (Local)
@@ -305,7 +357,7 @@ $model = new LocalModel('local-model');
 Feature availability depends entirely on the underlying provider/model:
 - **Function calling**: Check if provider supports tools
 - **Multimodal**: Check if model accepts images/PDFs
-- **Batch processing**: Most compatible APIs don't support batching
+- **Batch processing**: `OpenAICompatibleClient` implements `LLMBatchClient`, but it only works if the endpoint provides the OpenAI batch API
 - **Reasoning**: Only if provider offers reasoning models
 
 ## AWS Bedrock
@@ -362,20 +414,18 @@ $gemini = new GeminiClient('key', $cache);
 
 ### Guzzle Middleware
 
-All clients accept a Guzzle `HandlerStack` for custom middleware (logging, retries, etc.):
+All clients accept a single Guzzle middleware callable via `customHttpMiddleware` (it is pushed onto the client's internal `HandlerStack`). Use this for logging, custom retries, etc.:
 
 ```php
 <?php
-use GuzzleHttp\HandlerStack;
 use GuzzleHttp\Middleware;
 
-$stack = HandlerStack::create();
-$stack->push(Middleware::log($logger, $messageFormatter));
+$middleware = Middleware::log($logger, $messageFormatter);
 
 $client = new AnthropicClient(
     apiKey: 'key',
     cache: $cache,
-    handler: $stack
+    customHttpMiddleware: $middleware
 );
 ```
 
@@ -392,7 +442,7 @@ $anthropic = new AnthropicClient(
 
 $openai = new OpenAIClient(
     apiKey: getenv('OPENAI_API_KEY'),
-    apiOrganization: getenv('OPENAI_ORG_ID'),
+    apiOrganization: getenv('OPENAI_ORG_ID') ?: null,
     cache: $cache
 );
 ```
@@ -401,5 +451,5 @@ $openai = new OpenAIClient(
 
 - [Configuration Guide](../guides/configuration.md) - Request configuration options
 - [Multimodal Guide](../guides/multimodal.md) - Using images and PDFs
-- [Reasoning Guide](../guides/reasoning.md) - OpenAI reasoning models
+- [Reasoning Guide](../guides/reasoning.md) - Reasoning configuration for OpenAI, Anthropic, and Gemini
 - [Batch Processing Guide](../guides/batch-processing.md) - Anthropic and OpenAI batch APIs
diff --git a/mkdocs.yml b/mkdocs.yml
index 990acb4..3163f10 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -38,6 +38,7 @@ nav:
   - Getting Started:
       - Quick Start: examples/quick-start.md
   - Examples:
+      - Examples Overview: examples/index.md
       - Quick Start: examples/quick-start.md
       - Best Practices: examples/best-practices.md
       - Tools & Function Calling: examples/tools-and-function-calling.md
@@ -50,7 +51,9 @@ nav:
       - Caching: guides/caching.md
       - Batch Processing: guides/batch-processing.md
       - Multimodal: guides/multimodal.md
+      - Streaming: guides/streaming.md
       - Reasoning: guides/reasoning.md
+      - Structured Output: guides/structured-output.md
       - Feedback Loops: guides/feedback-loops.md
   - Providers:
       - Overview: providers/README.md
diff --git a/src/Cache/FileCache.php b/src/Cache/FileCache.php
index fba3d04..04b25e8 100644
--- a/src/Cache/FileCache.php
+++ b/src/Cache/FileCache.php
@@ -22,12 +22,22 @@ public function fetch(RequestInterface $request): ?ResponseInterface {
             return null;
         }
 
-        return $this->responseFromJson(file_get_contents($path));
+        $contents = @file_get_contents($path);
+        if ($contents === false) {
+            return null;
+        }
+
+        try {
+            return $this->responseFromJson($contents);
+        } catch (\JsonException) {
+            // Treat a corrupted cache file as a cache miss
+            return null;
+        }
     }
 
     public function store(RequestInterface $request, ResponseInterface $response): void {
         $key = $this->getCacheKey($request);
-        file_put_contents($this->getPath($key), $this->responseToJson($response));
+        file_put_contents($this->getPath($key), $this->responseToJson($response), LOCK_EX);
     }
 
     public function invalidate(RequestInterface $request): void {
diff --git a/src/Client/Anthropic/AnthropicEncoder.php b/src/Client/Anthropic/AnthropicEncoder.php
index d4ff4eb..1c7c0c9 100644
--- a/src/Client/Anthropic/AnthropicEncoder.php
+++ b/src/Client/Anthropic/AnthropicEncoder.php
@@ -196,6 +196,8 @@ public function encodeRequest(LLMRequest $request): array {
             'model' => $request->getModel()->getCode(),
         ];
 
+        $outputConfig = [];
+
         $reasoningConfig = $request->getReasoningConfig();
         if ($reasoningConfig) {
             if ($reasoningConfig instanceof ReasoningBudget) {
@@ -208,14 +210,12 @@ public function encodeRequest(LLMRequest $request): array {
                     $options['thinking'] = [
                         'type' => 'adaptive',
                     ];
-                    $outputConfig = $options['output_config'] ?? [];
                     $outputConfig['effort'] = match ($reasoningConfig) {
                         ReasoningEffort::MINIMAL, ReasoningEffort::LOW => 'low',
                         ReasoningEffort::MEDIUM => 'medium',
                         ReasoningEffort::HIGH => 'high',
                         ReasoningEffort::EXTRA_HIGH => 'max',
                     };
-                    $options['output_config'] = $outputConfig;
                 }
             } else {
                 throw new \InvalidArgumentException('Unsupported reasoning config type');
@@ -224,11 +224,13 @@ public function encodeRequest(LLMRequest $request): array {
 
         $structuredOutputConfig = $request->getStructuredOutputConfig();
         if ($structuredOutputConfig !== null) {
-            $outputConfig = $options['output_config'] ?? [];
             $outputConfig['format'] = [
                 'type' => 'json_schema',
                 'schema' => self::normalizeSchemaForStrictMode($structuredOutputConfig->getSchema()),
             ];
+        }
+
+        if (!empty($outputConfig)) {
             $options['output_config'] = $outputConfig;
         }
 
@@ -298,7 +300,8 @@ public function decodeResponse(LLMRequest $request, ModelResponse $modelResponse
         $outputPrice = $response['usage']['output_tokens'] * ($request->getModel()->getOutputPricePerMillionTokens() / 1000 / 1000);
 
         $inputPrice += $cacheInputTokens * ($request->getModel()->getCachedInputPricePerMillionTokens() / 1000 / 1000);
-        $outputPrice += $cacheReadInputTokens * ($request->getModel()->getCachedOutputPricePerMillionTokens() / 1000 / 1000);
+        // Cache reads are input tokens, so their cost goes to the input bucket (rate: getCachedOutputPricePerMillionTokens())
+        $inputPrice += $cacheReadInputTokens * ($request->getModel()->getCachedOutputPricePerMillionTokens() / 1000 / 1000);
 
         $request = $request
             ->withCost(
diff --git a/src/Client/Gemini/GeminiEncoder.php b/src/Client/Gemini/GeminiEncoder.php
index 4993004..b660b06 100644
--- a/src/Client/Gemini/GeminiEncoder.php
+++ b/src/Client/Gemini/GeminiEncoder.php
@@ -3,6 +3,7 @@
 namespace Soukicz\Llm\Client\Gemini;
 
 use Soukicz\Llm\Client\Gemini\Model\GeminiImageModel;
+use Soukicz\Llm\Client\Gemini\Model\GeminiModel;
 use Soukicz\Llm\Client\ModelEncoder;
 use Soukicz\Llm\Client\ModelResponse;
 use Soukicz\Llm\Client\StopReason;
@@ -10,6 +11,7 @@
 use Soukicz\Llm\LLMRequest;
 use Soukicz\Llm\LLMResponse;
 use Soukicz\Llm\Message\LLMMessage;
+use Soukicz\Llm\Message\LLMMessageArrayData;
 use Soukicz\Llm\Message\LLMMessageContents;
 use Soukicz\Llm\Message\LLMMessageImage;
 use Soukicz\Llm\Message\LLMMessagePdf;
@@ -25,6 +27,7 @@ class GeminiEncoder implements ModelEncoder {
     public function encodeRequest(LLMRequest $request): array {
         $contents = [];
         $systemInstruction = null;
+        $toolNamesById = [];
 
         foreach ($request->getConversation()->getMessages() as $message) {
             if ($message->isSystem()) {
@@ -53,7 +56,9 @@ public function encodeRequest(LLMRequest $request): array {
                         ],
                     ];
                 } elseif ($messageContent instanceof LLMMessageToolUse) {
-                    // Function call in Gemini format
+                    // Function call in Gemini format - remember the name so the matching
+                    // function response can reference it (Gemini correlates by name, not ID)
+                    $toolNamesById[$messageContent->getId()] = $messageContent->getName();
                     $contents[] = [
                         'role' => 'model',
                         'parts' => [
@@ -69,14 +74,12 @@ public function encodeRequest(LLMRequest $request): array {
                 } elseif ($messageContent instanceof LLMMessageToolResult) {
                     // Function response in Gemini format
                     $contents[] = [
-                        'role' => 'function',
+                        'role' => 'user',
                         'parts' => [
                             [
                                 'function_response' => [
-                                    'name' => 'function_' . $messageContent->getId(), // Create a name from ID
-                                    'response' => [
-                                        'content' => $messageContent->getContent(),
-                                    ],
+                                    'name' => $toolNamesById[$messageContent->getId()] ?? $messageContent->getId(),
+                                    'response' => self::encodeToolResultResponse($messageContent),
                                 ],
                             ],
                         ],
@@ -84,8 +87,15 @@ public function encodeRequest(LLMRequest $request): array {
                     continue 2;
                 } elseif ($messageContent instanceof LLMMessageStructuredData) {
                     $parts[] = ['text' => $messageContent->getRawJson()];
+                } elseif ($messageContent instanceof LLMMessageArrayData) {
+                    $parts[] = ['text' => json_encode($messageContent->getData(), JSON_THROW_ON_ERROR)];
                 } elseif ($messageContent instanceof LLMMessagePdf) {
-                    throw new \InvalidArgumentException('PDF content type not supported for Gemini');
+                    $parts[] = [
+                        'inline_data' => [
+                            'mime_type' => 'application/pdf',
+                            'data' => $messageContent->getData(),
+                        ],
+                    ];
                 } else {
                     throw new \InvalidArgumentException('Unsupported message content type for Gemini');
                 }
@@ -143,6 +153,16 @@ public function encodeRequest(LLMRequest $request): array {
                     $requestData['generationConfig']['thinkingConfig'] = [
                         'thinkingBudget' => 0,
                     ];
+                } elseif ($model instanceof GeminiModel && !$model->supportsThinkingLevel()) {
+                    // Gemini 2.x models reject thinkingLevel and only accept a token budget
+                    $requestData['generationConfig']['thinkingConfig'] = [
+                        'thinkingBudget' => match ($reasoningConfig) {
+                            ReasoningEffort::MINIMAL => 512,
+                            ReasoningEffort::LOW => 1024,
+                            ReasoningEffort::MEDIUM => 8192,
+                            ReasoningEffort::HIGH, ReasoningEffort::EXTRA_HIGH => 24576,
+                        },
+                    ];
                 } else {
                     $requestData['generationConfig']['thinkingConfig'] = [
                         'thinkingLevel' => match ($reasoningConfig) {
@@ -159,18 +179,18 @@ public function encodeRequest(LLMRequest $request): array {
         }
 
         if (!empty($request->getTools())) {
-            $requestData['tools'] = [];
+            // Gemini expects all function declarations in a single tools entry
+            $functionDeclarations = [];
             foreach ($request->getTools() as $tool) {
-                $requestData['tools'][] = [
-                    'functionDeclarations' => [
-                        [
-                            'name' => $tool->getName(),
-                            'description' => $tool->getDescription(),
-                            'parameters' => $tool->getInputSchema(),
-                        ],
-                    ],
+                $functionDeclarations[] = [
+                    'name' => $tool->getName(),
+                    'description' => $tool->getDescription(),
+                    'parameters' => $tool->getInputSchema(),
                 ];
             }
+            $requestData['tools'] = [
+                ['functionDeclarations' => $functionDeclarations],
+            ];
         }
 
         return $requestData;
@@ -228,11 +248,7 @@ public function decodeResponse(LLMRequest $request, ModelResponse $modelResponse
 
         if (isset($response['usageMetadata'])) {
             $promptTokenCount = $response['usageMetadata']['promptTokenCount'];
-            if ($stopReason === StopReason::SAFETY && !isset($response['usageMetadata']['candidatesTokenCount'])) {
-                $outputTokenCount = 0;
-            } else {
-                $outputTokenCount = $response['usageMetadata']['candidatesTokenCount'];
-            }
+            $outputTokenCount = $response['usageMetadata']['candidatesTokenCount'] ?? 0;
 
             $inputPrice = $promptTokenCount * ($model->getInputPricePerMillionTokens() / 1_000_000);
             $outputPrice = $outputTokenCount * ($model->getOutputPricePerMillionTokens() / 1_000_000);
@@ -257,6 +273,27 @@ public function decodeResponse(LLMRequest $request, ModelResponse $modelResponse
         );
     }
 
+    /**
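+     * Convert tool result contents to a Gemini response payload: a lone array-data item is returned as-is, otherwise text/array items are newline-joined under "content" (other content types are skipped).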
+     */
+    private static function encodeToolResultResponse(LLMMessageToolResult $toolResult): array {
+        $contents = $toolResult->getContent();
+        if (count($contents) === 1 && $contents[0] instanceof LLMMessageArrayData) {
+            return $contents[0]->getData();
+        }
+
+        $texts = [];
+        foreach ($contents as $content) {
+            if ($content instanceof LLMMessageText) {
+                $texts[] = $content->getText();
+            } elseif ($content instanceof LLMMessageArrayData) {
+                $texts[] = json_encode($content->getData(), JSON_THROW_ON_ERROR);
+            }
+        }
+
+        return ['content' => implode("\n", $texts)];
+    }
+
     /**
      * Normalize a JSON Schema for Gemini by stripping unsupported properties.
      * Gemini does not support "additionalProperties" — it is silently removed.
diff --git a/src/Client/Gemini/Model/Gemini31FlashImagePreview.php b/src/Client/Gemini/Model/Gemini31FlashImagePreview.php
index dfcb5c0..d639e90 100644
--- a/src/Client/Gemini/Model/Gemini31FlashImagePreview.php
+++ b/src/Client/Gemini/Model/Gemini31FlashImagePreview.php
@@ -21,6 +21,10 @@ public function getImageSize(): ?string {
         return $this->imageSize;
     }
 
+    public function supportsThinkingLevel(): bool {
+        return true;
+    }
+
     public function getCode(): string {
         return 'gemini-3.1-flash-image-preview';
     }
diff --git a/src/Client/Gemini/Model/Gemini3ProImagePreview.php b/src/Client/Gemini/Model/Gemini3ProImagePreview.php
index 29cdda8..91d1544 100644
--- a/src/Client/Gemini/Model/Gemini3ProImagePreview.php
+++ b/src/Client/Gemini/Model/Gemini3ProImagePreview.php
@@ -20,6 +20,10 @@ public function getImageSize(): ?string {
         return $this->imageSize;
     }
 
+    public function supportsThinkingLevel(): bool {
+        return true;
+    }
+
     public function getCode(): string {
         return 'gemini-3-pro-image-preview';
     }
diff --git a/src/Client/Gemini/Model/Gemini3ProPreview.php b/src/Client/Gemini/Model/Gemini3ProPreview.php
index 61c28d4..867921b 100644
--- a/src/Client/Gemini/Model/Gemini3ProPreview.php
+++ b/src/Client/Gemini/Model/Gemini3ProPreview.php
@@ -6,6 +6,10 @@
  * @see https://ai.google.dev/gemini-api/docs/pricing
  */
 class Gemini3ProPreview extends GeminiModel {
+    public function supportsThinkingLevel(): bool {
+        return true;
+    }
+
     public function getCode(): string {
         return 'gemini-3-pro-preview';
     }
diff --git a/src/Client/Gemini/Model/GeminiModel.php b/src/Client/Gemini/Model/GeminiModel.php
index d2f311a..3f6df66 100644
--- a/src/Client/Gemini/Model/GeminiModel.php
+++ b/src/Client/Gemini/Model/GeminiModel.php
@@ -5,4 +5,11 @@
 use Soukicz\Llm\Client\ModelInterface;
 
 abstract class GeminiModel implements ModelInterface {
+    /**
+     * Whether the model accepts thinkingConfig.thinkingLevel (Gemini 3.x and newer).
+     * Older models (2.x) only support thinkingConfig.thinkingBudget.
+     */
+    public function supportsThinkingLevel(): bool {
+        return false;
+    }
 }
diff --git a/src/Client/LLMAgentClient.php b/src/Client/LLMAgentClient.php
index faf1111..cb3103a 100644
--- a/src/Client/LLMAgentClient.php
+++ b/src/Client/LLMAgentClient.php
@@ -62,34 +62,48 @@ private function processToolUseResponse(LLMResponse $response, LLMClient $client
         $toolResponseContents = [];
 
         foreach ($response->getConversation()->getLastMessage()->getContents() as $content) {
-            if ($content instanceof LLMMessageToolUse) {
-                foreach ($request->getTools() as $tool) {
-                    if ($tool->getName() === $content->getName()) {
-                        $input = $content->getInput();
-                        $noContent = empty($input) && empty($tool->getInputSchema()['required']);
-
-                        if (!$noContent) {
-                            try {
-                                Schema::import(json_decode(json_encode($tool->getInputSchema())))->in(json_decode(json_encode($input)));
-                            } catch (Exception $e) {
-                                $toolResponseContents[] = Create::promiseFor(new LLMMessageToolResult(
-                                    $content->getId(),
-                                    LLMMessageContents::fromErrorString('ERROR: Input is not matching expected schema: ' . $e->getMessage())
-                                ));
-                                continue;
-                            }
-                        }
-
-                        $toolResponse = $tool->handle($input);
-                        if ($toolResponse instanceof LLMMessageContents) {
-                            $toolResponse = Create::promiseFor($toolResponse);
-                        }
-                        $toolResponseContents[] = $toolResponse->then(function (LLMMessageContents $response) use ($content) {
-                            return new LLMMessageToolResult($content->getId(), $response);
-                        });
-                    }
+            if (!$content instanceof LLMMessageToolUse) {
+                continue;
+            }
+
+            $tool = null;
+            foreach ($request->getTools() as $candidateTool) {
+                if ($candidateTool->getName() === $content->getName()) {
+                    $tool = $candidateTool;
+                    break;
                 }
             }
+
+            if ($tool === null) {
+                $toolResponseContents[] = Create::promiseFor(new LLMMessageToolResult(
+                    $content->getId(),
+                    LLMMessageContents::fromErrorString('ERROR: Tool "' . $content->getName() . '" is not available')
+                ));
+                continue;
+            }
+
+            $input = $content->getInput();
+            $noContent = empty($input) && empty($tool->getInputSchema()['required']);
+
+            if (!$noContent) {
+                try {
+                    Schema::import(json_decode(json_encode($tool->getInputSchema())))->in(json_decode(json_encode($input)));
+                } catch (Exception $e) {
+                    $toolResponseContents[] = Create::promiseFor(new LLMMessageToolResult(
+                        $content->getId(),
+                        LLMMessageContents::fromErrorString('ERROR: Input is not matching expected schema: ' . $e->getMessage())
+                    ));
+                    continue;
+                }
+            }
+
+            $toolResponse = $tool->handle($input);
+            if ($toolResponse instanceof LLMMessageContents) {
+                $toolResponse = Create::promiseFor($toolResponse);
+            }
+            $toolResponseContents[] = $toolResponse->then(function (LLMMessageContents $response) use ($content) {
+                return new LLMMessageToolResult($content->getId(), $response);
+            });
         }
 
         $newRequest = $response->getRequest()->withMessage(LLMMessage::createFromUser(new LLMMessageContents(Utils::unwrap($toolResponseContents))));
diff --git a/src/Client/OpenAI/AbstractOpenAIClient.php b/src/Client/OpenAI/AbstractOpenAIClient.php
index 531019e..e6acf55 100644
--- a/src/Client/OpenAI/AbstractOpenAIClient.php
+++ b/src/Client/OpenAI/AbstractOpenAIClient.php
@@ -225,11 +225,15 @@ public function retrieveBatch(string $batchId): ?array {
             $result = json_decode($row, true, 512, JSON_THROW_ON_ERROR);
             $content = '';
             foreach ($result['response']['body']['choices'] as $contentPart) {
-                $content = $contentPart['message']['content'];
-                if (is_string($content)) {
-                    $content .= $content;
-                } elseif ($content['type'] === 'text') {
-                    $content .= $content['text'];
+                $messageContent = $contentPart['message']['content'];
+                if (is_string($messageContent)) {
+                    $content .= $messageContent;
+                } elseif (is_array($messageContent)) {
+                    foreach ($messageContent as $part) {
+                        if (($part['type'] ?? null) === 'text') {
+                            $content .= $part['text'];
+                        }
+                    }
                 }
             }
             $responses[$result['custom_id']] = $content;
diff --git a/src/Client/OpenAI/OpenAIEncoder.php b/src/Client/OpenAI/OpenAIEncoder.php
index b4512e3..c030b9e 100644
--- a/src/Client/OpenAI/OpenAIEncoder.php
+++ b/src/Client/OpenAI/OpenAIEncoder.php
@@ -148,7 +148,7 @@ public function encodeRequest(LLMRequest $request): array {
         $reasoningConfig = $request->getReasoningConfig();
         if ($reasoningConfig) {
             if ($reasoningConfig instanceof ReasoningEffort) {
-                $reasoningValue = match ($reasoningConfig) {
+                $requestData['reasoning_effort'] = match ($reasoningConfig) {
                     ReasoningEffort::NONE => 'none',
                     ReasoningEffort::LOW => 'low',
                     ReasoningEffort::MINIMAL => 'minimal',
@@ -156,9 +156,6 @@ public function encodeRequest(LLMRequest $request): array {
                     ReasoningEffort::HIGH => 'high',
                     ReasoningEffort::EXTRA_HIGH => 'xhigh',
                 };
-                if ($reasoningValue !== null) {
-                    $requestData['reasoning_effort'] = $reasoningValue;
-                }
             } else {
                 throw new InvalidArgumentException('Unsupported reasoning config type');
             }
@@ -204,8 +201,10 @@ public function decodeResponse(LLMRequest $request, ModelResponse $modelResponse
         if (isset($response['usage'])) {
             $promptTokens = $response['usage']['prompt_tokens'];
             $completionTokens = $response['usage']['completion_tokens'];
+            $cachedTokens = $response['usage']['prompt_tokens_details']['cached_tokens'] ?? 0;
 
-            $inputPrice = $promptTokens * ($model->getInputPricePerMillionTokens() / 1_000_000);
+            $inputPrice = ($promptTokens - $cachedTokens) * ($model->getInputPricePerMillionTokens() / 1_000_000)
+                + $cachedTokens * ($model->getCachedInputPricePerMillionTokens() / 1_000_000);
             $outputPrice = $completionTokens * ($model->getOutputPricePerMillionTokens() / 1_000_000);
 
             $request = $request->withCost($promptTokens, $completionTokens, $inputPrice, $outputPrice);
diff --git a/src/Http/HttpClientFactory.php b/src/Http/HttpClientFactory.php
index c3f16f3..3b3737b 100644
--- a/src/Http/HttpClientFactory.php
+++ b/src/Http/HttpClientFactory.php
@@ -3,6 +3,7 @@
 namespace Soukicz\Llm\Http;
 
 use GuzzleHttp\Client;
+use GuzzleHttp\Exception\ConnectException;
 use GuzzleHttp\HandlerStack;
 use GuzzleHttp\Middleware;
 use GuzzleHttp\Promise\Create;
@@ -66,15 +67,21 @@ function (ResponseInterface $response) use ($request, $cache, $requestStart) {
     }
 
     private static function addRetryMiddleware(HandlerStack $handler): void {
-        $decider = static function (int $retries, RequestInterface $request, ?ResponseInterface $response = null): bool {
+        $decider = static function (int $retries, RequestInterface $request, ?ResponseInterface $response = null, ?\Throwable $exception = null): bool {
+            if ($retries >= self::MAX_RETRIES) {
+                return false;
+            }
+            if ($exception instanceof ConnectException) {
+                return true;
+            }
+
             return
-                $retries < self::MAX_RETRIES
-                && null !== $response
+                null !== $response
                 && in_array($response->getStatusCode(), [429, 529, 500, 502, 503, 504], true);
         };
 
-        $delay = static function (int $retries, ResponseInterface $response): int {
-            if (!$response->hasHeader('Retry-After')) {
+        $delay = static function (int $retries, ?ResponseInterface $response = null): int {
+            if ($response === null || !$response->hasHeader('Retry-After')) {
                 return RetryMiddleware::exponentialDelay($retries);
             }
 
diff --git a/src/LLMConversation.php b/src/LLMConversation.php
index c0032b1..cb95278 100644
--- a/src/LLMConversation.php
+++ b/src/LLMConversation.php
@@ -47,6 +47,10 @@ public static function fromJson(array $data): self {
     }
 
     public function getLastMessage(): LLMMessage {
+        if (empty($this->messages)) {
+            throw new \UnderflowException('Conversation has no messages');
+        }
+
         return $this->messages[array_key_last($this->messages)];
     }
 }
diff --git a/src/LLMRequest.php b/src/LLMRequest.php
index e1c1937..06b4f2a 100644
--- a/src/LLMRequest.php
+++ b/src/LLMRequest.php
@@ -100,7 +100,7 @@ public function withCost(int $inputTokens, int $outputTokens, float $previousInp
 
         $clone->previousInputTokens += $inputTokens;
         $clone->previousOutputTokens += $outputTokens;
-        if ($outputTokens > $this->previousMaximumOutputTokens) {
+        if ($outputTokens > $clone->previousMaximumOutputTokens) {
             $clone->previousMaximumOutputTokens = $outputTokens;
         }
         $clone->previousInputCostUSD += $previousInputCostUSD;
diff --git a/src/MarkdownFormatter.php b/src/MarkdownFormatter.php
index 073fa4a..2e4def7 100644
--- a/src/MarkdownFormatter.php
+++ b/src/MarkdownFormatter.php
@@ -63,7 +63,7 @@ public function responseToMarkdown(LLMRequest|LLMResponse $requestOrResponse): s
             if ($message->isUser()) {
                 $markdown .= '## User:' . "\n";
             } elseif ($message->isSystem()) {
-                $markdown .= '## User:' . "\n";
+                $markdown .= '## System:' . "\n";
             } elseif ($message->isAssistant()) {
                 $markdown .= '## Assistant:' . "\n";
             } else {
@@ -93,7 +93,7 @@ public function responseToMarkdown(LLMRequest|LLMResponse $requestOrResponse): s
             $markdown .= '----------------------';
             $markdown .= "\n\n";
 
-            $price = $response->getInputPriceUsd() + $response->getOutputPriceUsd();
+            $price = ($response->getInputPriceUsd() ?? 0.0) + ($response->getOutputPriceUsd() ?? 0.0);
             $markdown .= "##### Total stats\n\n";
             $markdown .= 'Finished in ' . number_format($response->getTotalTimeMs() / 1000, 3, '.') . 's' .
                 ', prompt tokens: ' . $response->getInputTokens() .
diff --git a/src/Message/LLMMessageContents.php b/src/Message/LLMMessageContents.php
index b9a428e..f911097 100644
--- a/src/Message/LLMMessageContents.php
+++ b/src/Message/LLMMessageContents.php
@@ -29,9 +29,13 @@ public function getMessages(): array {
     }
 
     public static function fromJson(array $data): self {
+        // Current format wraps items to preserve the isError flag; legacy format was a plain list
+        $items = $data['items'] ?? $data;
+        $isError = $data['isError'] ?? false;
+
         /** @var LLMMessageContent[] $content */
         $content = [];
-        foreach ($data as $item) {
+        foreach ($items as $item) {
             $class = $item['class'];
             if (!is_subclass_of($class, LLMMessageContent::class)) {
                 throw new InvalidArgumentException("Class $class does not implement LLMMessageContent");
@@ -46,11 +50,14 @@ public static function fromJson(array $data): self {
             $content[] = $result;
         }
 
-        return new self($content);
+        return new self($content, $isError);
     }
 
     public function jsonSerialize(): array {
-        return array_map(static fn(LLMMessageContent $content) => ['class' => $content::class, 'data' => $content], $this->messages);
+        // Items are wrapped (instead of returned as a plain list) so isError is serialized with them
+        $items = array_map(static fn(LLMMessageContent $content) => ['class' => $content::class, 'data' => $content], $this->messages);
+
+        return ['isError' => $this->isError, 'items' => $items];
     }
 
     public static function fromString(string $content): self {
diff --git a/src/Message/LLMMessageReasoning.php b/src/Message/LLMMessageReasoning.php
index dd023d1..d5507a0 100644
--- a/src/Message/LLMMessageReasoning.php
+++ b/src/Message/LLMMessageReasoning.php
@@ -3,7 +3,7 @@
 namespace Soukicz\Llm\Message;
 
 class LLMMessageReasoning implements LLMMessageContent {
-    public function __construct(private string $text, private readonly ?string $signature, private readonly bool $cached = false) {
+    public function __construct(private readonly string $text, private readonly ?string $signature, private readonly bool $cached = false) {
     }
 
     public function getText(): string {
diff --git a/src/Message/LLMMessageText.php b/src/Message/LLMMessageText.php
index 187d69d..0170448 100644
--- a/src/Message/LLMMessageText.php
+++ b/src/Message/LLMMessageText.php
@@ -3,7 +3,7 @@
 namespace Soukicz\Llm\Message;
 
 class LLMMessageText implements LLMMessageContent {
-    public function __construct(private string $text, private readonly bool $cached = false) {
+    public function __construct(private readonly string $text, private readonly bool $cached = false) {
     }
 
     public function getText(): string {
diff --git a/src/Stream/GeminiStreamAccumulator.php b/src/Stream/GeminiStreamAccumulator.php
index 086e87b..e5c02ae 100644
--- a/src/Stream/GeminiStreamAccumulator.php
+++ b/src/Stream/GeminiStreamAccumulator.php
@@ -76,7 +76,6 @@ public static function consume(StreamInterface $stream, StreamListenerInterface
         $allParts = [];
         $finishReason = null;
         $usageMetadata = [];
-        $blockIndex = 0;
 
         $listener->onStreamEvent(new StreamEvent(
             type: StreamEventType::MESSAGE_START,
@@ -102,48 +101,59 @@ public static function consume(StreamInterface $stream, StreamListenerInterface
                 $finishReason = $candidate['finishReason'];
             }
 
-            // Process parts
+            // Process parts. Gemini streams text in many small parts that all belong to
+            // the same logical block - consecutive parts of the same kind are merged so
+            // the reconstructed response matches the non-streaming format (a single text
+            // part) and decodeResponse()/getLastText() see the full text.
             if (isset($candidate['content']['parts'])) {
                 foreach ($candidate['content']['parts'] as $part) {
-                    if (isset($part['text'])) {
-                        $allParts[] = $part;
+                    $lastIndex = count($allParts) - 1;
+                    if (isset($part['text']) && !isset($part['thought'])) {
+                        if ($lastIndex >= 0 && isset($allParts[$lastIndex]['text']) && !isset($allParts[$lastIndex]['thought'])) {
+                            $allParts[$lastIndex]['text'] .= $part['text'];
+                        } else {
+                            $allParts[] = $part;
+                            $lastIndex++;
+                        }
                         $listener->onStreamEvent(new StreamEvent(
                             type: StreamEventType::TEXT_DELTA,
-                            blockIndex: $blockIndex,
+                            blockIndex: $lastIndex,
                             delta: $part['text'],
                         ));
-                        $blockIndex++;
                     } elseif (isset($part['thought'])) {
-                        $allParts[] = $part;
+                        if ($lastIndex >= 0 && isset($allParts[$lastIndex]['thought']) && is_string($allParts[$lastIndex]['thought']) && is_string($part['thought'])) {
+                            $allParts[$lastIndex]['thought'] .= $part['thought'];
+                        } else {
+                            $allParts[] = $part;
+                            $lastIndex++;
+                        }
                         $listener->onStreamEvent(new StreamEvent(
                             type: StreamEventType::THINKING_DELTA,
-                            blockIndex: $blockIndex,
-                            delta: $part['thought'],
+                            blockIndex: $lastIndex,
+                            delta: is_string($part['thought']) ? $part['thought'] : ($part['text'] ?? ''),
                         ));
-                        $blockIndex++;
                     } elseif (isset($part['functionCall'])) {
                         $allParts[] = $part;
+                        $lastIndex++;
                         $listener->onStreamEvent(new StreamEvent(
                             type: StreamEventType::TOOL_USE_START,
-                            blockIndex: $blockIndex,
+                            blockIndex: $lastIndex,
                             toolName: $part['functionCall']['name'],
                         ));
                         // Emit the full input as a single delta since Gemini sends complete tool calls
                         $inputJson = json_encode($part['functionCall']['args'] ?? [], JSON_THROW_ON_ERROR);
                         $listener->onStreamEvent(new StreamEvent(
                             type: StreamEventType::TOOL_INPUT_DELTA,
-                            blockIndex: $blockIndex,
+                            blockIndex: $lastIndex,
                             delta: $inputJson,
                             toolName: $part['functionCall']['name'],
                         ));
                         $listener->onStreamEvent(new StreamEvent(
                             type: StreamEventType::CONTENT_BLOCK_STOP,
-                            blockIndex: $blockIndex,
+                            blockIndex: $lastIndex,
                         ));
-                        $blockIndex++;
                     } elseif (isset($part['inlineData'])) {
                         $allParts[] = $part;
-                        $blockIndex++;
                     }
                 }
             }
diff --git a/src/Tool/TextEditor/TextEditorTool.php b/src/Tool/TextEditor/TextEditorTool.php
index 4ca0fa6..d54a007 100644
--- a/src/Tool/TextEditor/TextEditorTool.php
+++ b/src/Tool/TextEditor/TextEditorTool.php
@@ -236,14 +236,14 @@ protected function replaceInFile(string $path, string $oldString, string $newStr
             }
 
             if ($matchCount > 1) {
-                // Find line numbers where old_str appears (matches reference implementation)
-                $contentLines = explode("\n", $content);
+                // Find line numbers where old_str appears (works for multi-line strings too)
                 $lineNumbers = [];
-                foreach ($contentLines as $idx => $line) {
-                    if (str_contains($line, $oldString)) {
-                        $lineNumbers[] = $idx + 1;
-                    }
+                $offset = 0;
+                while (($pos = strpos($content, $oldString, $offset)) !== false) {
+                    $lineNumbers[] = substr_count($content, "\n", 0, $pos) + 1;
+                    $offset = $pos + 1;
                 }
+                $lineNumbers = array_values(array_unique($lineNumbers));
 
                 return LLMMessageContents::fromErrorString(
                     "No replacement was performed. Multiple occurrences of old_str `$oldString` in lines [" . implode(', ', $lineNumbers) . "]. Please ensure it is unique"
diff --git a/tests/Cache/FileCacheTest.php b/tests/Cache/FileCacheTest.php
new file mode 100644
index 0000000..4663d4c
--- /dev/null
+++ b/tests/Cache/FileCacheTest.php
@@ -0,0 +1,109 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Cache;
+
+use GuzzleHttp\Psr7\Request;
+use GuzzleHttp\Psr7\Response;
+use PHPUnit\Framework\TestCase;
+use Soukicz\Llm\Cache\FileCache;
+
+/**
+ * Exercises FileCache store/fetch/invalidate against a throwaway directory that is
+ * created before and removed after every test.
+ */
+class FileCacheTest extends TestCase {
+    private string $cacheDir;
+    private FileCache $cache;
+
+    protected function setUp(): void {
+        // random_bytes() keeps the directory name unique even when tests run in parallel
+        $this->cacheDir = sys_get_temp_dir() . '/llm-file-cache-test-' . bin2hex(random_bytes(8));
+        if (!mkdir($this->cacheDir) && !is_dir($this->cacheDir)) {
+            self::fail('Unable to create cache directory ' . $this->cacheDir);
+        }
+        $this->cache = new FileCache($this->cacheDir);
+    }
+
+    protected function tearDown(): void {
+        foreach (glob($this->cacheDir . '/*') ?: [] as $file) {
+            unlink($file);
+        }
+        rmdir($this->cacheDir);
+    }
+
+    /** Builds a POST request to a fixed URL; only the body is configurable. */
+    private function createRequest(string $body = '{"prompt":"hello"}'): Request {
+        return new Request('POST', 'https://api.example.com/v1/messages', [], $body);
+    }
+
+    public function testConstructorRejectsMissingDirectory(): void {
+        $this->expectException(\RuntimeException::class);
+
+        new FileCache($this->cacheDir . '/does-not-exist');
+    }
+
+    public function testFetchReturnsNullOnMiss(): void {
+        $this->assertNull($this->cache->fetch($this->createRequest()));
+    }
+
+    public function testStoreFetchRoundTripPreservesResponse(): void {
+        $request = $this->createRequest();
+        $response = new Response(200, ['Content-Type' => 'application/json', 'X-Custom' => 'abc'], '{"answer":42}');
+
+        $this->cache->store($request, $response);
+        $cached = $this->cache->fetch($request);
+
+        $this->assertNotNull($cached);
+        $this->assertSame(200, $cached->getStatusCode());
+        $this->assertSame('{"answer":42}', (string) $cached->getBody());
+        $this->assertSame('application/json', $cached->getHeaderLine('Content-Type'));
+        $this->assertSame('abc', $cached->getHeaderLine('X-Custom'));
+    }
+
+    public function testDifferentRequestBodiesGetDifferentEntries(): void {
+        $requestA = $this->createRequest('{"prompt":"a"}');
+        $requestB = $this->createRequest('{"prompt":"b"}');
+
+        $this->cache->store($requestA, new Response(200, [], 'response A'));
+        $this->cache->store($requestB, new Response(200, [], 'response B'));
+
+        $cachedA = $this->cache->fetch($requestA);
+        $cachedB = $this->cache->fetch($requestB);
+
+        // Check for hits first so a miss fails an assertion instead of calling getBody() on null
+        $this->assertNotNull($cachedA);
+        $this->assertNotNull($cachedB);
+        $this->assertSame('response A', (string) $cachedA->getBody());
+        $this->assertSame('response B', (string) $cachedB->getBody());
+    }
+
+    public function testInvalidateRemovesEntry(): void {
+        $request = $this->createRequest();
+        $this->cache->store($request, new Response(200, [], 'data'));
+        $this->assertNotNull($this->cache->fetch($request));
+
+        $this->cache->invalidate($request);
+
+        $this->assertNull($this->cache->fetch($request));
+    }
+
+    public function testInvalidateOnMissingEntryIsSilent(): void {
+        $this->cache->invalidate($this->createRequest());
+
+        $this->assertNull($this->cache->fetch($this->createRequest()));
+    }
+
+    public function testCorruptedCacheFileIsTreatedAsMiss(): void {
+        $request = $this->createRequest();
+        $this->cache->store($request, new Response(200, [], 'data'));
+
+        // Corrupt the single stored file; glob() yields false on error, hence the fallback
+        $files = glob($this->cacheDir . '/*.json') ?: [];
+        $this->assertCount(1, $files);
+        file_put_contents($files[0], 'this is not json {');
+
+        $this->assertNull($this->cache->fetch($request));
+    }
+}
diff --git a/tests/Client/Anthropic/AnthropicBatchTest.php b/tests/Client/Anthropic/AnthropicBatchTest.php
new file mode 100644
index 0000000..9950cfc
--- /dev/null
+++ b/tests/Client/Anthropic/AnthropicBatchTest.php
@@ -0,0 +1,141 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Client\Anthropic;
+
+use GuzzleHttp\Client;
+use GuzzleHttp\Handler\MockHandler;
+use GuzzleHttp\HandlerStack;
+use GuzzleHttp\Middleware;
+use GuzzleHttp\Psr7\Response;
+use PHPUnit\Framework\TestCase;
+use Soukicz\Llm\Client\Anthropic\AnthropicClient;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Haiku;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\Message\LLMMessage;
+
+/**
+ * Runs AnthropicClient::createBatch() and retrieveBatch() against a mocked Guzzle
+ * handler, checking the request that is sent and the results that are returned.
+ */
+class AnthropicBatchTest extends TestCase {
+    /**
+     * Guzzle history container: each entry is a transaction array, not a bare request.
+     *
+     * @var array<int, array{request: \Psr\Http\Message\RequestInterface, response: ?\Psr\Http\Message\ResponseInterface, error: mixed, options: array<string, mixed>}>
+     */
+    private array $sentRequests = [];
+
+    /**
+     * Builds an AnthropicClient whose HTTP layer replays $responses in order and
+     * records every outgoing request in $this->sentRequests.
+     *
+     * @param Response[] $responses
+     */
+    private function createClientWithResponses(array $responses): AnthropicClient {
+        $mock = new MockHandler($responses);
+        $stack = HandlerStack::create($mock);
+        $this->sentRequests = [];
+        $history = Middleware::history($this->sentRequests);
+        $stack->push($history);
+
+        $client = new AnthropicClient('test-api-key');
+
+        // Inject the mocked HTTP client into the lazily initialized private property
+        $reflection = new \ReflectionProperty(AnthropicClient::class, 'httpClient');
+        $reflection->setValue($client, new Client(['handler' => $stack]));
+
+        return $client;
+    }
+
+    private function createRequest(string $prompt): LLMRequest {
+        return new LLMRequest(
+            model: new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001),
+            conversation: new LLMConversation([LLMMessage::createFromUserString($prompt)]),
+        );
+    }
+
+    public function testCreateBatchEncodesRequestsWithCustomIds(): void {
+        $client = $this->createClientWithResponses([
+            new Response(200, [], json_encode(['id' => 'msgbatch_123'], JSON_THROW_ON_ERROR)),
+        ]);
+
+        $batchId = $client->createBatch([
+            'first' => $this->createRequest('Hello'),
+            'second' => $this->createRequest('World'),
+        ]);
+
+        $this->assertSame('msgbatch_123', $batchId);
+
+        $this->assertCount(1, $this->sentRequests);
+        $sent = $this->sentRequests[0]['request'];
+        $this->assertSame('POST', $sent->getMethod());
+        $this->assertSame('https://api.anthropic.com/v1/messages/batches', (string) $sent->getUri());
+        $this->assertSame('test-api-key', $sent->getHeaderLine('x-api-key'));
+
+        $payload = json_decode((string) $sent->getBody(), true, 512, JSON_THROW_ON_ERROR);
+        $this->assertCount(2, $payload['requests']);
+        $this->assertSame('first', $payload['requests'][0]['custom_id']);
+        $this->assertSame('second', $payload['requests'][1]['custom_id']);
+        $this->assertSame('Hello', $payload['requests'][0]['params']['messages'][0]['content'][0]['text']);
+        $this->assertSame('claude-haiku-4-5-20251001', $payload['requests'][0]['params']['model']);
+    }
+
+    public function testRetrieveBatchReturnsNullWhileInProgress(): void {
+        $client = $this->createClientWithResponses([
+            new Response(200, [], json_encode(['processing_status' => 'in_progress'], JSON_THROW_ON_ERROR)),
+        ]);
+
+        $this->assertNull($client->retrieveBatch('msgbatch_123'));
+    }
+
+    public function testRetrieveBatchReturnsContentKeyedByCustomId(): void {
+        $statusResponse = json_encode([
+            'processing_status' => 'ended',
+            'results_url' => 'https://api.anthropic.com/v1/messages/batches/msgbatch_123/results',
+        ], JSON_THROW_ON_ERROR);
+
+        // JSONL results: multiple text blocks must be concatenated, non-text blocks skipped
+        $resultsJsonl = implode("\n", [
+            json_encode([
+                'custom_id' => 'first',
+                'result' => ['message' => ['content' => [
+                    ['type' => 'text', 'text' => 'Hello '],
+                    ['type' => 'text', 'text' => 'world'],
+                ]]],
+            ], JSON_THROW_ON_ERROR),
+            json_encode([
+                'custom_id' => 'second',
+                'result' => ['message' => ['content' => [
+                    ['type' => 'thinking', 'thinking' => 'hmm', 'signature' => 'sig'],
+                    ['type' => 'text', 'text' => 'Second answer'],
+                ]]],
+            ], JSON_THROW_ON_ERROR),
+        ]);
+
+        $client = $this->createClientWithResponses([
+            new Response(200, [], $statusResponse),
+            new Response(200, [], $resultsJsonl),
+        ]);
+
+        $results = $client->retrieveBatch('msgbatch_123');
+
+        $this->assertSame([
+            'first' => 'Hello world',
+            'second' => 'Second answer',
+        ], $results);
+    }
+
+    public function testRetrieveBatchThrowsOnUnexpectedStatus(): void {
+        $client = $this->createClientWithResponses([
+            new Response(200, [], json_encode(['processing_status' => 'canceling', 'status' => 'canceling'], JSON_THROW_ON_ERROR)),
+        ]);
+
+        $this->expectException(\RuntimeException::class);
+        $this->expectExceptionMessage('Unexpected batch status');
+
+        $client->retrieveBatch('msgbatch_123');
+    }
+}
diff --git a/tests/Client/Anthropic/AnthropicDecoderPricingTest.php b/tests/Client/Anthropic/AnthropicDecoderPricingTest.php
new file mode 100644
index 0000000..a272a3e
--- /dev/null
+++ b/tests/Client/Anthropic/AnthropicDecoderPricingTest.php
@@ -0,0 +1,88 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Client\Anthropic;
+
+use PHPUnit\Framework\TestCase;
+use Soukicz\Llm\Client\Anthropic\AnthropicEncoder;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Sonnet;
+use Soukicz\Llm\Client\ModelResponse;
+use Soukicz\Llm\Client\StopReason;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\LLMResponse;
+use Soukicz\Llm\Message\LLMMessage;
+
+/**
+ * Pins how AnthropicEncoder::decodeResponse() splits token usage into the input and
+ * output price buckets.
+ */
+class AnthropicDecoderPricingTest extends TestCase {
+    private AnthropicEncoder $encoder;
+
+    protected function setUp(): void {
+        $this->encoder = new AnthropicEncoder();
+    }
+
+    /**
+     * Request on Sonnet 4.5, the model whose prices the expectations are derived from:
+     * input $3/M, output $15/M, cache write $3.75/M, cache read $0.30/M.
+     */
+    private function createRequest(): LLMRequest {
+        $model = new AnthropicClaude45Sonnet(AnthropicClaude45Sonnet::VERSION_20250929);
+        $conversation = new LLMConversation([LLMMessage::createFromUserString('Hello')]);
+
+        return new LLMRequest(model: $model, conversation: $conversation);
+    }
+
+    /**
+     * Decodes a minimal text response carrying the given usage block.
+     *
+     * @param array<string, int> $usage
+     */
+    private function decode(array $usage): LLMResponse {
+        $modelResponse = new ModelResponse([
+            'content' => [['type' => 'text', 'text' => 'Hi there']],
+            'usage' => $usage,
+            'stop_reason' => 'end_turn',
+        ], 500);
+
+        $decoded = $this->encoder->decodeResponse($this->createRequest(), $modelResponse);
+        $this->assertInstanceOf(LLMResponse::class, $decoded);
+
+        return $decoded;
+    }
+
+    public function testPricingWithoutCache(): void {
+        $usage = ['input_tokens' => 1_000_000, 'output_tokens' => 100_000];
+
+        $response = $this->decode($usage);
+
+        $this->assertEqualsWithDelta(3.0, $response->getInputPriceUsd(), 1e-9);
+        $this->assertEqualsWithDelta(1.5, $response->getOutputPriceUsd(), 1e-9);
+        $this->assertSame(1_000_000, $response->getInputTokens());
+        $this->assertSame(100_000, $response->getOutputTokens());
+        $this->assertSame(StopReason::FINISHED, $response->getStopReason());
+    }
+
+    /**
+     * Both cache writes and cache reads belong to the input side of the bill. Reads were
+     * once booked under the output price, so this guards the corrected attribution.
+     */
+    public function testCacheTokensAreChargedToInputBucket(): void {
+        $usage = [
+            'input_tokens' => 1_000_000,
+            'output_tokens' => 100_000,
+            'cache_creation_input_tokens' => 1_000_000,
+            'cache_read_input_tokens' => 1_000_000,
+        ];
+
+        $response = $this->decode($usage);
+
+        // Input bucket: $3.00 uncached + $3.75 cache write + $0.30 cache read, 1M tokens each
+        $this->assertEqualsWithDelta(7.05, $response->getInputPriceUsd(), 1e-9);
+        // Output bucket is untouched by caching: 100k tokens at $15/M
+        $this->assertEqualsWithDelta(1.5, $response->getOutputPriceUsd(), 1e-9);
+    }
+}
diff --git a/tests/Client/Gemini/GeminiEncoderMediaTest.php b/tests/Client/Gemini/GeminiEncoderMediaTest.php
index 910dd8d..0d01cd4 100644
--- a/tests/Client/Gemini/GeminiEncoderMediaTest.php
+++ b/tests/Client/Gemini/GeminiEncoderMediaTest.php
@@ -91,8 +91,7 @@ public function testMixedMediaRequest(): void {
         $this->assertEquals('What breed is it?', $encoded['contents'][0]['parts'][2]['text']);
     }
 
-    public function testPdfRequestShouldThrowException(): void {
-        // PDF is not supported by Gemini directly
+    public function testPdfRequest(): void {
         $conversation = new LLMConversation([
             LLMMessage::createFromUser(new LLMMessageContents([
                 new LLMMessageText('Analyze this PDF:'),
@@ -105,9 +104,11 @@ public function testPdfRequestShouldThrowException(): void {
             conversation: $conversation
         );
 
-        $this->expectException(InvalidArgumentException::class);
-        $this->expectExceptionMessage('PDF content type not supported for Gemini');
+        $encoded = $this->encoder->encodeRequest($request);
 
-        $this->encoder->encodeRequest($request);
+        $this->assertEquals('Analyze this PDF:', $encoded['contents'][0]['parts'][0]['text']);
+        $this->assertArrayHasKey('inline_data', $encoded['contents'][0]['parts'][1]);
+        $this->assertEquals('application/pdf', $encoded['contents'][0]['parts'][1]['inline_data']['mime_type']);
+        $this->assertEquals('base64encodedpdf', $encoded['contents'][0]['parts'][1]['inline_data']['data']);
     }
 }
diff --git a/tests/Client/Gemini/GeminiEncoderTextTest.php b/tests/Client/Gemini/GeminiEncoderTextTest.php
index bcfea80..607562b 100644
--- a/tests/Client/Gemini/GeminiEncoderTextTest.php
+++ b/tests/Client/Gemini/GeminiEncoderTextTest.php
@@ -7,6 +7,8 @@
 use PHPUnit\Framework\TestCase;
 use Soukicz\Llm\Client\Gemini\GeminiEncoder;
 use Soukicz\Llm\Client\Gemini\Model\Gemini20Flash;
+use Soukicz\Llm\Client\Gemini\Model\Gemini25FlashLite;
+use Soukicz\Llm\Client\Gemini\Model\Gemini3ProPreview;
 use Soukicz\Llm\Config\ReasoningEffort;
 use Soukicz\Llm\LLMConversation;
 use Soukicz\Llm\LLMRequest;
@@ -110,7 +112,7 @@ public function testReasoningEffortHigh(): void {
         ]);
 
         $request = new LLMRequest(
-            model: new Gemini20Flash(),
+            model: new Gemini3ProPreview(),
             conversation: $conversation,
             reasoningConfig: ReasoningEffort::HIGH,
         );
@@ -127,7 +129,7 @@ public function testReasoningEffortLow(): void {
         ]);
 
         $request = new LLMRequest(
-            model: new Gemini20Flash(),
+            model: new Gemini3ProPreview(),
             conversation: $conversation,
             reasoningConfig: ReasoningEffort::LOW,
         );
@@ -143,7 +145,7 @@ public function testReasoningEffortMedium(): void {
         ]);
 
         $request = new LLMRequest(
-            model: new Gemini20Flash(),
+            model: new Gemini3ProPreview(),
             conversation: $conversation,
             reasoningConfig: ReasoningEffort::MEDIUM,
         );
@@ -159,7 +161,7 @@ public function testReasoningEffortMinimal(): void {
         ]);
 
         $request = new LLMRequest(
-            model: new Gemini20Flash(),
+            model: new Gemini3ProPreview(),
             conversation: $conversation,
             reasoningConfig: ReasoningEffort::MINIMAL,
         );
@@ -175,7 +177,7 @@ public function testReasoningEffortExtraHigh(): void {
         ]);
 
         $request = new LLMRequest(
-            model: new Gemini20Flash(),
+            model: new Gemini3ProPreview(),
             conversation: $conversation,
             reasoningConfig: ReasoningEffort::EXTRA_HIGH,
         );
@@ -201,4 +203,32 @@ public function testReasoningEffortNone(): void {
         $this->assertArrayHasKey('thinkingConfig', $encoded['generationConfig']);
         $this->assertEquals(0, $encoded['generationConfig']['thinkingConfig']['thinkingBudget']);
     }
+
+    /**
+     * On Gemini 2.x every reasoning effort has to be encoded as a thinkingBudget token
+     * count, because those models refuse thinkingLevel with "Thinking level is not
+     * supported for this model".
+     */
+    public function testReasoningEffortUsesThinkingBudgetOnGemini2x(): void {
+        $cases = [
+            [ReasoningEffort::MINIMAL, 512],
+            [ReasoningEffort::LOW, 1024],
+            [ReasoningEffort::MEDIUM, 8192],
+            [ReasoningEffort::HIGH, 24576],
+            [ReasoningEffort::EXTRA_HIGH, 24576],
+        ];
+
+        foreach ($cases as [$level, $expectedBudget]) {
+            $effort = $level->value;
+            $request = new LLMRequest(
+                model: new Gemini25FlashLite(),
+                conversation: new LLMConversation([LLMMessage::createFromUserString('Question')]),
+                reasoningConfig: $level,
+            );
+            $thinkingConfig = $this->encoder->encodeRequest($request)['generationConfig']['thinkingConfig'];
+
+            $this->assertArrayNotHasKey('thinkingLevel', $thinkingConfig, "Effort $effort must not produce thinkingLevel on a 2.x model");
+            $this->assertSame($expectedBudget, $thinkingConfig['thinkingBudget'], "Wrong budget for effort $effort");
+        }
+    }
 }
diff --git a/tests/Client/Gemini/GeminiEncoderToolsTest.php b/tests/Client/Gemini/GeminiEncoderToolsTest.php
index 07639e7..78ff58f 100644
--- a/tests/Client/Gemini/GeminiEncoderToolsTest.php
+++ b/tests/Client/Gemini/GeminiEncoderToolsTest.php
@@ -133,9 +133,42 @@ public function testFunctionResultMessage(): void {
 
         // Check function result structure
         $functionResult = $encoded['contents'][2];
-        $this->assertEquals('function', $functionResult['role']);
+        $this->assertEquals('user', $functionResult['role']);
         $this->assertCount(1, $functionResult['parts']);
         $this->assertArrayHasKey('function_response', $functionResult['parts'][0]);
+        // Gemini correlates function responses by name, which must match the original function call
+        $this->assertEquals('get_weather', $functionResult['parts'][0]['function_response']['name']);
+        $this->assertEquals(
+            ['temperature' => 22, 'condition' => 'sunny'],
+            $functionResult['parts'][0]['function_response']['response']
+        );
+    }
+
+    public function testMultipleToolsInSingleDeclaration(): void {
+        // Two no-op tools that differ only by name
+        $tools = array_map(
+            fn(string $toolName) => new CallbackToolDefinition(
+                $toolName,
+                'Description of ' . $toolName,
+                ['type' => 'object', 'properties' => [], 'required' => []],
+                fn(array $input) => []
+            ),
+            ['tool_one', 'tool_two']
+        );
+
+        $request = new LLMRequest(
+            model: new Gemini20Flash(),
+            conversation: new LLMConversation([LLMMessage::createFromUserString('What is the weather like in Prague?')]),
+            tools: $tools
+        );
+
+        $toolEntries = $this->encoder->encodeRequest($request)['tools'];
+
+        // Gemini requires all function declarations in a single tools entry
+        $this->assertCount(1, $toolEntries);
+        $this->assertCount(2, $toolEntries[0]['functionDeclarations']);
+        $this->assertEquals('tool_one', $toolEntries[0]['functionDeclarations'][0]['name']);
+        $this->assertEquals('tool_two', $toolEntries[0]['functionDeclarations'][1]['name']);
     }
 
     public function testCompleteFunctionFlow(): void {
diff --git a/tests/Client/Gemini/GeminiStreamingTest.php b/tests/Client/Gemini/GeminiStreamingTest.php
index 83f7700..be51e3b 100644
--- a/tests/Client/Gemini/GeminiStreamingTest.php
+++ b/tests/Client/Gemini/GeminiStreamingTest.php
@@ -78,8 +78,8 @@ public function testStreamingTextResponse(): void {
 
         $response = $client->sendRequestAsync($request)->wait();
 
-        // Gemini produces separate text parts per chunk, getLastText() returns the last one
-        $this->assertEquals(' world!', $response->getLastText());
+        // Streamed text chunks are merged so getLastText() returns the complete text
+        $this->assertEquals('Hello world!', $response->getLastText());
         $this->assertEquals(StopReason::FINISHED, $response->getStopReason());
         $this->assertEquals(10, $response->getInputTokens());
         $this->assertEquals(5, $response->getOutputTokens());
diff --git a/tests/Client/LLMAgentClientTest.php b/tests/Client/LLMAgentClientTest.php
index 92a931e..403a5f6 100644
--- a/tests/Client/LLMAgentClientTest.php
+++ b/tests/Client/LLMAgentClientTest.php
@@ -211,6 +211,157 @@ function (array $input): PromiseInterface {
     }
 
 
+    /**
+     * Test that a request for an unknown tool produces an error tool result instead of an empty message
+     */
+    public function testUnknownToolReturnsErrorResult(): void {
+        $calculatorTool = new CallbackToolDefinition(
+            'calculator',
+            'Basic calculator for math operations',
+            [
+                'type' => 'object',
+                'properties' => [
+                    'expression' => [
+                        'type' => 'string',
+                        'description' => 'Math expression to evaluate',
+                    ],
+                ],
+                'required' => ['expression'],
+            ],
+            function (array $input): PromiseInterface {
+                return Create::promiseFor(LLMMessageContents::fromArrayData(['result' => 4]));
+            }
+        );
+
+        $conversation = new LLMConversation([
+            LLMMessage::createFromUserString('What is 2+2?'),
+        ]);
+
+        $request = new LLMRequest(
+            model: new GPT41(GPT41::VERSION_2025_04_14),
+            conversation: $conversation,
+            tools: [$calculatorTool]
+        );
+
+        // Model hallucinates a tool that is not registered
+        $response1 = $this->createToolUseResponse($request, 'tool-123', 'nonexistent_tool', ['foo' => 'bar']);
+
+        $request2 = $response1->getRequest()->withMessage(
+            LLMMessage::createFromUser(new LLMMessageContents([
+                new LLMMessageToolResult('tool-123', LLMMessageContents::fromErrorString('ERROR: Tool "nonexistent_tool" is not available')),
+            ]))
+        );
+        $response2 = $this->createFinalResponse($request2, 'I could not use that tool.');
+
+        $sentRequests = [];
+        $responseQueue = [$response1, $response2];
+        $mockClient = $this->createMock(LLMClient::class);
+        $mockClient->method('sendRequestAsync')
+            ->willReturnCallback(function (LLMRequest $sentRequest) use (&$responseQueue, &$sentRequests) {
+                $sentRequests[] = $sentRequest;
+
+                return Create::promiseFor(array_shift($responseQueue));
+            });
+
+        $agentClient = new LLMAgentClient();
+        $finalResponse = $agentClient->run($mockClient, $request);
+
+        $this->assertEquals(StopReason::FINISHED, $finalResponse->getStopReason());
+
+        // The follow-up request built by the agent must contain an error tool result for the unknown tool
+        $this->assertCount(2, $sentRequests);
+        $toolResultMessage = $sentRequests[1]->getLastMessage();
+        $contents = $toolResultMessage->getContents();
+        $this->assertCount(1, $contents);
+        $this->assertInstanceOf(LLMMessageToolResult::class, $contents[0]);
+        $this->assertEquals('tool-123', $contents[0]->getId());
+        $this->assertTrue($contents[0]->getContent()->isError());
+    }
+
+    /**
+     * Test that tool input failing schema validation produces an error result
+     * without executing the tool handler
+     */
+    public function testSchemaValidationFailureSkipsToolExecution(): void {
+        $handlerCalled = false;
+        $calculatorTool = new CallbackToolDefinition(
+            'calculator',
+            'Basic calculator for math operations',
+            [
+                'type' => 'object',
+                'properties' => [
+                    'expression' => ['type' => 'string'],
+                ],
+                'required' => ['expression'],
+            ],
+            function (array $input) use (&$handlerCalled): PromiseInterface {
+                $handlerCalled = true;
+
+                return Create::promiseFor(LLMMessageContents::fromArrayData(['result' => 4]));
+            }
+        );
+
+        $request = new LLMRequest(
+            model: new GPT41(GPT41::VERSION_2025_04_14),
+            conversation: new LLMConversation([LLMMessage::createFromUserString('What is 2+2?')]),
+            tools: [$calculatorTool]
+        );
+
+        // Model sends an integer where the schema requires a string
+        $response1 = $this->createToolUseResponse($request, 'tool-123', 'calculator', ['expression' => 42]);
+        $request2 = $response1->getRequest()->withMessage(
+            LLMMessage::createFromUser(new LLMMessageContents([
+                new LLMMessageToolResult('tool-123', LLMMessageContents::fromErrorString('ERROR: schema mismatch')),
+            ]))
+        );
+        $response2 = $this->createFinalResponse($request2, 'Sorry, I sent invalid input.');
+
+        $sentRequests = [];
+        $responseQueue = [$response1, $response2];
+        $mockClient = $this->createMock(LLMClient::class);
+        $mockClient->method('sendRequestAsync')
+            ->willReturnCallback(function (LLMRequest $sentRequest) use (&$responseQueue, &$sentRequests) {
+                $sentRequests[] = $sentRequest;
+
+                return Create::promiseFor(array_shift($responseQueue));
+            });
+
+        (new LLMAgentClient())->run($mockClient, $request);
+
+        $this->assertFalse($handlerCalled, 'Tool handler must not run on schema validation failure');
+
+        // Fail on a clear assertion (not an undefined offset) if no follow-up request was sent
+        $this->assertCount(2, $sentRequests);
+        $contents = $sentRequests[1]->getLastMessage()->getContents();
+        $this->assertCount(1, $contents);
+        $this->assertInstanceOf(LLMMessageToolResult::class, $contents[0]);
+        $this->assertTrue($contents[0]->getContent()->isError());
+        $this->assertStringContainsString(
+            'not matching expected schema',
+            $contents[0]->getContent()->getMessages()[0]->getText()
+        );
+    }
+
+    /**
+     * Test that a feedback callback returning anything but LLMMessage or null is rejected
+     */
+    public function testFeedbackCallbackMustReturnLLMMessageOrNull(): void {
+        $request = new LLMRequest(
+            model: new GPT41(GPT41::VERSION_2025_04_14),
+            conversation: new LLMConversation([LLMMessage::createFromUserString('Hello')]),
+        );
+
+        $response = $this->createFinalResponse($request, 'Hi there');
+        $mockClient = $this->createMockLLMClient([$response]);
+
+        $agentClient = new LLMAgentClient();
+
+        $this->expectException(\InvalidArgumentException::class);
+        $this->expectExceptionMessage('Feedback callback must return an instance of LLMMessage');
+
+        $agentClient->run($mockClient, $request, fn(LLMResponse $r) => 'try again');
+    }
+
     /**
      * Create a mock LLM client that returns predefined responses
      *
diff --git a/tests/Client/OpenAI/OpenAIBatchTest.php b/tests/Client/OpenAI/OpenAIBatchTest.php
new file mode 100644
index 0000000..65c164e
--- /dev/null
+++ b/tests/Client/OpenAI/OpenAIBatchTest.php
@@ -0,0 +1,159 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Client\OpenAI;
+
+use GuzzleHttp\Client;
+use GuzzleHttp\Handler\MockHandler;
+use GuzzleHttp\HandlerStack;
+use GuzzleHttp\Middleware;
+use GuzzleHttp\Psr7\Response;
+use PHPUnit\Framework\TestCase;
+use Soukicz\Llm\Client\OpenAI\AbstractOpenAIClient;
+use Soukicz\Llm\Client\OpenAI\Model\GPT4oMini;
+use Soukicz\Llm\Client\OpenAI\OpenAIClient;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\Message\LLMMessage;
+
+class OpenAIBatchTest extends TestCase {
+    /** @var array<int, array{request: \Psr\Http\Message\RequestInterface}> */
+    private array $sentRequests = [];
+
+    private function createClientWithResponses(array $responses): OpenAIClient {
+        // Reset the recorded history so each client starts with an empty request log
+        $this->sentRequests = [];
+        $handlerStack = HandlerStack::create(new MockHandler($responses));
+        $handlerStack->push(Middleware::history($this->sentRequests));
+
+        // Point the client's httpClient property (set via reflection) at the mocked handler stack
+        $openAiClient = new OpenAIClient('test-api-key', null);
+        $httpClientProperty = new \ReflectionProperty(AbstractOpenAIClient::class, 'httpClient');
+        $httpClientProperty->setValue($openAiClient, new Client(['handler' => $handlerStack]));
+
+        return $openAiClient;
+    }
+
+    private function createRequest(string $prompt): LLMRequest {
+        return new LLMRequest(
+            model: new GPT4oMini(GPT4oMini::VERSION_2024_07_18),
+            conversation: new LLMConversation([LLMMessage::createFromUserString($prompt)]),
+        );
+    }
+
+    public function testCreateBatchUploadsJsonlAndCreatesBatch(): void {
+        $client = $this->createClientWithResponses([
+            new Response(200, [], json_encode(['id' => 'file-abc'], JSON_THROW_ON_ERROR)),
+            new Response(200, [], json_encode(['id' => 'batch-xyz'], JSON_THROW_ON_ERROR)),
+        ]);
+
+        $batchId = $client->createBatch([
+            'first' => $this->createRequest('Hello'),
+            'second' => $this->createRequest('World'),
+        ]);
+
+        $this->assertSame('batch-xyz', $batchId);
+        $this->assertCount(2, $this->sentRequests);
+
+        // First request uploads the JSONL file
+        $fileUpload = $this->sentRequests[0]['request'];
+        $this->assertStringEndsWith('/files', (string) $fileUpload->getUri());
+        $uploadBody = (string) $fileUpload->getBody();
+        $this->assertStringContainsString('"custom_id":"first"', $uploadBody);
+        $this->assertStringContainsString('"custom_id":"second"', $uploadBody);
+        $this->assertStringContainsString('"url":"\/v1\/chat\/completions"', $uploadBody);
+
+        // Second request creates the batch from the uploaded file
+        $batchCreate = $this->sentRequests[1]['request'];
+        $this->assertStringEndsWith('/batches', (string) $batchCreate->getUri());
+        $batchPayload = json_decode((string) $batchCreate->getBody(), true, 512, JSON_THROW_ON_ERROR);
+        $this->assertSame('file-abc', $batchPayload['input_file_id']);
+        $this->assertSame('24h', $batchPayload['completion_window']);
+    }
+
+    public function testRetrieveBatchReturnsNullWhileNotCompleted(): void {
+        // Single mocked response: a batch whose status is still "in_progress"
+        $statusBody = json_encode(['status' => 'in_progress'], JSON_THROW_ON_ERROR);
+        $client = $this->createClientWithResponses([new Response(200, [], $statusBody)]);
+
+        $this->assertNull($client->retrieveBatch('batch-xyz'));
+    }
+
+    /**
+     * Regression test: content used to be doubled ($content .= $content) instead of accumulated
+     */
+    public function testRetrieveBatchReturnsContentKeyedByCustomId(): void {
+        $statusResponse = json_encode([
+            'status' => 'completed',
+            'output_file_id' => 'file-out',
+            'error_file_id' => null,
+            'completed_at' => time(),
+        ], JSON_THROW_ON_ERROR);
+
+        $resultsJsonl = implode("\n", [
+            json_encode([
+                'custom_id' => 'first',
+                'response' => ['body' => ['choices' => [
+                    ['message' => ['content' => 'Hello world']],
+                ]]],
+            ], JSON_THROW_ON_ERROR),
+            json_encode([
+                'custom_id' => 'second',
+                'response' => ['body' => ['choices' => [
+                    // Content may also arrive as a list of typed parts
+                    ['message' => ['content' => [
+                        ['type' => 'text', 'text' => 'Second '],
+                        ['type' => 'text', 'text' => 'answer'],
+                    ]]],
+                ]]],
+            ], JSON_THROW_ON_ERROR),
+        ]);
+
+        $client = $this->createClientWithResponses([
+            new Response(200, [], $statusResponse),
+            new Response(200, [], $resultsJsonl),
+        ]);
+
+        $results = $client->retrieveBatch('batch-xyz');
+
+        $this->assertSame([
+            'first' => 'Hello world',
+            'second' => 'Second answer',
+        ], $results);
+    }
+
+    public function testRetrieveBatchThrowsOnRecentFailure(): void {
+        $client = $this->createClientWithResponses([
+            new Response(200, [], json_encode([
+                'status' => 'completed',
+                'output_file_id' => null,
+                'error_file_id' => 'file-err',
+                'completed_at' => time(),
+            ], JSON_THROW_ON_ERROR)),
+            new Response(200, [], '{"error": "something went wrong"}'),
+        ]);
+
+        $this->expectException(\RuntimeException::class);
+        $this->expectExceptionMessage('Batch failed');
+
+        $client->retrieveBatch('batch-xyz');
+    }
+
+    /**
+     * Documents current behavior: failures older than three days are swallowed and
+     * reported as an empty result set (OpenAI error files expire)
+     */
+    public function testRetrieveBatchReturnsEmptyArrayForExpiredFailure(): void {
+        $client = $this->createClientWithResponses([
+            new Response(200, [], json_encode([
+                'status' => 'completed',
+                'output_file_id' => null,
+                'error_file_id' => 'file-err',
+                'completed_at' => time() - 4 * 24 * 60 * 60,
+            ], JSON_THROW_ON_ERROR)),
+        ]);
+
+        $this->assertSame([], $client->retrieveBatch('batch-xyz'));
+    }
+}
diff --git a/tests/Client/OpenAI/OpenAIDecoderPricingTest.php b/tests/Client/OpenAI/OpenAIDecoderPricingTest.php
new file mode 100644
index 0000000..d9410ef
--- /dev/null
+++ b/tests/Client/OpenAI/OpenAIDecoderPricingTest.php
@@ -0,0 +1,76 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Client\OpenAI;
+
+use PHPUnit\Framework\TestCase;
+use Soukicz\Llm\Client\ModelResponse;
+use Soukicz\Llm\Client\OpenAI\Model\GPT54;
+use Soukicz\Llm\Client\OpenAI\OpenAIEncoder;
+use Soukicz\Llm\Client\StopReason;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\LLMResponse;
+use Soukicz\Llm\Message\LLMMessage;
+
+class OpenAIDecoderPricingTest extends TestCase {
+    private OpenAIEncoder $encoder;
+
+    protected function setUp(): void {
+        $this->encoder = new OpenAIEncoder();
+    }
+
+    private function createRequest(): LLMRequest {
+        return new LLMRequest(
+            // GPT-5.4: input $2.50/M, output $15/M, cached input $0.25/M
+            model: new GPT54(GPT54::VERSION_2026_03_05),
+            conversation: new LLMConversation([LLMMessage::createFromUserString('Hello')]),
+        );
+    }
+
+    /**
+     * Decode a canned single-choice chat completion that carries the given usage data.
+     *
+     * @param array $usage value placed under the "usage" key of the mocked response
+     */
+    private function decode(array $usage): LLMResponse {
+        // Only the usage block differs between callers; the completion itself is fixed
+        $choice = ['message' => ['content' => 'Hi there'], 'finish_reason' => 'stop'];
+        $modelResponse = new ModelResponse(['choices' => [$choice], 'usage' => $usage], 500);
+
+        $decoded = $this->encoder->decodeResponse($this->createRequest(), $modelResponse);
+        $this->assertInstanceOf(LLMResponse::class, $decoded);
+
+        return $decoded;
+    }
+
+    public function testPricingWithoutCachedTokens(): void {
+        $response = $this->decode([
+            'prompt_tokens' => 1_000_000,
+            'completion_tokens' => 100_000,
+        ]);
+
+        $this->assertEqualsWithDelta(2.5, $response->getInputPriceUsd(), 1e-9);
+        $this->assertEqualsWithDelta(1.5, $response->getOutputPriceUsd(), 1e-9);
+        $this->assertSame(StopReason::FINISHED, $response->getStopReason());
+    }
+
+    /**
+     * OpenAI reports cached prompt tokens as a subset of prompt_tokens; they are billed
+     * at the cached input rate. The discount used to be ignored entirely.
+     */
+    public function testCachedPromptTokensGetDiscountedRate(): void {
+        $response = $this->decode([
+            'prompt_tokens' => 1_000_000,
+            'completion_tokens' => 100_000,
+            'prompt_tokens_details' => ['cached_tokens' => 600_000],
+        ]);
+
+        // 400k uncached * $2.50/M + 600k cached * $0.25/M = 1.00 + 0.15
+        $this->assertEqualsWithDelta(1.15, $response->getInputPriceUsd(), 1e-9);
+        $this->assertEqualsWithDelta(1.5, $response->getOutputPriceUsd(), 1e-9);
+        // Token counts still report the full prompt size
+        $this->assertSame(1_000_000, $response->getInputTokens());
+    }
+}
diff --git a/tests/Client/OpenAI/OpenAIEmbeddingsTest.php b/tests/Client/OpenAI/OpenAIEmbeddingsTest.php
new file mode 100644
index 0000000..e378192
--- /dev/null
+++ b/tests/Client/OpenAI/OpenAIEmbeddingsTest.php
@@ -0,0 +1,101 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Client\OpenAI;
+
+use GuzzleHttp\Client;
+use GuzzleHttp\Handler\MockHandler;
+use GuzzleHttp\HandlerStack;
+use GuzzleHttp\Middleware;
+use GuzzleHttp\Psr7\Response;
+use PHPUnit\Framework\TestCase;
+use Soukicz\Llm\Client\OpenAI\AbstractOpenAIClient;
+use Soukicz\Llm\Client\OpenAI\OpenAIClient;
+
+class OpenAIEmbeddingsTest extends TestCase {
+    /** @var array<int, array{request: \Psr\Http\Message\RequestInterface}> */
+    private array $sentRequests = [];
+
+    private function createClientWithResponses(array $responses): OpenAIClient {
+        // Clear the history first so assertions only see requests made by this client
+        $this->sentRequests = [];
+        $mockedStack = HandlerStack::create(new MockHandler($responses));
+        $mockedStack->push(Middleware::history($this->sentRequests));
+
+        // Use reflection to replace the client's httpClient property with the mocked transport
+        $embeddingsClient = new OpenAIClient('test-api-key', null);
+        $property = new \ReflectionProperty(AbstractOpenAIClient::class, 'httpClient');
+        $property->setValue($embeddingsClient, new Client(['handler' => $mockedStack]));
+
+        return $embeddingsClient;
+    }
+
+    /**
+     * Build an embeddings API response for the given input count. Items are emitted
+     * from the highest index down to 0, i.e. deliberately out of order, to verify the
+     * client maps them back via the index field.
+     *
+     * @param int $count      number of embeddings in the response
+     * @param int $startValue value stored at index 0; index i holds $startValue + i
+     */
+    private function embeddingsResponse(int $count, int $startValue): Response {
+        $items = [];
+        $index = $count;
+        while ($index-- > 0) {
+            $items[] = ['index' => $index, 'embedding' => [(float) ($startValue + $index)]];
+        }
+        $payload = ['data' => $items, 'usage' => ['total_tokens' => $count]];
+
+        return new Response(200, [], json_encode($payload, JSON_THROW_ON_ERROR));
+    }
+
+    /**
+     * Regression test for parallel batching: results must come back keyed and ordered
+     * by the original input position even with multiple chunks and out-of-order
+     * embeddings within each response. The mocked vectors carry their own global
+     * position, so any misalignment shows up as a value mismatch.
+     */
+    public function testResultsPreserveInputOrderAcrossChunks(): void {
+        // 250 inputs → 3 chunks (100 + 100 + 50)
+        $texts = array_map(static fn(int $i): string => 'text ' . $i, range(0, 249));
+
+        // One mocked response per chunk, starting at the chunk's global offset
+        $client = $this->createClientWithResponses([
+            $this->embeddingsResponse(100, 0),
+            $this->embeddingsResponse(100, 100),
+            $this->embeddingsResponse(50, 200),
+        ]);
+
+        $embeddings = $client->getBatchEmbeddings($texts);
+
+        $this->assertCount(250, $embeddings);
+        // Keys must be the original input positions, in ascending order
+        $this->assertSame(range(0, 249), array_keys($embeddings));
+        // Each mocked vector holds exactly one value: its global input position
+        foreach ($embeddings as $position => $vector) {
+            $this->assertEquals([$position], $vector, "Embedding at position $position is misaligned");
+        }
+    }
+
+    public function testRequestPayloadAndChunking(): void {
+        $texts = array_fill(0, 150, 'hello');
+
+        $client = $this->createClientWithResponses([
+            $this->embeddingsResponse(100, 0),
+            $this->embeddingsResponse(50, 100),
+        ]);
+
+        $client->getBatchEmbeddings($texts, 'text-embedding-3-large', 1024);
+
+        $this->assertCount(2, $this->sentRequests);
+
+        $firstPayload = json_decode((string) $this->sentRequests[0]['request']->getBody(), true, 512, JSON_THROW_ON_ERROR);
+        $this->assertSame('text-embedding-3-large', $firstPayload['model']);
+        $this->assertSame(1024, $firstPayload['dimensions']);
+        $this->assertCount(100, $firstPayload['input']);
+
+        $secondPayload = json_decode((string) $this->sentRequests[1]['request']->getBody(), true, 512, JSON_THROW_ON_ERROR);
+        $this->assertCount(50, $secondPayload['input']);
+    }
+}
diff --git a/tests/Http/HttpClientFactoryTest.php b/tests/Http/HttpClientFactoryTest.php
new file mode 100644
index 0000000..c41556f
--- /dev/null
+++ b/tests/Http/HttpClientFactoryTest.php
@@ -0,0 +1,192 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Http;
+
+use GuzzleHttp\Client;
+use GuzzleHttp\Exception\ClientException;
+use GuzzleHttp\Exception\ConnectException;
+use GuzzleHttp\Exception\ServerException;
+use GuzzleHttp\Handler\MockHandler;
+use GuzzleHttp\HandlerStack;
+use GuzzleHttp\Psr7\Request;
+use GuzzleHttp\Psr7\Response;
+use PHPUnit\Framework\TestCase;
+use Soukicz\Llm\Cache\CacheInterface;
+use Soukicz\Llm\Http\HttpClientFactory;
+use Soukicz\Llm\Tests\Cache\InMemoryCache;
+
+class HttpClientFactoryTest extends TestCase {
+    private MockHandler $mockHandler;
+
+    /**
+     * Build a client with the full factory middleware stack (custom middleware, cache,
+     * retry) but with the network transport replaced by a MockHandler
+     */
+    private function createClient(?CacheInterface $cache = null, ?callable $customMiddleware = null): Client {
+        $this->mockHandler = new MockHandler();
+        $client = HttpClientFactory::createClient($customMiddleware, $cache);
+
+        $stack = $client->getConfig('handler');
+        $this->assertInstanceOf(HandlerStack::class, $stack, 'Factory client must expose a HandlerStack');
+        $stack->setHandler($this->mockHandler);
+
+        return $client;
+    }
+
+    public function testRetriesRetryableStatusCodesUntilSuccess(): void {
+        $client = $this->createClient();
+        $this->mockHandler->append(
+            new Response(429, ['Retry-After' => '0']),
+            new Response(503, ['Retry-After' => '0']),
+            new Response(200, [], 'ok'),
+        );
+
+        $response = $client->get('https://example.com/api');
+
+        $this->assertSame(200, $response->getStatusCode());
+        $this->assertSame('ok', (string) $response->getBody());
+        $this->assertSame(0, $this->mockHandler->count(), 'All queued responses should have been consumed');
+    }
+
+    public function testGivesUpAfterMaxRetries(): void {
+        $client = $this->createClient();
+        // MAX_RETRIES is 3, so the 4th consecutive error is returned to the caller
+        $this->mockHandler->append(
+            new Response(500, ['Retry-After' => '0']),
+            new Response(500, ['Retry-After' => '0']),
+            new Response(500, ['Retry-After' => '0']),
+            new Response(500, ['Retry-After' => '0']),
+        );
+
+        $this->expectException(ServerException::class);
+
+        try {
+            $client->get('https://example.com/api');
+        } finally {
+            $this->assertSame(0, $this->mockHandler->count(), 'Expected exactly 4 attempts (1 + 3 retries)');
+        }
+    }
+
+    public function testDoesNotRetryNonRetryableClientErrors(): void {
+        $client = $this->createClient();
+        $this->mockHandler->append(
+            new Response(404),
+            new Response(200),
+        );
+
+        $this->expectException(ClientException::class);
+
+        try {
+            $client->get('https://example.com/api');
+        } finally {
+            $this->assertSame(1, $this->mockHandler->count(), 'A 404 must not be retried');
+        }
+    }
+
+    public function testHonorsNumericRetryAfterHeader(): void {
+        $client = $this->createClient();
+        $this->mockHandler->append(
+            new Response(429, ['Retry-After' => '1']),
+            new Response(200),
+        );
+
+        $startNs = hrtime(true); // monotonic: immune to wall-clock adjustments, unlike microtime()
+        $response = $client->get('https://example.com/api');
+        $elapsedSeconds = (hrtime(true) - $startNs) / 1e9;
+
+        $this->assertSame(200, $response->getStatusCode());
+        $this->assertGreaterThan(0.9, $elapsedSeconds, 'Retry should have waited for the Retry-After interval');
+    }
+
+    public function testHonorsHttpDateRetryAfterHeader(): void {
+        $client = $this->createClient();
+        // Retry-After expressed as an HTTP-date one second ahead instead of a number of seconds
+        $retryAt = gmdate(DATE_RFC7231, time() + 1);
+        $this->mockHandler->append(new Response(429, ['Retry-After' => $retryAt]), new Response(200));
+
+        $response = $client->get('https://example.com/api');
+
+        $this->assertSame(200, $response->getStatusCode());
+        // An empty queue means both mocked responses were consumed, i.e. the request was retried
+        $this->assertSame(0, $this->mockHandler->count());
+    }
+
+    /**
+     * Regression test: network-level failures (connection reset, DNS, timeout) used to
+     * propagate immediately without any retry
+     */
+    public function testRetriesConnectExceptions(): void {
+        $client = $this->createClient();
+        $request = new Request('GET', 'https://example.com/api');
+        $this->mockHandler->append(
+            new ConnectException('Connection refused', $request),
+            new Response(200, [], 'ok'),
+        );
+
+        $response = $client->get('https://example.com/api');
+
+        $this->assertSame(200, $response->getStatusCode());
+        $this->assertSame('ok', (string) $response->getBody());
+    }
+
+    public function testSuccessfulResponsesAreCachedAndReplayed(): void {
+        $cache = new InMemoryCache();
+        $client = $this->createClient($cache);
+        $this->mockHandler->append(new Response(200, [], 'fresh'));
+
+        $first = $client->get('https://example.com/api');
+        $this->assertSame('fresh', (string) $first->getBody());
+        $this->assertSame(1, $cache->count());
+
+        // Second identical request must be served from cache - the mock queue is empty,
+        // so hitting the transport again would throw
+        $second = $client->get('https://example.com/api');
+        $this->assertSame('fresh', (string) $second->getBody());
+        $this->assertSame(0, $this->mockHandler->count());
+    }
+
+    public function testErrorResponsesAreNotCached(): void {
+        $cache = new InMemoryCache();
+        $client = $this->createClient($cache);
+        $this->mockHandler->append(new Response(404));
+
+        try {
+            $client->get('https://example.com/api');
+            $this->fail('Expected ClientException');
+        } catch (ClientException) {
+        }
+
+        $this->assertSame(0, $cache->count(), 'Non-2xx responses must not be cached');
+    }
+
+    public function testRequestDurationHeaderIsAddedWhenCacheIsActive(): void {
+        $cache = new InMemoryCache();
+        $client = $this->createClient($cache);
+        $this->mockHandler->append(new Response(200, [], 'ok'));
+
+        $response = $client->get('https://example.com/api');
+
+        $this->assertTrue($response->hasHeader('X-Request-Duration-ms'));
+        $this->assertIsNumeric($response->getHeaderLine('X-Request-Duration-ms'));
+    }
+
+    public function testCustomMiddlewareSeesRequestsAndResponses(): void {
+        $seen = [];
+        $middleware = function (callable $handler) use (&$seen): callable {
+            return function ($request, array $options) use ($handler, &$seen) {
+                $seen[] = $request->getMethod() . ' ' . $request->getUri();
+
+                return $handler($request, $options);
+            };
+        };
+
+        $client = $this->createClient(null, $middleware);
+        $this->mockHandler->append(new Response(200));
+
+        $client->get('https://example.com/api');
+
+        $this->assertSame(['GET https://example.com/api'], $seen);
+    }
+}
diff --git a/tests/Integration/IntegrationTestBase.php b/tests/Integration/IntegrationTestBase.php
index a3a2874..2b505d2 100644
--- a/tests/Integration/IntegrationTestBase.php
+++ b/tests/Integration/IntegrationTestBase.php
@@ -8,9 +8,9 @@
 use Soukicz\Llm\Cache\CacheInterface;
 use Soukicz\Llm\Cache\FileCache;
 use Soukicz\Llm\Client\Anthropic\AnthropicClient;
-use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude35Haiku;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Haiku;
 use Soukicz\Llm\Client\Gemini\GeminiClient;
-use Soukicz\Llm\Client\Gemini\Model\Gemini20Flash;
+use Soukicz\Llm\Client\Gemini\Model\Gemini25FlashLite;
 use Soukicz\Llm\Client\LLMClient;
 use Soukicz\Llm\Client\ModelInterface;
 use Soukicz\Llm\Client\OpenAI\Model\GPT4oMini;
@@ -134,8 +134,8 @@ protected function getAllClients(): array {
         if (!empty($_ENV['ANTHROPIC_API_KEY'])) {
             $clients[] = [
                 'client' => new AnthropicClient($_ENV['ANTHROPIC_API_KEY'], $this->cache),
-                'model' => new AnthropicClaude35Haiku(AnthropicClaude35Haiku::VERSION_20241022),
-                'name' => 'Anthropic Claude 3.5 Haiku',
+                'model' => new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001),
+                'name' => 'Anthropic Claude 4.5 Haiku',
             ];
         }
 
@@ -150,15 +150,15 @@ protected function getAllClients(): array {
         if (!empty($_ENV['GEMINI_API_KEY'])) {
             $clients[] = [
                 'client' => new GeminiClient($_ENV['GEMINI_API_KEY'], $this->cache),
-                'model' => new Gemini20Flash(),
-                'name' => 'Google Gemini 2.0 Flash',
+                'model' => new Gemini25FlashLite(),
+                'name' => 'Google Gemini 2.5 Flash Lite',
             ];
         }
 
         if (!empty($_ENV['OPENROUTER_API_KEY'])) {
             $clients[] = [
                 'client' => new OpenAICompatibleClient($_ENV['OPENROUTER_API_KEY'], 'https://openrouter.ai/api/v1', $this->cache),
-                'model' => new LocalModel('openrouter/horizon-beta'),
+                'model' => new LocalModel('openai/gpt-4o-mini'),
                 'name' => 'OpenRouter',
             ];
         }
diff --git a/tests/Integration/PdfIntegrationTest.php b/tests/Integration/PdfIntegrationTest.php
new file mode 100644
index 0000000..7c70326
--- /dev/null
+++ b/tests/Integration/PdfIntegrationTest.php
@@ -0,0 +1,67 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Integration;
+
+use Soukicz\Llm\Client\LLMAgentClient;
+use Soukicz\Llm\Client\StopReason;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\Message\LLMMessage;
+use Soukicz\Llm\Message\LLMMessageContents;
+use Soukicz\Llm\Message\LLMMessagePdf;
+use Soukicz\Llm\Message\LLMMessageText;
+
+/**
+ * Verifies PDF input end to end against the live APIs. The fixture contains the
+ * distinctive word "PINEAPPLE" which the model must extract.
+ *
+ * @group integration
+ */
+class PdfIntegrationTest extends IntegrationTestBase {
+    public static function clientProvider(): array {
+        $instance = new self('clientProvider');
+        $instance::loadEnvironmentStatic();
+
+        $datasets = [];
+        foreach ($instance->getAllClients() as ['client' => $client, 'model' => $model, 'name' => $name]) {
+            // Only the three native providers are exercised: PDF support on the
+            // OpenAI-compatible endpoints (OpenRouter, Scaleway) varies by backing model
+            if (!in_array($name, ['OpenRouter', 'Scaleway Mistral Small'], true)) {
+                $datasets[$name] = [$client, $model, $name];
+            }
+        }
+
+        return $datasets;
+    }
+
+    /**
+     * @dataProvider clientProvider
+     */
+    public function testPdfDocumentUnderstanding($client, $model, $name): void {
+        $pdfData = base64_encode(file_get_contents(__DIR__ . '/fixtures/test-document.pdf'));
+
+        $request = new LLMRequest(
+            model: $model,
+            conversation: new LLMConversation([
+                LLMMessage::createFromUser(new LLMMessageContents([
+                    new LLMMessageText('What is the secret word in this document? Reply with just the word.'),
+                    new LLMMessagePdf('base64', $pdfData),
+                ])),
+            ]),
+            maxTokens: 1000,
+        );
+
+        $response = (new LLMAgentClient())->run($client, $request);
+
+        $this->trackCost(($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ?? 0));
+
+        $this->assertEquals(StopReason::FINISHED, $response->getStopReason(), "$name did not finish cleanly");
+        $this->assertContainsIgnoreCase('PINEAPPLE', $response->getLastText(), "$name failed to read the PDF content");
+
+        if ($this->verbose) {
+            echo "\n[$name] PDF response: " . $response->getLastText();
+        }
+    }
+}
diff --git a/tests/Integration/ReasoningIntegrationTest.php b/tests/Integration/ReasoningIntegrationTest.php
new file mode 100644
index 0000000..2d092f3
--- /dev/null
+++ b/tests/Integration/ReasoningIntegrationTest.php
@@ -0,0 +1,126 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Integration;
+
+use Soukicz\Llm\Client\Anthropic\AnthropicClient;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Haiku;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude46Sonnet;
+use Soukicz\Llm\Client\Gemini\GeminiClient;
+use Soukicz\Llm\Client\Gemini\Model\Gemini25FlashLite;
+use Soukicz\Llm\Client\LLMAgentClient;
+use Soukicz\Llm\Client\LLMClient;
+use Soukicz\Llm\Client\OpenAI\Model\GPT54Nano;
+use Soukicz\Llm\Client\OpenAI\OpenAIClient;
+use Soukicz\Llm\Client\StopReason;
+use Soukicz\Llm\Config\ReasoningBudget;
+use Soukicz\Llm\Config\ReasoningEffort;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\LLMResponse;
+use Soukicz\Llm\Message\LLMMessage;
+use Soukicz\Llm\Message\LLMMessageReasoning;
+
+/**
+ * Verifies the reasoning configuration against the live APIs: ReasoningEffort on all
+ * three providers and the Anthropic-only ReasoningBudget (extended thinking).
+ *
+ * Provider constraints exercised here:
+ *  - Anthropic requires temperature=1 when thinking is enabled
+ *  - Anthropic requires maxTokens greater than the thinking budget
+ *
+ * @group integration
+ */
+class ReasoningIntegrationTest extends IntegrationTestBase {
+    protected function getRequiredEnvironmentVariables(): array {
+        // No keys are required up front: each test asks for its own key through requireKey()
+        return [];
+    }
+
+    private function requireKey(string $envVar): string {
+        self::loadEnvironmentStatic();
+        if (empty($_ENV[$envVar])) {
+            $this->markTestSkipped("$envVar is not configured");
+        }
+        // markTestSkipped() throws, so the key is only returned when it is set and non-empty
+        return $_ENV[$envVar];
+    }
+
+    private function runReasoningRequest(LLMClient $client, LLMRequest $request): LLMResponse {
+        // Runs the request, tracks its cost and checks for a clean finish with the answer 39
+        $result = (new LLMAgentClient())->run($client, $request);
+        $costUsd = ($result->getInputPriceUsd() ?? 0) + ($result->getOutputPriceUsd() ?? 0);
+        $this->trackCost($costUsd);
+        $this->assertEquals(StopReason::FINISHED, $result->getStopReason());
+        $this->assertStringContainsString('39', $result->getLastText(), 'Expected the correct arithmetic result');
+        return $result;
+    }
+
+    private function createConversation(): LLMConversation {
+        // Trivial arithmetic on purpose: these tests check that the API accepts the
+        // reasoning configuration, not how well the model solves problems
+        $question = LLMMessage::createFromUserString('What is 17 + 24 - 2? Reply with just the number.');
+
+        return new LLMConversation([$question]);
+    }
+
+    public function testAnthropicReasoningBudgetReturnsThinkingBlocks(): void {
+        $client = new AnthropicClient($this->requireKey('ANTHROPIC_API_KEY'), $this->cache);
+        $request = new LLMRequest(
+            model: new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001),
+            conversation: $this->createConversation(),
+            temperature: 1.0,
+            maxTokens: 6000,
+            reasoningConfig: new ReasoningBudget(2048),
+        );
+        $response = $this->runReasoningRequest($client, $request);
+
+        // Each reasoning block must carry text, and at least one block has to be present
+        $reasoningBlocks = 0;
+        foreach ($response->getConversation()->getMessages() as $message) {
+            foreach ($message->getContents() as $part) {
+                if ($part instanceof LLMMessageReasoning) {
+                    $reasoningBlocks++;
+                    $this->assertNotSame('', $part->getText());
+                }
+            }
+        }
+        $this->assertTrue($reasoningBlocks > 0, 'Expected at least one reasoning block in the conversation');
+    }
+
+    public function testAnthropicReasoningEffort(): void {
+        $client = new AnthropicClient($this->requireKey('ANTHROPIC_API_KEY'), $this->cache);
+        $request = new LLMRequest(
+            model: new AnthropicClaude46Sonnet(),
+            conversation: $this->createConversation(),
+            temperature: 1.0,
+            maxTokens: 6000,
+            reasoningConfig: ReasoningEffort::LOW,
+        );
+        $this->runReasoningRequest($client, $request); // asserts a clean finish and the answer 39
+    }
+
+    public function testOpenAIReasoningEffort(): void {
+        $client = new OpenAIClient($this->requireKey('OPENAI_API_KEY'), null, $this->cache);
+        $request = new LLMRequest(
+            model: new GPT54Nano(GPT54Nano::VERSION_2026_03_17),
+            conversation: $this->createConversation(),
+            temperature: 1.0,
+            maxTokens: 6000,
+            reasoningConfig: ReasoningEffort::LOW,
+        );
+        $this->runReasoningRequest($client, $request); // asserts a clean finish and the answer 39
+    }
+
+    public function testGeminiReasoningEffort(): void {
+        $client = new GeminiClient($this->requireKey('GEMINI_API_KEY'), $this->cache);
+        $request = new LLMRequest(
+            model: new Gemini25FlashLite(),
+            conversation: $this->createConversation(),
+            maxTokens: 6000,
+            reasoningConfig: ReasoningEffort::LOW,
+        );
+        $this->runReasoningRequest($client, $request); // asserts a clean finish and the answer 39
+    }
+}
diff --git a/tests/Integration/StreamingIntegrationTest.php b/tests/Integration/StreamingIntegrationTest.php
new file mode 100644
index 0000000..ac0045c
--- /dev/null
+++ b/tests/Integration/StreamingIntegrationTest.php
@@ -0,0 +1,76 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests\Integration;
+
+use Soukicz\Llm\Client\LLMAgentClient;
+use Soukicz\Llm\Client\StopReason;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\Message\LLMMessage;
+use Soukicz\Llm\Stream\CallableStreamListener;
+use Soukicz\Llm\Stream\StreamEvent;
+use Soukicz\Llm\Stream\StreamEventType;
+
+/**
+ * Verifies that live provider SSE streams are parsed correctly: text deltas must
+ * accumulate to exactly the final response text and the stream must terminate with
+ * MESSAGE_COMPLETE. This is the only place where real (current) provider stream
+ * formats are exercised - the unit tests only cover recorded formats.
+ *
+ * @group integration
+ */
+class StreamingIntegrationTest extends IntegrationTestBase {
+    public static function clientProvider(): array {
+        // Builds one data set per getAllClients() entry, keyed by name: [client, model, name]
+        $testCase = new self('clientProvider');
+        $testCase::loadEnvironmentStatic();
+
+        $dataSets = [];
+        foreach ($testCase->getAllClients() as ['client' => $client, 'model' => $model, 'name' => $name]) {
+            $dataSets[$name] = [$client, $model, $name];
+        }
+        return $dataSets;
+    }
+
+    /**
+     * Streams one short reply and compares the accumulated deltas with the final text.
+     *
+     * @dataProvider clientProvider
+     */
+    public function testTextStreamingMatchesFinalText($client, $model, $name): void {
+        $streamedText = '';
+        $seenTypes = [];
+        // Record every event type and append the text deltas in arrival order
+        $collect = function (StreamEvent $event) use (&$streamedText, &$seenTypes): void {
+            $seenTypes[] = $event->type;
+            if ($event->type === StreamEventType::TEXT_DELTA) {
+                $streamedText .= $event->delta;
+            }
+        };
+
+        $prompt = LLMMessage::createFromUserString('Reply with one short sentence about the sun.');
+        $request = new LLMRequest(
+            model: $model,
+            conversation: new LLMConversation([$prompt]),
+            maxTokens: 1000,
+            streamListener: new CallableStreamListener($collect),
+        );
+
+        $response = (new LLMAgentClient())->run($client, $request);
+        $this->trackCost(($response->getInputPriceUsd() ?? 0) + ($response->getOutputPriceUsd() ?? 0));
+
+        $mismatch = "$name: accumulated stream deltas differ from the final response text";
+        $this->assertEquals(StopReason::FINISHED, $response->getStopReason(), "$name did not finish cleanly");
+        $this->assertNotSame('', $streamedText, "$name emitted no TEXT_DELTA events");
+        $this->assertSame($response->getLastText(), $streamedText, $mismatch);
+        $this->assertContains(StreamEventType::MESSAGE_COMPLETE, $seenTypes, "$name never emitted MESSAGE_COMPLETE");
+
+        if (!$this->verbose) {
+            return;
+        }
+
+        echo "\n[$name] Streamed: $streamedText";
+    }
+}
diff --git a/tests/Integration/StructuredOutputIntegrationTest.php b/tests/Integration/StructuredOutputIntegrationTest.php
index f3c248f..7370a1b 100644
--- a/tests/Integration/StructuredOutputIntegrationTest.php
+++ b/tests/Integration/StructuredOutputIntegrationTest.php
@@ -7,7 +7,7 @@
 use Soukicz\Llm\Client\Anthropic\AnthropicClient;
 use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Haiku;
 use Soukicz\Llm\Client\Gemini\GeminiClient;
-use Soukicz\Llm\Client\Gemini\Model\Gemini20Flash;
+use Soukicz\Llm\Client\Gemini\Model\Gemini25FlashLite;
 use Soukicz\Llm\Client\LLMAgentClient;
 use Soukicz\Llm\Client\LLMClient;
 use Soukicz\Llm\Client\ModelInterface;
@@ -177,8 +177,8 @@ protected function getStructuredOutputClients(): array {
         if (!empty($_ENV['GEMINI_API_KEY'])) {
             $clients[] = [
                 'client' => new GeminiClient($_ENV['GEMINI_API_KEY'], $this->cache),
-                'model' => new Gemini20Flash(),
-                'name' => 'Google Gemini 2.0 Flash',
+                'model' => new Gemini25FlashLite(),
+                'name' => 'Google Gemini 2.5 Flash Lite',
             ];
         }
 
diff --git a/tests/Integration/UniversalLLMIntegrationTest.php b/tests/Integration/UniversalLLMIntegrationTest.php
index 3815c7a..77b4142 100644
--- a/tests/Integration/UniversalLLMIntegrationTest.php
+++ b/tests/Integration/UniversalLLMIntegrationTest.php
@@ -377,7 +377,7 @@ public function testSystemPrompt($client, $model, $name): void {
     public function testStopSequence($client, $model, $name): void {
         $conversation = new LLMConversation([
             LLMMessage::createFromUserString(
-                'Count from 1 to 10 with "STOP" after 5. Like this: 1 2 3 4 5 STOP'
+                'Output the numbers 1 to 10 separated by spaces, with no other text. Like this: 1 2 3 ...'
             ),
         ]);
 
@@ -386,7 +386,7 @@ public function testStopSequence($client, $model, $name): void {
             conversation: $conversation,
             temperature: 0.1,
             maxTokens: 200,
-            stopSequences: ['STOP']
+            stopSequences: ['8']
         );
 
         $response = $this->agentClient->run($client, $request);
@@ -401,12 +401,13 @@ public function testStopSequence($client, $model, $name): void {
         $this->assertEquals(StopReason::FINISHED, $response->getStopReason(),
             "Expected stop reason to be FINISHED for $name, but got: " . $response->getStopReason()->value);
 
-        // Should contain numbers 1-5
+        // Should contain numbers up to the stop sequence
         $this->assertContainsAny(['1'], $responseText);
         $this->assertContainsAny(['5'], $responseText);
 
-        // Should not contain numbers after 5 (allowing some flexibility)
-        $this->assertStringNotContainsString('10', $responseText);
+        // Should not contain numbers after the stop sequence. "9" is checked rather than
+        // "10" because models often echo the "1 to 10" instruction in a preamble
+        $this->assertStringNotContainsString('9', $responseText);
 
         if ($this->verbose) {
             echo "\n[$name] Stop sequence response: " . $responseText;
diff --git a/tests/Integration/fixtures/test-document.pdf b/tests/Integration/fixtures/test-document.pdf
new file mode 100644
index 0000000..e2f39b7
Binary files /dev/null and b/tests/Integration/fixtures/test-document.pdf differ
diff --git a/tests/LLMConversationTest.php b/tests/LLMConversationTest.php
index eaf8388..4d6046d 100644
--- a/tests/LLMConversationTest.php
+++ b/tests/LLMConversationTest.php
@@ -132,4 +132,33 @@ public function testWithMessage(): void {
 
         $this->assertCount(2, $deserializedConversation->getMessages());
     }
+
+    public function testErrorFlagSurvivesSerializationRoundTrip(): void {
+        // A tool result built from an error string must still report isError() after a JSON round trip
+        $failedResult = new LLMMessageToolResult('tool-1', LLMMessageContents::fromErrorString('ERROR: something failed'));
+        $userMessage = LLMMessage::createFromUser(new LLMMessageContents([$failedResult]));
+        $original = new LLMConversation([$userMessage]);
+
+        $encoded = json_encode($original, JSON_THROW_ON_ERROR);
+        $decoded = json_decode($encoded, true, 512, JSON_THROW_ON_ERROR);
+        $restored = LLMConversation::fromJson($decoded);
+
+        $restoredResult = $restored->getMessages()[0]->getContents()[0];
+        $this->assertInstanceOf(LLMMessageToolResult::class, $restoredResult);
+        $this->assertTrue($restoredResult->getContent()->isError());
+    }
+
+    public function testFromJsonAcceptsLegacyContentFormat(): void {
+        // Payloads written before the isError flag was serialized hold a plain list of
+        // items, so fromJson() has to accept that shape and report "no error"
+        $legacyItem = ['class' => LLMMessageText::class, 'data' => ['text' => 'Hello', 'cached' => false]];
+
+        $parsed = LLMMessageContents::fromJson([$legacyItem]);
+
+        $this->assertCount(1, $parsed);
+        $firstItem = $parsed[0];
+        $this->assertInstanceOf(LLMMessageText::class, $firstItem);
+        $this->assertSame('Hello', $firstItem->getText());
+        $this->assertFalse($parsed->isError());
+    }
 }
diff --git a/tests/MarkdownFormatterTest.php b/tests/MarkdownFormatterTest.php
new file mode 100644
index 0000000..c97b3c9
--- /dev/null
+++ b/tests/MarkdownFormatterTest.php
@@ -0,0 +1,79 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Soukicz\Llm\Tests;
+
+use PHPUnit\Framework\TestCase;
+use Soukicz\Llm\Client\Anthropic\Model\AnthropicClaude45Haiku;
+use Soukicz\Llm\Client\StopReason;
+use Soukicz\Llm\LLMConversation;
+use Soukicz\Llm\LLMRequest;
+use Soukicz\Llm\LLMResponse;
+use Soukicz\Llm\MarkdownFormatter;
+use Soukicz\Llm\Message\LLMMessage;
+use Soukicz\Llm\Message\LLMMessageContents;
+use Soukicz\Llm\Message\LLMMessageToolResult;
+use Soukicz\Llm\Message\LLMMessageToolUse;
+
+class MarkdownFormatterTest extends TestCase {
+    private MarkdownFormatter $formatter;
+
+    protected function setUp(): void {
+        $this->formatter = new MarkdownFormatter(); // fresh formatter for every test
+    }
+
+    private function createRequest(): LLMRequest {
+        // Fixture: system prompt, user question, one calculator tool call with its result, final answer
+        $toolCall = new LLMMessageToolUse('tool-1', 'calculator', ['expression' => '2+2']);
+        $toolResult = new LLMMessageToolResult('tool-1', LLMMessageContents::fromArrayData(['result' => 4]));
+        $messages = [
+            LLMMessage::createFromSystem(LLMMessageContents::fromString('You are a helpful assistant')),
+            LLMMessage::createFromUserString('What is 2+2?'),
+            LLMMessage::createFromAssistant(new LLMMessageContents([$toolCall])),
+            LLMMessage::createFromUser(new LLMMessageContents([$toolResult])),
+            LLMMessage::createFromAssistantString('The answer is 4'),
+        ];
+        return new LLMRequest(
+            model: new AnthropicClaude45Haiku(AnthropicClaude45Haiku::VERSION_20251001),
+            conversation: new LLMConversation($messages),
+        );
+    }
+
+    public function testRequestFormatting(): void {
+        $markdown = $this->formatter->responseToMarkdown($this->createRequest());
+
+        // System messages used to render as "## User:", so every role heading is checked
+        $expectedFragments = [
+            ' - **Model:** claude-haiku-4-5-20251001', '## System:', '## User:', '## Assistant:',
+            'You are a helpful assistant', '**Tool use:** calculator (tool-1)', '**Tool result:** tool-1',
+            'The answer is 4',
+        ];
+        foreach ($expectedFragments as $fragment) {
+            $this->assertStringContainsString($fragment, $markdown);
+        }
+    }
+
+    public function testResponseFormattingIncludesStats(): void {
+        // The asserted strings presumably mirror the arguments: 0.5 + 0.25 => $0.750, 1500 => 1.500s
+        $response = new LLMResponse($this->createRequest(), StopReason::FINISHED, 1000, 200, 200, 0.5, 0.25, 1500);
+        $markdown = $this->formatter->responseToMarkdown($response);
+        $expectedStats = [
+            '##### Total stats', 'prompt tokens: 1000', 'completion tokens: 200', 'price: $0.750', 'Finished in 1.500s',
+        ];
+        foreach ($expectedStats as $fragment) {
+            $this->assertStringContainsString($fragment, $markdown);
+        }
+    }
+
+    /**
+     * Models without configured pricing yield null prices; the formatter must cope
+     * with them and print a zero price
+     */
+    public function testResponseFormattingWithNullPrices(): void {
+        $unpriced = new LLMResponse($this->createRequest(), StopReason::FINISHED, 1000, 200, 200, null, null, 1500);
+        $rendered = $this->formatter->responseToMarkdown($unpriced);
+
+        $this->assertStringContainsString('price: $0.000', $rendered);
+    }
+}
diff --git a/tests/Stream/GeminiStreamAccumulatorTest.php b/tests/Stream/GeminiStreamAccumulatorTest.php
index f50c9d0..9310cfe 100644
--- a/tests/Stream/GeminiStreamAccumulatorTest.php
+++ b/tests/Stream/GeminiStreamAccumulatorTest.php
@@ -25,19 +25,21 @@ public function testTextOnlyResponse(): void {
 
         $result = GeminiStreamAccumulator::consume(Utils::streamFor($sse), $listener);
 
-        // Verify reconstructed response
-        $this->assertCount(2, $result['candidates'][0]['content']['parts']);
-        $this->assertEquals('Hello', $result['candidates'][0]['content']['parts'][0]['text']);
-        $this->assertEquals(' world', $result['candidates'][0]['content']['parts'][1]['text']);
+        // Consecutive text chunks must be merged into a single part so the reconstructed
+        // response matches the non-streaming format (getLastText() returns the full text)
+        $this->assertCount(1, $result['candidates'][0]['content']['parts']);
+        $this->assertEquals('Hello world', $result['candidates'][0]['content']['parts'][0]['text']);
         $this->assertEquals('STOP', $result['candidates'][0]['finishReason']);
         $this->assertEquals(10, $result['usageMetadata']['promptTokenCount']);
         $this->assertEquals(5, $result['usageMetadata']['candidatesTokenCount']);
 
-        // Verify listener events
+        // Listener still receives one delta per chunk, all within the same block
         $textDeltas = array_values(array_filter($events, fn(StreamEvent $e) => $e->type === StreamEventType::TEXT_DELTA));
         $this->assertCount(2, $textDeltas);
         $this->assertEquals('Hello', $textDeltas[0]->delta);
+        $this->assertEquals(0, $textDeltas[0]->blockIndex);
         $this->assertEquals(' world', $textDeltas[1]->delta);
+        $this->assertEquals(0, $textDeltas[1]->blockIndex);
 
         $this->assertEquals(StreamEventType::MESSAGE_START, $events[0]->type);
         $this->assertEquals(StreamEventType::MESSAGE_COMPLETE, $events[array_key_last($events)]->type);