From 187e474daf25ac55a98f50d3a0a53bc011e7a496 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 20 Jan 2026 10:15:56 +0100
Subject: [PATCH 1/3] fix(reasoning): handle only closing tags

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/endpoints/openai/chat.go |  27 +++-
 pkg/functions/parse.go             |   5 +
 pkg/functions/reasoning.go         | 138 +++++++++++++++++++-
 pkg/functions/reasoning_test.go    | 199 ++++++++++++++++++++++++-----
 4 files changed, 324 insertions(+), 45 deletions(-)
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index 4ece68d5c0a8..a191c612c31f 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -43,10 +43,18 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		lastEmittedReasoning := ""
 		lastEmittedCleanedContent := ""
 
+		// Configure reasoning extraction options
+		// Auto-detect if prompt ends with thinking tag (like llama.cpp does)
+		// or use explicit config setting
+		thinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(s)
+		reasoningOpts := functions.ReasoningOptions{
+			ThinkingForcedOpen: thinkingForcedOpen,
+		}
+
 		_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
 			accumulatedContent += s
-			// Extract reasoning from accumulated content
-			currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
+			// Extract reasoning from accumulated content with options
+			currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent, reasoningOpts)
 
 			// Calculate new reasoning delta (what we haven't emitted yet)
 			var reasoningDelta *string
@@ -230,7 +238,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			return err
 		}
 		// Extract reasoning before processing tool calls
-		reasoning, cleanedResult := functions.ExtractReasoning(result)
+		// Auto-detect if prompt ends with thinking tag or use explicit config
+		toolsThinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(prompt)
+		toolsReasoningOpts := functions.ReasoningOptions{
+			ThinkingForcedOpen: toolsThinkingForcedOpen,
+		}
+		reasoning, cleanedResult := functions.ExtractReasoning(result, toolsReasoningOpts)
 		result = cleanedResult
 
 		textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
@@ -618,9 +631,15 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		// no streaming mode
 		default:
 
+			// Auto-detect if prompt ends with thinking tag for non-streaming mode
+			nonStreamThinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(predInput)
+
 			tokenCallback := func(s string, c *[]schema.Choice) {
 				// Extract reasoning from the response
-				reasoning, cleanedS := functions.ExtractReasoning(s)
+				nonStreamReasoningOpts := functions.ReasoningOptions{
+					ThinkingForcedOpen: nonStreamThinkingForcedOpen,
+				}
+				reasoning, cleanedS := functions.ExtractReasoning(s, nonStreamReasoningOpts)
 				s = cleanedS
 
 				if !shouldUseFn {
diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index 9f14208f1f6d..0fca6514ec1f 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -111,6 +111,11 @@ type FunctionsConfig struct {
 	// XMLFormat is an optional custom XML format configuration
 	// If set, only this format will be tried (overrides XMLFormatPreset)
 	XMLFormat *XMLToolCallFormat `yaml:"xml_format,omitempty" json:"xml_format,omitempty"`
+
+	// ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag.
+	// When true, all content from the start is treated as reasoning until a closing tag is found.
+	// This is useful for models like GLM-4 that output reasoning without <think> but end with </think>.
+	ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"`
 }
 
 // @Description ReplaceResult defines a key-value replacement for function results
diff --git a/pkg/functions/reasoning.go b/pkg/functions/reasoning.go
index d3cf05808893..96fd098c57c0 100644
--- a/pkg/functions/reasoning.go
+++ b/pkg/functions/reasoning.go
@@ -4,11 +4,107 @@ import (
 	"strings"
 )
 
+// thinkingOpenTags lists the opening reasoning tags that DetectThinkingForcedOpen matches as an exact prompt suffix (hence the separate "\n" variants).
+var thinkingOpenTags = []string{
+	"<think>\n",
+	"<think>",
+	"<thinking>\n",
+	"<thinking>",
+	"<|inner_prefix|>",   // Apertus
+	"<|START_THINKING|>", // Command R7B
+	"<seed:think>",       // Seed
+	"[THINK]\n",          // Magistral
+	"[THINK]",
+}
+
+// ReasoningOptions configures how ExtractReasoning interprets model output.
+type ReasoningOptions struct {
+	// ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag.
+	// When true, all content from the start is treated as reasoning until a closing tag is found.
+	// Set it when the prompt already ends with the opening tag (see DetectThinkingForcedOpen) or for models like GLM-4 that emit only </think>.
+	ThinkingForcedOpen bool
+}
+
+// DetectThinkingForcedOpen reports whether prompt terminates in one of the
+// known thinking opening tags. A match means the chat template has already
+// emitted the opening tag, so the model's output begins inside the reasoning
+// block. The tag must be an exact suffix; tags appearing earlier in the prompt
+// are ignored.
+func DetectThinkingForcedOpen(prompt string) bool {
+	detected := false
+	for i := 0; i < len(thinkingOpenTags) && !detected; i++ {
+		detected = strings.HasSuffix(prompt, thinkingOpenTags[i])
+	}
+	return detected
+}
+
 // ExtractReasoning extracts reasoning content from thinking tags and returns
 // both the extracted reasoning and the cleaned content (with tags removed).
 // It handles <thinking>...</thinking> and <think>...</think> tags.
 // Multiple reasoning blocks are concatenated with newlines.
-func ExtractReasoning(content string) (reasoning string, cleanedContent string) {
+// A closing tag that appears without a matching opening tag is also handled:
+// the text in front of it is returned as reasoning.
+//
+// Setting opts.ThinkingForcedOpen makes extraction start in reasoning mode:
+// everything from the beginning of content up to the first closing tag is
+// reasoning. Use it when the prompt template already emitted the opening tag.
+func ExtractReasoning(content string, opts ReasoningOptions) (reasoning string, cleanedContent string) {
+	switch {
+	case content == "":
+		// Nothing to scan; hand the empty input straight back.
+		return "", content
+	case opts.ThinkingForcedOpen:
+		return extractReasoningForcedOpen(content)
+	default:
+		return extractReasoningFromTags(content)
+	}
+}
+
+// extractReasoningForcedOpen handles output that begins inside a reasoning block,
+// i.e. the opening tag was supplied by the prompt template rather than the model.
+// Text before the earliest closing tag is reasoning; the remainder is passed to
+// extractReasoningFromTags so later tagged blocks are still extracted. When no
+// closing tag is present the whole input is returned as reasoning.
+func extractReasoningForcedOpen(content string) (reasoning string, cleanedContent string) {
+	// Closers for every opener in thinkingOpenTags. Without the model-specific ones, a prompt
+	// ending in e.g. [THINK] forces a block open that could never be closed again.
+	closingTags := []string{"</thinking>", "</think>", "<|inner_suffix|>", "<|END_THINKING|>", "</seed:think>", "[/THINK]"}
+
+	earliestCloseIdx := -1
+	var matchedCloseTag string
+
+	for _, closeTag := range closingTags {
+		idx := strings.Index(content, closeTag)
+		if idx != -1 && (earliestCloseIdx == -1 || idx < earliestCloseIdx) {
+			earliestCloseIdx = idx
+			matchedCloseTag = closeTag
+		}
+	}
+
+	if earliestCloseIdx == -1 {
+		// No closing tag found - all content is reasoning (still streaming)
+		return strings.TrimSpace(content), ""
+	}
+
+	// Found closing tag - everything before is reasoning, everything after is content
+	reasoning = strings.TrimSpace(content[:earliestCloseIdx])
+	cleanedContent = content[earliestCloseIdx+len(matchedCloseTag):]
+	if cleanedContent == "" {
+		return reasoning, cleanedContent
+	}
+
+	// Continue processing the rest for any additional reasoning blocks
+	additionalReasoning, finalContent := extractReasoningFromTags(cleanedContent)
+	if reasoning != "" && additionalReasoning != "" {
+		// Separate the forced-open block from later blocks with a blank line.
+		reasoning += "\n\n"
+	}
+	return reasoning + additionalReasoning, finalContent
+}
+
+// extractReasoningFromTags extracts reasoning content from thinking tags.
+// This is the core implementation that handles standard tag-based extraction.
+func extractReasoningFromTags(content string) (reasoning string, cleanedContent string) {
 	if content == "" {
 		return "", content
 	}
@@ -34,6 +130,7 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
 		earliestStart := -1
 		earliestEnd := -1
 		isUnclosed := false
+		isClosingOnly := false
 		var matchedTag struct {
 			start string
 			end   string
@@ -41,30 +138,48 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
 
 		for _, tagPair := range tagPairs {
 			startIdx := strings.Index(remaining[lastPos:], tagPair.start)
+			endIdx := strings.Index(remaining[lastPos:], tagPair.end)
+
+			// Check for closing-only tag (closing tag appears before or without opening tag)
+			if endIdx != -1 && (startIdx == -1 || endIdx < startIdx) {
+				// Found a closing tag without a preceding opening tag
+				closingTagPos := endIdx + lastPos
+				if earliestStart == -1 || closingTagPos < earliestStart || (isClosingOnly && closingTagPos < earliestEnd) {
+					earliestStart = lastPos
+					earliestEnd = closingTagPos + len(tagPair.end)
+					isClosingOnly = true
+					isUnclosed = false
+					matchedTag = tagPair
+				}
+				continue
+			}
+
 			if startIdx == -1 {
 				continue
 			}
 			startIdx += lastPos
 
-			// Find the corresponding end tag
-			endIdx := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end)
-			if endIdx == -1 {
+			// Find the corresponding end tag after the start tag
+			endIdxAfterStart := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end)
+			if endIdxAfterStart == -1 {
 				// Unclosed tag - extract what we have
 				if earliestStart == -1 || startIdx < earliestStart {
 					earliestStart = startIdx
 					earliestEnd = len(remaining)
 					isUnclosed = true
+					isClosingOnly = false
 					matchedTag = tagPair
 				}
 				continue
 			}
-			endIdx += startIdx + len(tagPair.start)
+			endIdxAfterStart += startIdx + len(tagPair.start)
 
 			// Found a complete tag pair
 			if earliestStart == -1 || startIdx < earliestStart {
 				earliestStart = startIdx
-				earliestEnd = endIdx + len(tagPair.end)
+				earliestEnd = endIdxAfterStart + len(tagPair.end)
 				isUnclosed = false
+				isClosingOnly = false
 				matchedTag = tagPair
 			}
 		}
@@ -77,6 +192,17 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string)
 			break
 		}
 
+		if isClosingOnly {
+			// Closing tag without opening tag - content before closing tag is reasoning
+			reasoningContent := strings.TrimSpace(remaining[lastPos : earliestEnd-len(matchedTag.end)])
+			if reasoningContent != "" {
+				reasoningParts = append(reasoningParts, reasoningContent)
+			}
+			// Move past the closing tag
+			lastPos = earliestEnd
+			continue
+		}
+
 		// Add content before the tag
 		if earliestStart > lastPos {
 			cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart])
diff --git a/pkg/functions/reasoning_test.go b/pkg/functions/reasoning_test.go
index 3f7d0754195b..d60bb23e1456 100644
--- a/pkg/functions/reasoning_test.go
+++ b/pkg/functions/reasoning_test.go
@@ -8,25 +8,58 @@ import (
 	. "github.com/onsi/gomega"
 )
 
+var _ = Describe("DetectThinkingForcedOpen", func() { // detection is a suffix match against the known opening tags
+	It("should detect <think> at end of prompt", func() {
+		Expect(DetectThinkingForcedOpen("Some prompt<think>")).To(BeTrue())
+		Expect(DetectThinkingForcedOpen("Some prompt<think>\n")).To(BeTrue())
+	})
+
+	It("should detect <thinking> at end of prompt", func() {
+		Expect(DetectThinkingForcedOpen("Some prompt<thinking>")).To(BeTrue())
+		Expect(DetectThinkingForcedOpen("Some prompt<thinking>\n")).To(BeTrue())
+	})
+
+	It("should detect model-specific tags", func() {
+		Expect(DetectThinkingForcedOpen("Some prompt<|inner_prefix|>")).To(BeTrue())   // Apertus
+		Expect(DetectThinkingForcedOpen("Some prompt<|START_THINKING|>")).To(BeTrue()) // Command R7B
+		Expect(DetectThinkingForcedOpen("Some prompt<seed:think>")).To(BeTrue())       // Seed
+		Expect(DetectThinkingForcedOpen("Some prompt[THINK]")).To(BeTrue())            // Magistral
+		Expect(DetectThinkingForcedOpen("Some prompt[THINK]\n")).To(BeTrue())          // Magistral, newline-terminated
+	})
+
+	It("should not detect if tag is in the middle", func() {
+		Expect(DetectThinkingForcedOpen("Some <think> prompt")).To(BeFalse())
+		Expect(DetectThinkingForcedOpen("<think>reasoning</think>")).To(BeFalse()) // ends with a closing tag, not an opening one
+	})
+
+	It("should not detect if no thinking tag", func() {
+		Expect(DetectThinkingForcedOpen("Some regular prompt")).To(BeFalse())
+		Expect(DetectThinkingForcedOpen("")).To(BeFalse()) // empty prompt has no suffix to match
+	})
+})
+
 var _ = Describe("ExtractReasoning", func() {
+	// Default options (ThinkingForcedOpen = false)
+	defaultOpts := ReasoningOptions{}
+
 	Context("when content has no reasoning tags", func() {
 		It("should return empty reasoning and original content", func() {
 			content := "This is regular content without any tags."
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(content))
 		})
 
 		It("should handle empty string", func() {
 			content := ""
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(BeEmpty())
 		})
 
 		It("should handle content with only whitespace", func() {
 			content := "   \n\t  "
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(content))
 		})
@@ -35,42 +68,42 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <thinking> tags", func() {
 		It("should extract reasoning from single thinking block", func() {
 			content := "Some text <thinking>This is my reasoning</thinking> More text"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("This is my reasoning"))
 			Expect(cleaned).To(Equal("Some text  More text"))
 		})
 
 		It("should extract reasoning and preserve surrounding content", func() {
 			content := "Before <thinking>Reasoning here</thinking> After"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Reasoning here"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle thinking block at the start", func() {
 			content := "<thinking>Start reasoning</thinking> Regular content"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Start reasoning"))
 			Expect(cleaned).To(Equal(" Regular content"))
 		})
 
 		It("should handle thinking block at the end", func() {
 			content := "Regular content <thinking>End reasoning</thinking>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("End reasoning"))
 			Expect(cleaned).To(Equal("Regular content "))
 		})
 
 		It("should handle only thinking block", func() {
 			content := "<thinking>Only reasoning</thinking>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Only reasoning"))
 			Expect(cleaned).To(BeEmpty())
 		})
 
 		It("should trim whitespace from reasoning content", func() {
 			content := "Text <thinking>  \n  Reasoning with spaces  \n  </thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Reasoning with spaces"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -79,21 +112,21 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <think> tags", func() {
 		It("should extract reasoning from redacted_reasoning block", func() {
 			content := "Text <think>Redacted reasoning</think> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Redacted reasoning"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should handle redacted_reasoning with multiline content", func() {
 			content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle redacted_reasoning with complex content", func() {
 			content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
 			Expect(cleaned).To(Equal("Start  End"))
 		})
@@ -102,14 +135,14 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has multiple reasoning blocks", func() {
 		It("should concatenate multiple thinking blocks with newlines", func() {
 			content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("First\n\nSecond"))
 			Expect(cleaned).To(Equal("Text  Middle  End"))
 		})
 
 		It("should handle multiple different tag types", func() {
 			content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(ContainSubstring("One"))
 			Expect(reasoning).To(ContainSubstring("Two"))
 			Expect(reasoning).To(ContainSubstring("Three"))
@@ -118,7 +151,7 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle nested tags correctly (extracts first match)", func() {
 			content := "Text <thinking>Outer <think>Inner</think></thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			// Should extract the outer thinking block
 			Expect(reasoning).To(ContainSubstring("Outer"))
 			Expect(reasoning).To(ContainSubstring("Inner"))
@@ -129,28 +162,28 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has unclosed reasoning tags", func() {
 		It("should extract unclosed thinking block", func() {
 			content := "Text <thinking>Unclosed reasoning"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Unclosed reasoning"))
 			Expect(cleaned).To(Equal("Text "))
 		})
 
 		It("should extract unclosed think block", func() {
 			content := "Before <think>Incomplete"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Incomplete"))
 			Expect(cleaned).To(Equal("Before "))
 		})
 
 		It("should extract unclosed redacted_reasoning block", func() {
 			content := "Start <think>Partial reasoning content"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Partial reasoning content"))
 			Expect(cleaned).To(Equal("Start "))
 		})
 
 		It("should handle unclosed tag at the end", func() {
 			content := "Regular content <thinking>Unclosed at end"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Unclosed at end"))
 			Expect(cleaned).To(Equal("Regular content "))
 		})
@@ -159,14 +192,14 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has empty reasoning blocks", func() {
 		It("should ignore empty thinking block", func() {
 			content := "Text <thinking></thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should ignore thinking block with only whitespace", func() {
 			content := "Text <thinking>   \n\t  </thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -175,28 +208,28 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has reasoning tags with special characters", func() {
 		It("should handle reasoning with newlines", func() {
 			content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle reasoning with code blocks", func() {
 			content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should handle reasoning with JSON", func() {
 			content := "Before <think>{\"key\": \"value\"}</think> After"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle reasoning with HTML-like content", func() {
 			content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -205,7 +238,7 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has reasoning mixed with regular content", func() {
 		It("should preserve content order correctly", func() {
 			content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(ContainSubstring("Reasoning"))
 			Expect(reasoning).To(ContainSubstring("More reasoning"))
 			Expect(cleaned).To(Equal("Start  Middle  End"))
@@ -213,30 +246,59 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle reasoning in the middle of a sentence", func() {
 			content := "This is a <thinking>reasoning</thinking> sentence."
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("reasoning"))
 			Expect(cleaned).To(Equal("This is a  sentence."))
 		})
 	})
 
-	Context("edge cases", func() {
+	Context("edge cases without ThinkingForcedOpen", func() {
 		It("should handle content with only opening tag", func() {
 			content := "<thinking>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(""))
 		})
 
-		It("should handle content with only closing tag", func() {
+		It("should handle content with only closing tag (no content before)", func() {
 			content := "</thinking>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(BeEmpty())
-			Expect(cleaned).To(Equal("</thinking>"))
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should extract reasoning when only closing tag is present", func() {
+			// GLM-4 style: reasoning content followed by closing tag without opening tag
+			content := "This is reasoning content</think>this is the actual response"
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			Expect(reasoning).To(Equal("This is reasoning content"))
+			Expect(cleaned).To(Equal("this is the actual response"))
+		})
+
+		It("should handle closing-only tag with multiline reasoning", func() {
+			content := "1. First point\n2. Second point\n3. Third point</think>Final answer"
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
+			Expect(cleaned).To(Equal("Final answer"))
+		})
+
+		It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() {
+			content := "**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			Expect(reasoning).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text."))
+			Expect(cleaned).To(Equal("this is a test"))
+		})
+
+		It("should handle closing-only thinking tag", func() {
+			content := "Some reasoning here</thinking>actual content"
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			Expect(reasoning).To(Equal("Some reasoning here"))
+			Expect(cleaned).To(Equal("actual content"))
 		})
 
 		It("should handle mismatched tags", func() {
 			content := "<thinking>Content</think>"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			// Should extract unclosed thinking block
 			Expect(reasoning).To(ContainSubstring("Content"))
 			Expect(cleaned).To(Equal(""))
@@ -245,7 +307,7 @@ var _ = Describe("ExtractReasoning", func() {
 		It("should handle very long reasoning content", func() {
 			longReasoning := strings.Repeat("This is reasoning content. ", 100)
 			content := "Text <thinking>" + longReasoning + "</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			// TrimSpace is applied, so we need to account for that
 			Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
 			Expect(cleaned).To(Equal("Text  More"))
@@ -253,9 +315,76 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle reasoning with unicode characters", func() {
 			content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content)
+			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
 			Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 	})
+
+	Context("when ThinkingForcedOpen is true", func() {
+		forcedOpenOpts := ReasoningOptions{ThinkingForcedOpen: true}
+
+		It("should treat all content as reasoning until closing tag", func() {
+			content := "This is reasoning</think>this is content"
+			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			Expect(reasoning).To(Equal("This is reasoning"))
+			Expect(cleaned).To(Equal("this is content"))
+		})
+
+		It("should treat all content as reasoning when no closing tag (streaming)", func() {
+			content := "This is reasoning content still streaming"
+			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			Expect(reasoning).To(Equal("This is reasoning content still streaming"))
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should handle GLM-4 style output", func() {
+			content := "**Analyze:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
+			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			Expect(reasoning).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the text."))
+			Expect(cleaned).To(Equal("this is a test"))
+		})
+
+		It("should handle multiline reasoning with closing tag", func() {
+			content := "1. First point\n2. Second point\n3. Third point</think>Final answer"
+			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
+			Expect(cleaned).To(Equal("Final answer"))
+		})
+
+		It("should handle </thinking> closing tag", func() {
+			content := "Some reasoning here</thinking>actual content"
+			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			Expect(reasoning).To(Equal("Some reasoning here"))
+			Expect(cleaned).To(Equal("actual content"))
+		})
+
+		It("should handle additional reasoning blocks after initial forced open", func() {
+			content := "Initial reasoning</think>content<think>more reasoning</think>final content"
+			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			Expect(reasoning).To(Equal("Initial reasoning\n\nmore reasoning"))
+			Expect(cleaned).To(Equal("contentfinal content"))
+		})
+
+		It("should handle empty content", func() {
+			reasoning, cleaned := ExtractReasoning("", forcedOpenOpts)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(BeEmpty())
+		})
+
+		It("should handle only closing tag", func() {
+			content := "</think>only content"
+			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			Expect(reasoning).To(BeEmpty())
+			Expect(cleaned).To(Equal("only content"))
+		})
+
+		It("should find earliest closing tag", func() {
+			// </think> comes before </thinking>
+			content := "Reasoning</think>content</thinking>more"
+			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			Expect(reasoning).To(Equal("Reasoning"))
+			Expect(cleaned).To(Equal("content</thinking>more"))
+		})
+	})
 })

From a35212572653f6388ae94b4b355f5b20ba109842 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 20 Jan 2026 11:48:00 +0100
Subject: [PATCH 2/3] chore: refactorings

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/config/model_config.go                   |   2 +
 core/http/endpoints/openai/chat.go            |  56 +++++-----
 pkg/functions/parse.go                        |   5 -
 pkg/reasoning/config.go                       |   8 ++
 pkg/reasoning/options.go                      |  18 +++
 pkg/{functions => reasoning}/reasoning.go     |  40 +++----
 pkg/reasoning/reasoning_suite_test.go         |  13 +++
 .../reasoning_test.go                         | 105 +++++++++---------
 8 files changed, 139 insertions(+), 108 deletions(-)
 create mode 100644 pkg/reasoning/config.go
 create mode 100644 pkg/reasoning/options.go
 rename pkg/{functions => reasoning}/reasoning.go (81%)
 create mode 100644 pkg/reasoning/reasoning_suite_test.go
 rename pkg/{functions => reasoning}/reasoning_test.go (79%)

diff --git a/core/config/model_config.go b/core/config/model_config.go
index 9010c84e60c3..794e9db56fef 100644
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -10,6 +10,7 @@ import (
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/reasoning"
 	"github.com/mudler/cogito"
 	"gopkg.in/yaml.v3"
 )
@@ -51,6 +52,7 @@ type ModelConfig struct {
 	ResponseFormatMap                          map[string]interface{} `yaml:"-" json:"-"`
 
 	FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
+	ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
 
 	FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
 	// LLM configs (GPT4ALL, Llama.cpp, ...)
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index a191c612c31f..d4aaed20cc90 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -13,6 +13,7 @@ import (
 	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/reasoning"
 
 	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/model"
@@ -44,17 +45,18 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		lastEmittedCleanedContent := ""
 
 		// Configure reasoning extraction options
-		// Auto-detect if prompt ends with thinking tag (like llama.cpp does)
+		// Auto-detect if prompt ends with thinking tag
 		// or use explicit config setting
-		thinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(s)
-		reasoningOpts := functions.ReasoningOptions{
-			ThinkingForcedOpen: thinkingForcedOpen,
-		}
+		thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s)
 
 		_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
 			accumulatedContent += s
-			// Extract reasoning from accumulated content with options
-			currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent, reasoningOpts)
+			// Extract reasoning from accumulated content
+			opts := []reasoning.Option{}
+			if thinkingForcedOpen {
+				opts = append(opts, reasoning.WithThinkingForcedOpen())
+			}
+			currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...)
 
 			// Calculate new reasoning delta (what we haven't emitted yet)
 			var reasoningDelta *string
@@ -239,11 +241,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		}
 		// Extract reasoning before processing tool calls
 		// Auto-detect if prompt ends with thinking tag or use explicit config
-		toolsThinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(prompt)
-		toolsReasoningOpts := functions.ReasoningOptions{
-			ThinkingForcedOpen: toolsThinkingForcedOpen,
+		toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt)
+		opts := []reasoning.Option{}
+		if toolsThinkingForcedOpen {
+			opts = append(opts, reasoning.WithThinkingForcedOpen())
 		}
-		reasoning, cleanedResult := functions.ExtractReasoning(result, toolsReasoningOpts)
+		extractedReasoning, cleanedResult := reasoning.Extract(result, opts...)
 		result = cleanedResult
 
 		textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
@@ -279,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			}
 
 			var deltaReasoning *string
-			if reasoning != "" {
-				deltaReasoning = &reasoning
+			if extractedReasoning != "" {
+				deltaReasoning = &extractedReasoning
 			}
 			delta := &schema.Message{Content: &result}
 			if deltaReasoning != nil {
@@ -632,22 +635,23 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		default:
 
 			// Auto-detect if prompt ends with thinking tag for non-streaming mode
-			nonStreamThinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(predInput)
+			nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput)
 
 			tokenCallback := func(s string, c *[]schema.Choice) {
 				// Extract reasoning from the response
-				nonStreamReasoningOpts := functions.ReasoningOptions{
-					ThinkingForcedOpen: nonStreamThinkingForcedOpen,
+				var extractedReasoning string
+				opts := []reasoning.Option{}
+				if nonStreamThinkingForcedOpen {
+					opts = append(opts, reasoning.WithThinkingForcedOpen())
 				}
-				reasoning, cleanedS := functions.ExtractReasoning(s, nonStreamReasoningOpts)
-				s = cleanedS
+				extractedReasoning, s = reasoning.Extract(s, opts...)
 
 				if !shouldUseFn {
 					// no function is called, just reply and use stop as finish reason
 					stopReason := FinishReasonStop
 					message := &schema.Message{Role: "assistant", Content: &s}
-					if reasoning != "" {
-						message.Reasoning = &reasoning
+					if extractedReasoning != "" {
+						message.Reasoning = &extractedReasoning
 					}
 					*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message})
 					return
@@ -669,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 
 					stopReason := FinishReasonStop
 					message := &schema.Message{Role: "assistant", Content: &result}
-					if reasoning != "" {
-						message.Reasoning = &reasoning
+					if extractedReasoning != "" {
+						message.Reasoning = &extractedReasoning
 					}
 					*c = append(*c, schema.Choice{
 						FinishReason: &stopReason,
@@ -683,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 							Role: "assistant",
 						},
 					}
-					if reasoning != "" {
-						toolChoice.Message.Reasoning = &reasoning
+					if extractedReasoning != "" {
+						toolChoice.Message.Reasoning = &extractedReasoning
 					}
 
 					for _, ss := range results {
@@ -714,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 									"arguments": args,
 								},
 							}
-							if reasoning != "" {
-								message.Reasoning = &reasoning
+							if extractedReasoning != "" {
+								message.Reasoning = &extractedReasoning
 							}
 							*c = append(*c, schema.Choice{
 								FinishReason: &functionCallReason,
diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index 0fca6514ec1f..9f14208f1f6d 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -111,11 +111,6 @@ type FunctionsConfig struct {
 	// XMLFormat is an optional custom XML format configuration
 	// If set, only this format will be tried (overrides XMLFormatPreset)
 	XMLFormat *XMLToolCallFormat `yaml:"xml_format,omitempty" json:"xml_format,omitempty"`
-
-	// ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag.
-	// When true, all content from the start is treated as reasoning until a closing tag is found.
-	// This is useful for models like GLM-4 that output reasoning without <think> but end with </think>.
-	ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"`
 }
 
 // @Description ReplaceResult defines a key-value replacement for function results
diff --git a/pkg/reasoning/config.go b/pkg/reasoning/config.go
new file mode 100644
index 000000000000..d8edb9d750c8
--- /dev/null
+++ b/pkg/reasoning/config.go
@@ -0,0 +1,8 @@
+package reasoning
+
+type ReasoningConfig struct {
+	// ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag.
+	// When true, all content from the start is treated as reasoning until a closing tag is found.
+	// This is useful for models like GLM-4 that output reasoning without <think> but end with </think>.
+	ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"`
+}
diff --git a/pkg/reasoning/options.go b/pkg/reasoning/options.go
new file mode 100644
index 000000000000..146f2618f41e
--- /dev/null
+++ b/pkg/reasoning/options.go
@@ -0,0 +1,18 @@
+package reasoning
+
+// options holds the configuration for reasoning extraction.
+type options struct {
+	thinkingForcedOpen bool
+}
+
+// Option is a functional option for configuring reasoning extraction.
+type Option func(*options)
+
+// WithThinkingForcedOpen configures the extractor to treat all content from the start
+// as reasoning until a closing tag is found. This is useful for models like GLM-4
+// that output reasoning without <think> but end with </think>.
+func WithThinkingForcedOpen() Option {
+	return func(o *options) {
+		o.thinkingForcedOpen = true
+	}
+}
diff --git a/pkg/functions/reasoning.go b/pkg/reasoning/reasoning.go
similarity index 81%
rename from pkg/functions/reasoning.go
rename to pkg/reasoning/reasoning.go
index 96fd098c57c0..6d85566cc6de 100644
--- a/pkg/functions/reasoning.go
+++ b/pkg/reasoning/reasoning.go
@@ -1,4 +1,4 @@
-package functions
+package reasoning
 
 import (
 	"strings"
@@ -17,14 +17,6 @@ var thinkingOpenTags = []string{
 	"[THINK]",
 }
 
-// ReasoningOptions configures how reasoning extraction behaves
-type ReasoningOptions struct {
-	// ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag.
-	// When true, all content from the start is treated as reasoning until a closing tag is found.
-	// This is useful for models like GLM-4 that output reasoning without <think> but end with </think>.
-	ThinkingForcedOpen bool
-}
-
 // DetectThinkingForcedOpen checks if a prompt ends with a thinking opening tag.
 // This is used to automatically detect when the model template has already added
 // the opening thinking tag, meaning the model will output reasoning content directly.
@@ -38,31 +30,35 @@ func DetectThinkingForcedOpen(prompt string) bool {
 	return false
 }
 
-// ExtractReasoning extracts reasoning content from thinking tags and returns
+// Extract extracts reasoning content from thinking tags and returns
 // both the extracted reasoning and the cleaned content (with tags removed).
 // It handles <thinking>...</thinking> and <think>...</think> tags.
 // Multiple reasoning blocks are concatenated with newlines.
 // It also handles the case where only a closing tag is present (no opening tag),
 // in which case everything before the closing tag is treated as reasoning.
 //
-// When opts.ThinkingForcedOpen is true, all content from the start is treated as reasoning
-// until a closing tag (</think> or </thinking>) is found. This is useful for models
-// whose templates add the opening tag, so the model outputs reasoning directly.
-func ExtractReasoning(content string, opts ReasoningOptions) (reasoning string, cleanedContent string) {
+// Pass the WithThinkingForcedOpen() option to treat all content from the start as
+// reasoning until a closing tag is found (or to the end if none is present).
+func Extract(content string, opts ...Option) (reasoning string, cleanedContent string) {
 	if content == "" {
 		return "", content
 	}
 
-	if opts.ThinkingForcedOpen {
-		return extractReasoningForcedOpen(content)
+	cfg := &options{}
+	for _, opt := range opts {
+		opt(cfg)
+	}
+
+	if cfg.thinkingForcedOpen {
+		return extractForcedOpen(content)
 	}
 
-	return extractReasoningFromTags(content)
+	return extractFromTags(content)
 }
 
-// extractReasoningForcedOpen handles the case where reasoning starts without an opening tag.
+// extractForcedOpen handles the case where reasoning starts without an opening tag.
 // All content from the start is treated as reasoning until a closing tag is found.
-func extractReasoningForcedOpen(content string) (reasoning string, cleanedContent string) {
+func extractForcedOpen(content string) (reasoning string, cleanedContent string) {
 	// Look for the earliest closing tag
 	closingTags := []string{"</thinking>", "</think>"}
 
@@ -88,7 +84,7 @@ func extractReasoningForcedOpen(content string) (reasoning string, cleanedConten
 
 	// Continue processing the rest for any additional reasoning blocks
 	if cleanedContent != "" {
-		additionalReasoning, finalContent := extractReasoningFromTags(cleanedContent)
+		additionalReasoning, finalContent := extractFromTags(cleanedContent)
 		if additionalReasoning != "" {
 			if reasoning != "" {
 				reasoning = reasoning + "\n\n" + additionalReasoning
@@ -102,9 +98,9 @@ func extractReasoningForcedOpen(content string) (reasoning string, cleanedConten
 	return reasoning, cleanedContent
 }
 
-// extractReasoningFromTags extracts reasoning content from thinking tags.
+// extractFromTags extracts reasoning content from thinking tags.
 // This is the core implementation that handles standard tag-based extraction.
-func extractReasoningFromTags(content string) (reasoning string, cleanedContent string) {
+func extractFromTags(content string) (reasoning string, cleanedContent string) {
 	if content == "" {
 		return "", content
 	}
diff --git a/pkg/reasoning/reasoning_suite_test.go b/pkg/reasoning/reasoning_suite_test.go
new file mode 100644
index 000000000000..bfd983c33c6e
--- /dev/null
+++ b/pkg/reasoning/reasoning_suite_test.go
@@ -0,0 +1,13 @@
+package reasoning_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestReasoning(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "Reasoning Suite")
+}
diff --git a/pkg/functions/reasoning_test.go b/pkg/reasoning/reasoning_test.go
similarity index 79%
rename from pkg/functions/reasoning_test.go
rename to pkg/reasoning/reasoning_test.go
index d60bb23e1456..a22cb9e22f91 100644
--- a/pkg/functions/reasoning_test.go
+++ b/pkg/reasoning/reasoning_test.go
@@ -1,9 +1,9 @@
-package functions_test
+package reasoning_test
 
 import (
 	"strings"
 
-	. "github.com/mudler/LocalAI/pkg/functions"
+	. "github.com/mudler/LocalAI/pkg/reasoning"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
@@ -38,28 +38,25 @@ var _ = Describe("DetectThinkingForcedOpen", func() {
 	})
 })
 
-var _ = Describe("ExtractReasoning", func() {
-	// Default options (ThinkingForcedOpen = false)
-	defaultOpts := ReasoningOptions{}
-
+var _ = Describe("Extract", func() {
 	Context("when content has no reasoning tags", func() {
 		It("should return empty reasoning and original content", func() {
 			content := "This is regular content without any tags."
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(content))
 		})
 
 		It("should handle empty string", func() {
 			content := ""
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(BeEmpty())
 		})
 
 		It("should handle content with only whitespace", func() {
 			content := "   \n\t  "
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(content))
 		})
@@ -68,42 +65,42 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <thinking> tags", func() {
 		It("should extract reasoning from single thinking block", func() {
 			content := "Some text <thinking>This is my reasoning</thinking> More text"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("This is my reasoning"))
 			Expect(cleaned).To(Equal("Some text  More text"))
 		})
 
 		It("should extract reasoning and preserve surrounding content", func() {
 			content := "Before <thinking>Reasoning here</thinking> After"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Reasoning here"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle thinking block at the start", func() {
 			content := "<thinking>Start reasoning</thinking> Regular content"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Start reasoning"))
 			Expect(cleaned).To(Equal(" Regular content"))
 		})
 
 		It("should handle thinking block at the end", func() {
 			content := "Regular content <thinking>End reasoning</thinking>"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("End reasoning"))
 			Expect(cleaned).To(Equal("Regular content "))
 		})
 
 		It("should handle only thinking block", func() {
 			content := "<thinking>Only reasoning</thinking>"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Only reasoning"))
 			Expect(cleaned).To(BeEmpty())
 		})
 
 		It("should trim whitespace from reasoning content", func() {
 			content := "Text <thinking>  \n  Reasoning with spaces  \n  </thinking> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Reasoning with spaces"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -112,21 +109,21 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has <think> tags", func() {
 		It("should extract reasoning from redacted_reasoning block", func() {
 			content := "Text <think>Redacted reasoning</think> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Redacted reasoning"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should handle redacted_reasoning with multiline content", func() {
 			content := "Before <think>Line 1\nLine 2\nLine 3</think> After"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle redacted_reasoning with complex content", func() {
 			content := "Start <think>Complex reasoning\nwith\nmultiple\nlines</think> End"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines"))
 			Expect(cleaned).To(Equal("Start  End"))
 		})
@@ -135,14 +132,14 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has multiple reasoning blocks", func() {
 		It("should concatenate multiple thinking blocks with newlines", func() {
 			content := "Text <thinking>First</thinking> Middle <thinking>Second</thinking> End"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("First\n\nSecond"))
 			Expect(cleaned).To(Equal("Text  Middle  End"))
 		})
 
 		It("should handle multiple different tag types", func() {
 			content := "A <thinking>One</thinking> B <think>Two</think> C <think>Three</think> D"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(ContainSubstring("One"))
 			Expect(reasoning).To(ContainSubstring("Two"))
 			Expect(reasoning).To(ContainSubstring("Three"))
@@ -151,7 +148,7 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle nested tags correctly (extracts first match)", func() {
 			content := "Text <thinking>Outer <think>Inner</think></thinking> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			// Should extract the outer thinking block
 			Expect(reasoning).To(ContainSubstring("Outer"))
 			Expect(reasoning).To(ContainSubstring("Inner"))
@@ -162,28 +159,28 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has unclosed reasoning tags", func() {
 		It("should extract unclosed thinking block", func() {
 			content := "Text <thinking>Unclosed reasoning"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Unclosed reasoning"))
 			Expect(cleaned).To(Equal("Text "))
 		})
 
 		It("should extract unclosed think block", func() {
 			content := "Before <think>Incomplete"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Incomplete"))
 			Expect(cleaned).To(Equal("Before "))
 		})
 
 		It("should extract unclosed redacted_reasoning block", func() {
 			content := "Start <think>Partial reasoning content"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Partial reasoning content"))
 			Expect(cleaned).To(Equal("Start "))
 		})
 
 		It("should handle unclosed tag at the end", func() {
 			content := "Regular content <thinking>Unclosed at end"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Unclosed at end"))
 			Expect(cleaned).To(Equal("Regular content "))
 		})
@@ -192,14 +189,14 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has empty reasoning blocks", func() {
 		It("should ignore empty thinking block", func() {
 			content := "Text <thinking></thinking> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should ignore thinking block with only whitespace", func() {
 			content := "Text <thinking>   \n\t  </thinking> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -208,28 +205,28 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has reasoning tags with special characters", func() {
 		It("should handle reasoning with newlines", func() {
 			content := "Before <thinking>Line 1\nLine 2\nLine 3</thinking> After"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle reasoning with code blocks", func() {
 			content := "Text <thinking>Reasoning with ```code``` blocks</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Reasoning with ```code``` blocks"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 
 		It("should handle reasoning with JSON", func() {
 			content := "Before <think>{\"key\": \"value\"}</think> After"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("{\"key\": \"value\"}"))
 			Expect(cleaned).To(Equal("Before  After"))
 		})
 
 		It("should handle reasoning with HTML-like content", func() {
 			content := "Text <thinking>Reasoning with <tags> inside</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Reasoning with <tags> inside"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
@@ -238,7 +235,7 @@ var _ = Describe("ExtractReasoning", func() {
 	Context("when content has reasoning mixed with regular content", func() {
 		It("should preserve content order correctly", func() {
 			content := "Start <thinking>Reasoning</thinking> Middle <think>More reasoning</think> End"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(ContainSubstring("Reasoning"))
 			Expect(reasoning).To(ContainSubstring("More reasoning"))
 			Expect(cleaned).To(Equal("Start  Middle  End"))
@@ -246,23 +243,23 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle reasoning in the middle of a sentence", func() {
 			content := "This is a <thinking>reasoning</thinking> sentence."
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("reasoning"))
 			Expect(cleaned).To(Equal("This is a  sentence."))
 		})
 	})
 
-	Context("edge cases without ThinkingForcedOpen", func() {
+	Context("edge cases without WithThinkingForcedOpen", func() {
 		It("should handle content with only opening tag", func() {
 			content := "<thinking>"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal(""))
 		})
 
 		It("should handle content with only closing tag (no content before)", func() {
 			content := "</thinking>"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(BeEmpty())
 		})
@@ -270,35 +267,35 @@ var _ = Describe("ExtractReasoning", func() {
 		It("should extract reasoning when only closing tag is present", func() {
 			// GLM-4 style: reasoning content followed by closing tag without opening tag
 			content := "This is reasoning content</think>this is the actual response"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("This is reasoning content"))
 			Expect(cleaned).To(Equal("this is the actual response"))
 		})
 
 		It("should handle closing-only tag with multiline reasoning", func() {
 			content := "1. First point\n2. Second point\n3. Third point</think>Final answer"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
 			Expect(cleaned).To(Equal("Final answer"))
 		})
 
 		It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() {
 			content := "**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text."))
 			Expect(cleaned).To(Equal("this is a test"))
 		})
 
 		It("should handle closing-only thinking tag", func() {
 			content := "Some reasoning here</thinking>actual content"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Some reasoning here"))
 			Expect(cleaned).To(Equal("actual content"))
 		})
 
 		It("should handle mismatched tags", func() {
 			content := "<thinking>Content</think>"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			// Should extract unclosed thinking block
 			Expect(reasoning).To(ContainSubstring("Content"))
 			Expect(cleaned).To(Equal(""))
@@ -307,7 +304,7 @@ var _ = Describe("ExtractReasoning", func() {
 		It("should handle very long reasoning content", func() {
 			longReasoning := strings.Repeat("This is reasoning content. ", 100)
 			content := "Text <thinking>" + longReasoning + "</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			// TrimSpace is applied, so we need to account for that
 			Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning)))
 			Expect(cleaned).To(Equal("Text  More"))
@@ -315,66 +312,64 @@ var _ = Describe("ExtractReasoning", func() {
 
 		It("should handle reasoning with unicode characters", func() {
 			content := "Text <thinking>Reasoning with 中文 and emoji 🧠</thinking> More"
-			reasoning, cleaned := ExtractReasoning(content, defaultOpts)
+			reasoning, cleaned := Extract(content)
 			Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠"))
 			Expect(cleaned).To(Equal("Text  More"))
 		})
 	})
 
-	Context("when ThinkingForcedOpen is true", func() {
-		forcedOpenOpts := ReasoningOptions{ThinkingForcedOpen: true}
-
+	Context("with WithThinkingForcedOpen option", func() {
 		It("should treat all content as reasoning until closing tag", func() {
 			content := "This is reasoning</think>this is content"
-			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
 			Expect(reasoning).To(Equal("This is reasoning"))
 			Expect(cleaned).To(Equal("this is content"))
 		})
 
 		It("should treat all content as reasoning when no closing tag (streaming)", func() {
 			content := "This is reasoning content still streaming"
-			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
 			Expect(reasoning).To(Equal("This is reasoning content still streaming"))
 			Expect(cleaned).To(BeEmpty())
 		})
 
 		It("should handle GLM-4 style output", func() {
 			content := "**Analyze:** The user says something.\n\n**Final Decision:** Output the text.</think>this is a test"
-			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
 			Expect(reasoning).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the text."))
 			Expect(cleaned).To(Equal("this is a test"))
 		})
 
 		It("should handle multiline reasoning with closing tag", func() {
 			content := "1. First point\n2. Second point\n3. Third point</think>Final answer"
-			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
 			Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point"))
 			Expect(cleaned).To(Equal("Final answer"))
 		})
 
 		It("should handle </thinking> closing tag", func() {
 			content := "Some reasoning here</thinking>actual content"
-			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
 			Expect(reasoning).To(Equal("Some reasoning here"))
 			Expect(cleaned).To(Equal("actual content"))
 		})
 
 		It("should handle additional reasoning blocks after initial forced open", func() {
 			content := "Initial reasoning</think>content<think>more reasoning</think>final content"
-			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
 			Expect(reasoning).To(Equal("Initial reasoning\n\nmore reasoning"))
 			Expect(cleaned).To(Equal("contentfinal content"))
 		})
 
 		It("should handle empty content", func() {
-			reasoning, cleaned := ExtractReasoning("", forcedOpenOpts)
+			reasoning, cleaned := Extract("", WithThinkingForcedOpen())
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(BeEmpty())
 		})
 
 		It("should handle only closing tag", func() {
 			content := "</think>only content"
-			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
 			Expect(reasoning).To(BeEmpty())
 			Expect(cleaned).To(Equal("only content"))
 		})
@@ -382,7 +377,7 @@ var _ = Describe("ExtractReasoning", func() {
 		It("should find earliest closing tag", func() {
 			// </think> comes before </thinking>
 			content := "Reasoning</think>content</thinking>more"
-			reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts)
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) // only the first closing tag ends the reasoning; the later </thinking> stays in cleaned
 			Expect(reasoning).To(Equal("Reasoning"))
 			Expect(cleaned).To(Equal("content</thinking>more"))
 		})

From 61a6e95f7d04fddb1b8412aa58a5c741acaea967 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 20 Jan 2026 12:02:35 +0100
Subject: [PATCH 3/3] Additional thinking tags

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 pkg/reasoning/reasoning.go      |  32 +++++++--
 pkg/reasoning/reasoning_test.go | 114 ++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 6 deletions(-)

diff --git a/pkg/reasoning/reasoning.go b/pkg/reasoning/reasoning.go
index 6d85566cc6de..e07b5954d33b 100644
--- a/pkg/reasoning/reasoning.go
+++ b/pkg/reasoning/reasoning.go
@@ -4,16 +4,23 @@ import (
 	"strings"
 )
 
-// Common thinking/reasoning opening tags used by various models
+// thinkingOpenTags lists common thinking/reasoning opening tags used by various models.
+// Most mirror the tags detected by llama.cpp in common/chat.cpp; exceptions are noted inline.
 var thinkingOpenTags = []string{
+	// DeepSeek R1, V3.1, Nemotron V2, MiniMax M2, Hermes 2 Pro, Granite, Exaone MOE
 	"<think>\n",
 	"<think>",
+	// Generic thinking tags
 	"<thinking>\n",
 	"<thinking>",
-	"<|inner_prefix|>",   // Apertus
-	"<|START_THINKING|>", // Command R7B
-	"<seed:think>",       // Seed
-	"[THINK]\n",          // Magistral
+	// Apertus
+	"<|inner_prefix|>",
+	// Command R7B
+	"<|START_THINKING|>",
+	// Seed
+	"<seed:think>",
+	// Magistral (not in llama.cpp but common)
+	"[THINK]\n",
 	"[THINK]",
 }
 
@@ -60,7 +67,15 @@ func Extract(content string, opts ...Option) (reasoning string, cleanedContent s
 // All content from the start is treated as reasoning until a closing tag is found.
 func extractForcedOpen(content string) (reasoning string, cleanedContent string) {
 	// Look for the earliest closing tag
-	closingTags := []string{"</thinking>", "</think>"}
+	// These match the closing tags used by llama.cpp for various models
+	closingTags := []string{
+		"</thinking>",
+		"</think>",
+		"<|END_THINKING|>", // Command R7B
+		"<|inner_suffix|>", // Apertus
+		"</seed:think>",    // Seed
+		"[/THINK]",         // Magistral
+	}
 
 	earliestCloseIdx := -1
 	var matchedCloseTag string
@@ -110,12 +125,17 @@ func extractFromTags(content string) (reasoning string, cleanedContent string) {
 	remaining := content
 
 	// Define tag pairs to look for
+	// These match the tags used by llama.cpp for various models
 	tagPairs := []struct {
 		start string
 		end   string
 	}{
 		{"<thinking>", "</thinking>"},
 		{"<think>", "</think>"},
+		{"<|START_THINKING|>", "<|END_THINKING|>"}, // Command R7B
+		{"<|inner_prefix|>", "<|inner_suffix|>"},   // Apertus
+		{"<seed:think>", "</seed:think>"},          // Seed
+		{"[THINK]", "[/THINK]"},                    // Magistral
 	}
 
 	// Track the last position we've processed
diff --git a/pkg/reasoning/reasoning_test.go b/pkg/reasoning/reasoning_test.go
index a22cb9e22f91..796f106d9c82 100644
--- a/pkg/reasoning/reasoning_test.go
+++ b/pkg/reasoning/reasoning_test.go
@@ -381,5 +381,119 @@ var _ = Describe("Extract", func() {
 			Expect(reasoning).To(Equal("Reasoning"))
 			Expect(cleaned).To(Equal("content</thinking>more"))
 		})
+
+		It("should handle Command R7B closing tag", func() { // forced-open mode: content has no opening tag, text before the closing tag is reasoning
+			content := "Reasoning content<|END_THINKING|>actual response"
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(Equal("actual response"))
+		})
+
+		It("should handle Apertus closing tag", func() {
+			content := "Reasoning content<|inner_suffix|>actual response"
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(Equal("actual response"))
+		})
+
+		It("should handle Seed closing tag", func() {
+			content := "Reasoning content</seed:think>actual response"
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(Equal("actual response"))
+		})
+
+		It("should handle Magistral closing tag", func() {
+			content := "Reasoning content[/THINK]actual response"
+			reasoning, cleaned := Extract(content, WithThinkingForcedOpen())
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(Equal("actual response"))
+		})
+	})
+
+	Context("with model-specific tag pairs", func() { // every spec here calls Extract without options
+		It("should extract Command R7B reasoning tags", func() { // complete pair: the tagged span is removed, the spaces on both sides are kept
+			content := "Before <|START_THINKING|>reasoning here<|END_THINKING|> After"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("reasoning here"))
+			Expect(cleaned).To(Equal("Before  After"))
+		})
+
+		It("should extract Apertus reasoning tags", func() {
+			content := "Before <|inner_prefix|>reasoning here<|inner_suffix|> After"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("reasoning here"))
+			Expect(cleaned).To(Equal("Before  After"))
+		})
+
+		It("should extract Seed reasoning tags", func() {
+			content := "Before <seed:think>reasoning here</seed:think> After"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("reasoning here"))
+			Expect(cleaned).To(Equal("Before  After"))
+		})
+
+		It("should extract Magistral reasoning tags", func() {
+			content := "Before [THINK]reasoning here[/THINK] After"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("reasoning here"))
+			Expect(cleaned).To(Equal("Before  After"))
+		})
+
+		It("should handle unclosed Command R7B tag", func() { // opening tag without a closing tag: everything after it is expected as reasoning
+			content := "Before <|START_THINKING|>reasoning still streaming"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("reasoning still streaming"))
+			Expect(cleaned).To(Equal("Before "))
+		})
+
+		It("should handle unclosed Apertus tag", func() {
+			content := "Before <|inner_prefix|>reasoning still streaming"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("reasoning still streaming"))
+			Expect(cleaned).To(Equal("Before "))
+		})
+
+		It("should handle unclosed Seed tag", func() {
+			content := "Before <seed:think>reasoning still streaming"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("reasoning still streaming"))
+			Expect(cleaned).To(Equal("Before "))
+		})
+
+		It("should handle unclosed Magistral tag", func() {
+			content := "Before [THINK]reasoning still streaming"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("reasoning still streaming"))
+			Expect(cleaned).To(Equal("Before "))
+		})
+
+		It("should handle closing-only Command R7B tag", func() { // closing tag with no opening tag and no forced-open option: text before it is still expected as reasoning
+			content := "Reasoning content<|END_THINKING|>actual response"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(Equal("actual response"))
+		})
+
+		It("should handle closing-only Apertus tag", func() {
+			content := "Reasoning content<|inner_suffix|>actual response"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(Equal("actual response"))
+		})
+
+		It("should handle closing-only Seed tag", func() {
+			content := "Reasoning content</seed:think>actual response"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(Equal("actual response"))
+		})
+
+		It("should handle closing-only Magistral tag", func() {
+			content := "Reasoning content[/THINK]actual response"
+			reasoning, cleaned := Extract(content)
+			Expect(reasoning).To(Equal("Reasoning content"))
+			Expect(cleaned).To(Equal("actual response"))
+		})
 	})
 })