From 187e474daf25ac55a98f50d3a0a53bc011e7a496 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 20 Jan 2026 10:15:56 +0100 Subject: [PATCH 1/3] fix(reasoning): handle only closing tags Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/chat.go | 27 +++- pkg/functions/parse.go | 5 + pkg/functions/reasoning.go | 138 +++++++++++++++++++- pkg/functions/reasoning_test.go | 199 ++++++++++++++++++++++++----- 4 files changed, 324 insertions(+), 45 deletions(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 4ece68d5c0a8..a191c612c31f 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -43,10 +43,18 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator lastEmittedReasoning := "" lastEmittedCleanedContent := "" + // Configure reasoning extraction options + // Auto-detect if prompt ends with thinking tag (like llama.cpp does) + // or use explicit config setting + thinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(s) + reasoningOpts := functions.ReasoningOptions{ + ThinkingForcedOpen: thinkingForcedOpen, + } + _, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { accumulatedContent += s - // Extract reasoning from accumulated content - currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent) + // Extract reasoning from accumulated content with options + currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent, reasoningOpts) // Calculate new reasoning delta (what we haven't emitted yet) var reasoningDelta *string @@ -230,7 +238,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator return err } // Extract reasoning before processing tool calls - reasoning, cleanedResult := 
functions.ExtractReasoning(result) + // Auto-detect if prompt ends with thinking tag or use explicit config + toolsThinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(prompt) + toolsReasoningOpts := functions.ReasoningOptions{ + ThinkingForcedOpen: toolsThinkingForcedOpen, + } + reasoning, cleanedResult := functions.ExtractReasoning(result, toolsReasoningOpts) result = cleanedResult textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) @@ -618,9 +631,15 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator // no streaming mode default: + // Auto-detect if prompt ends with thinking tag for non-streaming mode + nonStreamThinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(predInput) + tokenCallback := func(s string, c *[]schema.Choice) { // Extract reasoning from the response - reasoning, cleanedS := functions.ExtractReasoning(s) + nonStreamReasoningOpts := functions.ReasoningOptions{ + ThinkingForcedOpen: nonStreamThinkingForcedOpen, + } + reasoning, cleanedS := functions.ExtractReasoning(s, nonStreamReasoningOpts) s = cleanedS if !shouldUseFn { diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 9f14208f1f6d..0fca6514ec1f 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -111,6 +111,11 @@ type FunctionsConfig struct { // XMLFormat is an optional custom XML format configuration // If set, only this format will be tried (overrides XMLFormatPreset) XMLFormat *XMLToolCallFormat `yaml:"xml_format,omitempty" json:"xml_format,omitempty"` + + // ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag. + // When true, all content from the start is treated as reasoning until a closing tag is found. + // This is useful for models like GLM-4 that output reasoning without <think> but end with </think>.
+ ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"` } // @Description ReplaceResult defines a key-value replacement for function results diff --git a/pkg/functions/reasoning.go b/pkg/functions/reasoning.go index d3cf05808893..96fd098c57c0 100644 --- a/pkg/functions/reasoning.go +++ b/pkg/functions/reasoning.go @@ -4,11 +4,107 @@ import ( "strings" ) +// Common thinking/reasoning opening tags used by various models +var thinkingOpenTags = []string{ + "\n", + "", + "\n", + "", + "<|inner_prefix|>", // Apertus + "<|START_THINKING|>", // Command R7B + "", // Seed + "[THINK]\n", // Magistral + "[THINK]", +} + +// ReasoningOptions configures how reasoning extraction behaves +type ReasoningOptions struct { + // ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag. + // When true, all content from the start is treated as reasoning until a closing tag is found. + // This is useful for models like GLM-4 that output reasoning without but end with . + ThinkingForcedOpen bool +} + +// DetectThinkingForcedOpen checks if a prompt ends with a thinking opening tag. +// This is used to automatically detect when the model template has already added +// the opening thinking tag, meaning the model will output reasoning content directly. +// Returns true if the prompt ends with a known thinking opening tag. +func DetectThinkingForcedOpen(prompt string) bool { + for _, tag := range thinkingOpenTags { + if strings.HasSuffix(prompt, tag) { + return true + } + } + return false +} + // ExtractReasoning extracts reasoning content from thinking tags and returns // both the extracted reasoning and the cleaned content (with tags removed). // It handles ... and ... tags. // Multiple reasoning blocks are concatenated with newlines. 
-func ExtractReasoning(content string) (reasoning string, cleanedContent string) { +// It also handles the case where only a closing tag is present (no opening tag), +// in which case everything before the closing tag is treated as reasoning. +// +// When opts.ThinkingForcedOpen is true, all content from the start is treated as reasoning +// until a closing tag ( or ) is found. This is useful for models +// whose templates add the opening tag, so the model outputs reasoning directly. +func ExtractReasoning(content string, opts ReasoningOptions) (reasoning string, cleanedContent string) { + if content == "" { + return "", content + } + + if opts.ThinkingForcedOpen { + return extractReasoningForcedOpen(content) + } + + return extractReasoningFromTags(content) +} + +// extractReasoningForcedOpen handles the case where reasoning starts without an opening tag. +// All content from the start is treated as reasoning until a closing tag is found. +func extractReasoningForcedOpen(content string) (reasoning string, cleanedContent string) { + // Look for the earliest closing tag + closingTags := []string{"", ""} + + earliestCloseIdx := -1 + var matchedCloseTag string + + for _, closeTag := range closingTags { + idx := strings.Index(content, closeTag) + if idx != -1 && (earliestCloseIdx == -1 || idx < earliestCloseIdx) { + earliestCloseIdx = idx + matchedCloseTag = closeTag + } + } + + if earliestCloseIdx == -1 { + // No closing tag found - all content is reasoning (still streaming) + return strings.TrimSpace(content), "" + } + + // Found closing tag - everything before is reasoning, everything after is content + reasoning = strings.TrimSpace(content[:earliestCloseIdx]) + cleanedContent = content[earliestCloseIdx+len(matchedCloseTag):] + + // Continue processing the rest for any additional reasoning blocks + if cleanedContent != "" { + additionalReasoning, finalContent := extractReasoningFromTags(cleanedContent) + if additionalReasoning != "" { + if reasoning != "" { + 
reasoning = reasoning + "\n\n" + additionalReasoning + } else { + reasoning = additionalReasoning + } + } + cleanedContent = finalContent + } + + return reasoning, cleanedContent +} + +// extractReasoningFromTags extracts reasoning content from thinking tags. +// This is the core implementation that handles standard tag-based extraction. +func extractReasoningFromTags(content string) (reasoning string, cleanedContent string) { if content == "" { return "", content } @@ -34,6 +130,7 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string) earliestStart := -1 earliestEnd := -1 isUnclosed := false + isClosingOnly := false var matchedTag struct { start string end string @@ -41,30 +138,48 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string) for _, tagPair := range tagPairs { startIdx := strings.Index(remaining[lastPos:], tagPair.start) + endIdx := strings.Index(remaining[lastPos:], tagPair.end) + + // Check for closing-only tag (closing tag appears before or without opening tag) + if endIdx != -1 && (startIdx == -1 || endIdx < startIdx) { + // Found a closing tag without a preceding opening tag + closingTagPos := endIdx + lastPos + if earliestStart == -1 || closingTagPos < earliestStart || (isClosingOnly && closingTagPos < earliestEnd) { + earliestStart = lastPos + earliestEnd = closingTagPos + len(tagPair.end) + isClosingOnly = true + isUnclosed = false + matchedTag = tagPair + } + continue + } + if startIdx == -1 { continue } startIdx += lastPos - // Find the corresponding end tag - endIdx := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end) - if endIdx == -1 { + // Find the corresponding end tag after the start tag + endIdxAfterStart := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end) + if endIdxAfterStart == -1 { // Unclosed tag - extract what we have if earliestStart == -1 || startIdx < earliestStart { earliestStart = startIdx earliestEnd = len(remaining) isUnclosed = true 
+ isClosingOnly = false matchedTag = tagPair } continue } - endIdx += startIdx + len(tagPair.start) + endIdxAfterStart += startIdx + len(tagPair.start) // Found a complete tag pair if earliestStart == -1 || startIdx < earliestStart { earliestStart = startIdx - earliestEnd = endIdx + len(tagPair.end) + earliestEnd = endIdxAfterStart + len(tagPair.end) isUnclosed = false + isClosingOnly = false matchedTag = tagPair } } @@ -77,6 +192,17 @@ func ExtractReasoning(content string) (reasoning string, cleanedContent string) break } + if isClosingOnly { + // Closing tag without opening tag - content before closing tag is reasoning + reasoningContent := strings.TrimSpace(remaining[lastPos : earliestEnd-len(matchedTag.end)]) + if reasoningContent != "" { + reasoningParts = append(reasoningParts, reasoningContent) + } + // Move past the closing tag + lastPos = earliestEnd + continue + } + // Add content before the tag if earliestStart > lastPos { cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart]) diff --git a/pkg/functions/reasoning_test.go b/pkg/functions/reasoning_test.go index 3f7d0754195b..d60bb23e1456 100644 --- a/pkg/functions/reasoning_test.go +++ b/pkg/functions/reasoning_test.go @@ -8,25 +8,58 @@ import ( . 
"github.com/onsi/gomega" ) +var _ = Describe("DetectThinkingForcedOpen", func() { + It("should detect at end of prompt", func() { + Expect(DetectThinkingForcedOpen("Some prompt")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt\n")).To(BeTrue()) + }) + + It("should detect at end of prompt", func() { + Expect(DetectThinkingForcedOpen("Some prompt")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt\n")).To(BeTrue()) + }) + + It("should detect model-specific tags", func() { + Expect(DetectThinkingForcedOpen("Some prompt<|inner_prefix|>")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt<|START_THINKING|>")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt[THINK]")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt[THINK]\n")).To(BeTrue()) + }) + + It("should not detect if tag is in the middle", func() { + Expect(DetectThinkingForcedOpen("Some prompt")).To(BeFalse()) + Expect(DetectThinkingForcedOpen("reasoning")).To(BeFalse()) + }) + + It("should not detect if no thinking tag", func() { + Expect(DetectThinkingForcedOpen("Some regular prompt")).To(BeFalse()) + Expect(DetectThinkingForcedOpen("")).To(BeFalse()) + }) +}) + var _ = Describe("ExtractReasoning", func() { + // Default options (ThinkingForcedOpen = false) + defaultOpts := ReasoningOptions{} + Context("when content has no reasoning tags", func() { It("should return empty reasoning and original content", func() { content := "This is regular content without any tags." 
- reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal(content)) }) It("should handle empty string", func() { content := "" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(BeEmpty()) }) It("should handle content with only whitespace", func() { content := " \n\t " - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal(content)) }) @@ -35,42 +68,42 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has tags", func() { It("should extract reasoning from single thinking block", func() { content := "Some text This is my reasoning More text" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("This is my reasoning")) Expect(cleaned).To(Equal("Some text More text")) }) It("should extract reasoning and preserve surrounding content", func() { content := "Before Reasoning here After" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Reasoning here")) Expect(cleaned).To(Equal("Before After")) }) It("should handle thinking block at the start", func() { content := "Start reasoning Regular content" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Start reasoning")) Expect(cleaned).To(Equal(" Regular content")) }) It("should handle thinking block at the end", func() { content := "Regular content End reasoning" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("End 
reasoning")) Expect(cleaned).To(Equal("Regular content ")) }) It("should handle only thinking block", func() { content := "Only reasoning" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Only reasoning")) Expect(cleaned).To(BeEmpty()) }) It("should trim whitespace from reasoning content", func() { content := "Text \n Reasoning with spaces \n More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Reasoning with spaces")) Expect(cleaned).To(Equal("Text More")) }) @@ -79,21 +112,21 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has tags", func() { It("should extract reasoning from redacted_reasoning block", func() { content := "Text Redacted reasoning More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Redacted reasoning")) Expect(cleaned).To(Equal("Text More")) }) It("should handle redacted_reasoning with multiline content", func() { content := "Before Line 1\nLine 2\nLine 3 After" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3")) Expect(cleaned).To(Equal("Before After")) }) It("should handle redacted_reasoning with complex content", func() { content := "Start Complex reasoning\nwith\nmultiple\nlines End" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines")) Expect(cleaned).To(Equal("Start End")) }) @@ -102,14 +135,14 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has multiple reasoning blocks", func() { It("should concatenate multiple thinking blocks with newlines", func() { content := "Text First 
Middle Second End" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("First\n\nSecond")) Expect(cleaned).To(Equal("Text Middle End")) }) It("should handle multiple different tag types", func() { content := "A One B Two C Three D" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(ContainSubstring("One")) Expect(reasoning).To(ContainSubstring("Two")) Expect(reasoning).To(ContainSubstring("Three")) @@ -118,7 +151,7 @@ var _ = Describe("ExtractReasoning", func() { It("should handle nested tags correctly (extracts first match)", func() { content := "Text Outer Inner More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) // Should extract the outer thinking block Expect(reasoning).To(ContainSubstring("Outer")) Expect(reasoning).To(ContainSubstring("Inner")) @@ -129,28 +162,28 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has unclosed reasoning tags", func() { It("should extract unclosed thinking block", func() { content := "Text Unclosed reasoning" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Unclosed reasoning")) Expect(cleaned).To(Equal("Text ")) }) It("should extract unclosed think block", func() { content := "Before Incomplete" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Incomplete")) Expect(cleaned).To(Equal("Before ")) }) It("should extract unclosed redacted_reasoning block", func() { content := "Start Partial reasoning content" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Partial reasoning content")) 
Expect(cleaned).To(Equal("Start ")) }) It("should handle unclosed tag at the end", func() { content := "Regular content Unclosed at end" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Unclosed at end")) Expect(cleaned).To(Equal("Regular content ")) }) @@ -159,14 +192,14 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has empty reasoning blocks", func() { It("should ignore empty thinking block", func() { content := "Text More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal("Text More")) }) It("should ignore thinking block with only whitespace", func() { content := "Text \n\t More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal("Text More")) }) @@ -175,28 +208,28 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has reasoning tags with special characters", func() { It("should handle reasoning with newlines", func() { content := "Before Line 1\nLine 2\nLine 3 After" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3")) Expect(cleaned).To(Equal("Before After")) }) It("should handle reasoning with code blocks", func() { content := "Text Reasoning with ```code``` blocks More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Reasoning with ```code``` blocks")) Expect(cleaned).To(Equal("Text More")) }) It("should handle reasoning with JSON", func() { content := "Before {\"key\": \"value\"} After" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, 
defaultOpts) Expect(reasoning).To(Equal("{\"key\": \"value\"}")) Expect(cleaned).To(Equal("Before After")) }) It("should handle reasoning with HTML-like content", func() { content := "Text Reasoning with inside More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Reasoning with inside")) Expect(cleaned).To(Equal("Text More")) }) @@ -205,7 +238,7 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has reasoning mixed with regular content", func() { It("should preserve content order correctly", func() { content := "Start Reasoning Middle More reasoning End" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(ContainSubstring("Reasoning")) Expect(reasoning).To(ContainSubstring("More reasoning")) Expect(cleaned).To(Equal("Start Middle End")) @@ -213,30 +246,59 @@ var _ = Describe("ExtractReasoning", func() { It("should handle reasoning in the middle of a sentence", func() { content := "This is a reasoning sentence." 
- reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("reasoning")) Expect(cleaned).To(Equal("This is a sentence.")) }) }) - Context("edge cases", func() { + Context("edge cases without ThinkingForcedOpen", func() { It("should handle content with only opening tag", func() { content := "" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal("")) }) - It("should handle content with only closing tag", func() { + It("should handle content with only closing tag (no content before)", func() { content := "" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(BeEmpty()) - Expect(cleaned).To(Equal("")) + Expect(cleaned).To(BeEmpty()) + }) + + It("should extract reasoning when only closing tag is present", func() { + // GLM-4 style: reasoning content followed by closing tag without opening tag + content := "This is reasoning contentthis is the actual response" + reasoning, cleaned := ExtractReasoning(content, defaultOpts) + Expect(reasoning).To(Equal("This is reasoning content")) + Expect(cleaned).To(Equal("this is the actual response")) + }) + + It("should handle closing-only tag with multiline reasoning", func() { + content := "1. First point\n2. Second point\n3. Third pointFinal answer" + reasoning, cleaned := ExtractReasoning(content, defaultOpts) + Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. 
Third point")) + Expect(cleaned).To(Equal("Final answer")) + }) + + It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() { + content := "**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.this is a test" + reasoning, cleaned := ExtractReasoning(content, defaultOpts) + Expect(reasoning).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.")) + Expect(cleaned).To(Equal("this is a test")) + }) + + It("should handle closing-only thinking tag", func() { + content := "Some reasoning hereactual content" + reasoning, cleaned := ExtractReasoning(content, defaultOpts) + Expect(reasoning).To(Equal("Some reasoning here")) + Expect(cleaned).To(Equal("actual content")) }) It("should handle mismatched tags", func() { content := "Content" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) // Should extract unclosed thinking block Expect(reasoning).To(ContainSubstring("Content")) Expect(cleaned).To(Equal("")) @@ -245,7 +307,7 @@ var _ = Describe("ExtractReasoning", func() { It("should handle very long reasoning content", func() { longReasoning := strings.Repeat("This is reasoning content. 
", 100) content := "Text " + longReasoning + " More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) // TrimSpace is applied, so we need to account for that Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning))) Expect(cleaned).To(Equal("Text More")) @@ -253,9 +315,76 @@ var _ = Describe("ExtractReasoning", func() { It("should handle reasoning with unicode characters", func() { content := "Text Reasoning with 中文 and emoji 🧠 More" - reasoning, cleaned := ExtractReasoning(content) + reasoning, cleaned := ExtractReasoning(content, defaultOpts) Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠")) Expect(cleaned).To(Equal("Text More")) }) }) + + Context("when ThinkingForcedOpen is true", func() { + forcedOpenOpts := ReasoningOptions{ThinkingForcedOpen: true} + + It("should treat all content as reasoning until closing tag", func() { + content := "This is reasoningthis is content" + reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + Expect(reasoning).To(Equal("This is reasoning")) + Expect(cleaned).To(Equal("this is content")) + }) + + It("should treat all content as reasoning when no closing tag (streaming)", func() { + content := "This is reasoning content still streaming" + reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + Expect(reasoning).To(Equal("This is reasoning content still streaming")) + Expect(cleaned).To(BeEmpty()) + }) + + It("should handle GLM-4 style output", func() { + content := "**Analyze:** The user says something.\n\n**Final Decision:** Output the text.this is a test" + reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + Expect(reasoning).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the text.")) + Expect(cleaned).To(Equal("this is a test")) + }) + + It("should handle multiline reasoning with closing tag", func() { + content := "1. First point\n2. Second point\n3. 
Third pointFinal answer" + reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point")) + Expect(cleaned).To(Equal("Final answer")) + }) + + It("should handle closing tag", func() { + content := "Some reasoning hereactual content" + reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + Expect(reasoning).To(Equal("Some reasoning here")) + Expect(cleaned).To(Equal("actual content")) + }) + + It("should handle additional reasoning blocks after initial forced open", func() { + content := "Initial reasoningcontentmore reasoningfinal content" + reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + Expect(reasoning).To(Equal("Initial reasoning\n\nmore reasoning")) + Expect(cleaned).To(Equal("contentfinal content")) + }) + + It("should handle empty content", func() { + reasoning, cleaned := ExtractReasoning("", forcedOpenOpts) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(BeEmpty()) + }) + + It("should handle only closing tag", func() { + content := "only content" + reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(Equal("only content")) + }) + + It("should find earliest closing tag", func() { + // comes before + content := "Reasoningcontentmore" + reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + Expect(reasoning).To(Equal("Reasoning")) + Expect(cleaned).To(Equal("contentmore")) + }) + }) }) From a35212572653f6388ae94b4b355f5b20ba109842 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 20 Jan 2026 11:48:00 +0100 Subject: [PATCH 2/3] chore: refactorings Signed-off-by: Ettore Di Giacinto --- core/config/model_config.go | 2 + core/http/endpoints/openai/chat.go | 56 +++++----- pkg/functions/parse.go | 5 - pkg/reasoning/config.go | 8 ++ pkg/reasoning/options.go | 18 +++ pkg/{functions => reasoning}/reasoning.go | 40 +++---- pkg/reasoning/reasoning_suite_test.go | 13 
+++ .../reasoning_test.go | 105 +++++++++--------- 8 files changed, 139 insertions(+), 108 deletions(-) create mode 100644 pkg/reasoning/config.go create mode 100644 pkg/reasoning/options.go rename pkg/{functions => reasoning}/reasoning.go (81%) create mode 100644 pkg/reasoning/reasoning_suite_test.go rename pkg/{functions => reasoning}/reasoning_test.go (79%) diff --git a/core/config/model_config.go b/core/config/model_config.go index 9010c84e60c3..794e9db56fef 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -10,6 +10,7 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/reasoning" "github.com/mudler/cogito" "gopkg.in/yaml.v3" ) @@ -51,6 +52,7 @@ type ModelConfig struct { ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"` FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"` + ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"` FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early. // LLM configs (GPT4ALL, Llama.cpp, ...) 
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index a191c612c31f..d4aaed20cc90 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -13,6 +13,7 @@ import ( "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/reasoning" "github.com/mudler/LocalAI/core/templates" "github.com/mudler/LocalAI/pkg/model" @@ -44,17 +45,18 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator lastEmittedCleanedContent := "" // Configure reasoning extraction options - // Auto-detect if prompt ends with thinking tag (like llama.cpp does) + // Auto-detect if prompt ends with thinking tag // or use explicit config setting - thinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(s) - reasoningOpts := functions.ReasoningOptions{ - ThinkingForcedOpen: thinkingForcedOpen, - } + thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s) _, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { accumulatedContent += s - // Extract reasoning from accumulated content with options - currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent, reasoningOpts) + // Extract reasoning from accumulated content + opts := []reasoning.Option{} + if thinkingForcedOpen { + opts = append(opts, reasoning.WithThinkingForcedOpen()) + } + currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...) 
// Calculate new reasoning delta (what we haven't emitted yet) var reasoningDelta *string @@ -239,11 +241,12 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator } // Extract reasoning before processing tool calls // Auto-detect if prompt ends with thinking tag or use explicit config - toolsThinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(prompt) - toolsReasoningOpts := functions.ReasoningOptions{ - ThinkingForcedOpen: toolsThinkingForcedOpen, + toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt) + opts := []reasoning.Option{} + if toolsThinkingForcedOpen { + opts = append(opts, reasoning.WithThinkingForcedOpen()) } - reasoning, cleanedResult := functions.ExtractReasoning(result, toolsReasoningOpts) + extractedReasoning, cleanedResult := reasoning.Extract(result, opts...) result = cleanedResult textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) @@ -279,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator } var deltaReasoning *string - if reasoning != "" { - deltaReasoning = &reasoning + if extractedReasoning != "" { + deltaReasoning = &extractedReasoning } delta := &schema.Message{Content: &result} if deltaReasoning != nil { @@ -632,22 +635,23 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator default: // Auto-detect if prompt ends with thinking tag for non-streaming mode - nonStreamThinkingForcedOpen := config.FunctionsConfig.ThinkingForcedOpen || functions.DetectThinkingForcedOpen(predInput) + nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput) tokenCallback := func(s string, c *[]schema.Choice) { // Extract reasoning from the response - nonStreamReasoningOpts := functions.ReasoningOptions{ - ThinkingForcedOpen: nonStreamThinkingForcedOpen, + var 
extractedReasoning string + opts := []reasoning.Option{} + if nonStreamThinkingForcedOpen { + opts = append(opts, reasoning.WithThinkingForcedOpen()) } - reasoning, cleanedS := functions.ExtractReasoning(s, nonStreamReasoningOpts) - s = cleanedS + extractedReasoning, s = reasoning.Extract(s, opts...) if !shouldUseFn { // no function is called, just reply and use stop as finish reason stopReason := FinishReasonStop message := &schema.Message{Role: "assistant", Content: &s} - if reasoning != "" { - message.Reasoning = &reasoning + if extractedReasoning != "" { + message.Reasoning = &extractedReasoning } *c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message}) return @@ -669,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator stopReason := FinishReasonStop message := &schema.Message{Role: "assistant", Content: &result} - if reasoning != "" { - message.Reasoning = &reasoning + if extractedReasoning != "" { + message.Reasoning = &extractedReasoning } *c = append(*c, schema.Choice{ FinishReason: &stopReason, @@ -683,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator Role: "assistant", }, } - if reasoning != "" { - toolChoice.Message.Reasoning = &reasoning + if extractedReasoning != "" { + toolChoice.Message.Reasoning = &extractedReasoning } for _, ss := range results { @@ -714,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator "arguments": args, }, } - if reasoning != "" { - message.Reasoning = &reasoning + if extractedReasoning != "" { + message.Reasoning = &extractedReasoning } *c = append(*c, schema.Choice{ FinishReason: &functionCallReason, diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 0fca6514ec1f..9f14208f1f6d 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -111,11 +111,6 @@ type FunctionsConfig struct { // XMLFormat is an optional custom XML format configuration // If set, 
only this format will be tried (overrides XMLFormatPreset) XMLFormat *XMLToolCallFormat `yaml:"xml_format,omitempty" json:"xml_format,omitempty"` - - // ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag. - // When true, all content from the start is treated as reasoning until a closing tag is found. - // This is useful for models like GLM-4 that output reasoning without but end with . - ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"` } // @Description ReplaceResult defines a key-value replacement for function results diff --git a/pkg/reasoning/config.go b/pkg/reasoning/config.go new file mode 100644 index 000000000000..d8edb9d750c8 --- /dev/null +++ b/pkg/reasoning/config.go @@ -0,0 +1,8 @@ +package reasoning + +type ReasoningConfig struct { + // ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag. + // When true, all content from the start is treated as reasoning until a closing tag is found. + // This is useful for models like GLM-4 that output reasoning without an opening thinking tag but still end it with a closing tag. + ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"` +} diff --git a/pkg/reasoning/options.go b/pkg/reasoning/options.go new file mode 100644 index 000000000000..146f2618f41e --- /dev/null +++ b/pkg/reasoning/options.go @@ -0,0 +1,18 @@ +package reasoning + +// options holds the configuration for reasoning extraction +type options struct { + thinkingForcedOpen bool +} + +// Option is a functional option for configuring reasoning extraction +type Option func(*options) + +// WithThinkingForcedOpen configures the extractor to treat all content from the start +// as reasoning until a closing tag is found. This is useful for models like GLM-4 +// that output reasoning without an opening thinking tag but still end it with a closing tag.
+func WithThinkingForcedOpen() Option { + return func(o *options) { + o.thinkingForcedOpen = true + } +} diff --git a/pkg/functions/reasoning.go b/pkg/reasoning/reasoning.go similarity index 81% rename from pkg/functions/reasoning.go rename to pkg/reasoning/reasoning.go index 96fd098c57c0..6d85566cc6de 100644 --- a/pkg/functions/reasoning.go +++ b/pkg/reasoning/reasoning.go @@ -1,4 +1,4 @@ -package functions +package reasoning import ( "strings" @@ -17,14 +17,6 @@ var thinkingOpenTags = []string{ "[THINK]", } -// ReasoningOptions configures how reasoning extraction behaves -type ReasoningOptions struct { - // ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag. - // When true, all content from the start is treated as reasoning until a closing tag is found. - // This is useful for models like GLM-4 that output reasoning without but end with . - ThinkingForcedOpen bool -} - // DetectThinkingForcedOpen checks if a prompt ends with a thinking opening tag. // This is used to automatically detect when the model template has already added // the opening thinking tag, meaning the model will output reasoning content directly. @@ -38,31 +30,35 @@ func DetectThinkingForcedOpen(prompt string) bool { return false } -// ExtractReasoning extracts reasoning content from thinking tags and returns +// Extract extracts reasoning content from thinking tags and returns // both the extracted reasoning and the cleaned content (with tags removed). // It handles ... and ... tags. // Multiple reasoning blocks are concatenated with newlines. // It also handles the case where only a closing tag is present (no opening tag), // in which case everything before the closing tag is treated as reasoning. // -// When opts.ThinkingForcedOpen is true, all content from the start is treated as reasoning -// until a closing tag ( or ) is found. This is useful for models -// whose templates add the opening tag, so the model outputs reasoning directly. 
-func ExtractReasoning(content string, opts ReasoningOptions) (reasoning string, cleanedContent string) { +// Use WithThinkingForcedOpen() option when all content from the start should be +// treated as reasoning until a closing tag is found. +func Extract(content string, opts ...Option) (reasoning string, cleanedContent string) { if content == "" { return "", content } - if opts.ThinkingForcedOpen { - return extractReasoningForcedOpen(content) + cfg := &options{} + for _, opt := range opts { + opt(cfg) + } + + if cfg.thinkingForcedOpen { + return extractForcedOpen(content) } - return extractReasoningFromTags(content) + return extractFromTags(content) } -// extractReasoningForcedOpen handles the case where reasoning starts without an opening tag. +// extractForcedOpen handles the case where reasoning starts without an opening tag. // All content from the start is treated as reasoning until a closing tag is found. -func extractReasoningForcedOpen(content string) (reasoning string, cleanedContent string) { +func extractForcedOpen(content string) (reasoning string, cleanedContent string) { // Look for the earliest closing tag closingTags := []string{"", ""} @@ -88,7 +84,7 @@ func extractReasoningForcedOpen(content string) (reasoning string, cleanedConten // Continue processing the rest for any additional reasoning blocks if cleanedContent != "" { - additionalReasoning, finalContent := extractReasoningFromTags(cleanedContent) + additionalReasoning, finalContent := extractFromTags(cleanedContent) if additionalReasoning != "" { if reasoning != "" { reasoning = reasoning + "\n\n" + additionalReasoning @@ -102,9 +98,9 @@ func extractReasoningForcedOpen(content string) (reasoning string, cleanedConten return reasoning, cleanedContent } -// extractReasoningFromTags extracts reasoning content from thinking tags. +// extractFromTags extracts reasoning content from thinking tags. // This is the core implementation that handles standard tag-based extraction. 
-func extractReasoningFromTags(content string) (reasoning string, cleanedContent string) { +func extractFromTags(content string) (reasoning string, cleanedContent string) { if content == "" { return "", content } diff --git a/pkg/reasoning/reasoning_suite_test.go b/pkg/reasoning/reasoning_suite_test.go new file mode 100644 index 000000000000..bfd983c33c6e --- /dev/null +++ b/pkg/reasoning/reasoning_suite_test.go @@ -0,0 +1,13 @@ +package reasoning_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestReasoning(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Reasoning Suite") +} diff --git a/pkg/functions/reasoning_test.go b/pkg/reasoning/reasoning_test.go similarity index 79% rename from pkg/functions/reasoning_test.go rename to pkg/reasoning/reasoning_test.go index d60bb23e1456..a22cb9e22f91 100644 --- a/pkg/functions/reasoning_test.go +++ b/pkg/reasoning/reasoning_test.go @@ -1,9 +1,9 @@ -package functions_test +package reasoning_test import ( "strings" - . "github.com/mudler/LocalAI/pkg/functions" + . "github.com/mudler/LocalAI/pkg/reasoning" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -38,28 +38,25 @@ var _ = Describe("DetectThinkingForcedOpen", func() { }) }) -var _ = Describe("ExtractReasoning", func() { - // Default options (ThinkingForcedOpen = false) - defaultOpts := ReasoningOptions{} - +var _ = Describe("Extract", func() { Context("when content has no reasoning tags", func() { It("should return empty reasoning and original content", func() { content := "This is regular content without any tags." 
- reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal(content)) }) It("should handle empty string", func() { content := "" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(BeEmpty()) }) It("should handle content with only whitespace", func() { content := " \n\t " - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal(content)) }) @@ -68,42 +65,42 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has tags", func() { It("should extract reasoning from single thinking block", func() { content := "Some text This is my reasoning More text" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("This is my reasoning")) Expect(cleaned).To(Equal("Some text More text")) }) It("should extract reasoning and preserve surrounding content", func() { content := "Before Reasoning here After" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Reasoning here")) Expect(cleaned).To(Equal("Before After")) }) It("should handle thinking block at the start", func() { content := "Start reasoning Regular content" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Start reasoning")) Expect(cleaned).To(Equal(" Regular content")) }) It("should handle thinking block at the end", func() { content := "Regular content End reasoning" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("End reasoning")) Expect(cleaned).To(Equal("Regular content ")) }) 
It("should handle only thinking block", func() { content := "Only reasoning" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Only reasoning")) Expect(cleaned).To(BeEmpty()) }) It("should trim whitespace from reasoning content", func() { content := "Text \n Reasoning with spaces \n More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Reasoning with spaces")) Expect(cleaned).To(Equal("Text More")) }) @@ -112,21 +109,21 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has tags", func() { It("should extract reasoning from redacted_reasoning block", func() { content := "Text Redacted reasoning More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Redacted reasoning")) Expect(cleaned).To(Equal("Text More")) }) It("should handle redacted_reasoning with multiline content", func() { content := "Before Line 1\nLine 2\nLine 3 After" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3")) Expect(cleaned).To(Equal("Before After")) }) It("should handle redacted_reasoning with complex content", func() { content := "Start Complex reasoning\nwith\nmultiple\nlines End" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines")) Expect(cleaned).To(Equal("Start End")) }) @@ -135,14 +132,14 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has multiple reasoning blocks", func() { It("should concatenate multiple thinking blocks with newlines", func() { content := "Text First Middle Second End" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := 
Extract(content) Expect(reasoning).To(Equal("First\n\nSecond")) Expect(cleaned).To(Equal("Text Middle End")) }) It("should handle multiple different tag types", func() { content := "A One B Two C Three D" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(ContainSubstring("One")) Expect(reasoning).To(ContainSubstring("Two")) Expect(reasoning).To(ContainSubstring("Three")) @@ -151,7 +148,7 @@ var _ = Describe("ExtractReasoning", func() { It("should handle nested tags correctly (extracts first match)", func() { content := "Text Outer Inner More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) // Should extract the outer thinking block Expect(reasoning).To(ContainSubstring("Outer")) Expect(reasoning).To(ContainSubstring("Inner")) @@ -162,28 +159,28 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has unclosed reasoning tags", func() { It("should extract unclosed thinking block", func() { content := "Text Unclosed reasoning" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Unclosed reasoning")) Expect(cleaned).To(Equal("Text ")) }) It("should extract unclosed think block", func() { content := "Before Incomplete" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Incomplete")) Expect(cleaned).To(Equal("Before ")) }) It("should extract unclosed redacted_reasoning block", func() { content := "Start Partial reasoning content" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Partial reasoning content")) Expect(cleaned).To(Equal("Start ")) }) It("should handle unclosed tag at the end", func() { content := "Regular content Unclosed at end" - reasoning, cleaned := 
ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Unclosed at end")) Expect(cleaned).To(Equal("Regular content ")) }) @@ -192,14 +189,14 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has empty reasoning blocks", func() { It("should ignore empty thinking block", func() { content := "Text More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal("Text More")) }) It("should ignore thinking block with only whitespace", func() { content := "Text \n\t More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal("Text More")) }) @@ -208,28 +205,28 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has reasoning tags with special characters", func() { It("should handle reasoning with newlines", func() { content := "Before Line 1\nLine 2\nLine 3 After" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3")) Expect(cleaned).To(Equal("Before After")) }) It("should handle reasoning with code blocks", func() { content := "Text Reasoning with ```code``` blocks More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Reasoning with ```code``` blocks")) Expect(cleaned).To(Equal("Text More")) }) It("should handle reasoning with JSON", func() { content := "Before {\"key\": \"value\"} After" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("{\"key\": \"value\"}")) Expect(cleaned).To(Equal("Before After")) }) It("should handle reasoning with HTML-like content", func() { content := "Text Reasoning with inside 
More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Reasoning with inside")) Expect(cleaned).To(Equal("Text More")) }) @@ -238,7 +235,7 @@ var _ = Describe("ExtractReasoning", func() { Context("when content has reasoning mixed with regular content", func() { It("should preserve content order correctly", func() { content := "Start Reasoning Middle More reasoning End" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(ContainSubstring("Reasoning")) Expect(reasoning).To(ContainSubstring("More reasoning")) Expect(cleaned).To(Equal("Start Middle End")) @@ -246,23 +243,23 @@ var _ = Describe("ExtractReasoning", func() { It("should handle reasoning in the middle of a sentence", func() { content := "This is a reasoning sentence." - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("reasoning")) Expect(cleaned).To(Equal("This is a sentence.")) }) }) - Context("edge cases without ThinkingForcedOpen", func() { + Context("edge cases without WithThinkingForcedOpen", func() { It("should handle content with only opening tag", func() { content := "" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal("")) }) It("should handle content with only closing tag (no content before)", func() { content := "" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(BeEmpty()) }) @@ -270,35 +267,35 @@ var _ = Describe("ExtractReasoning", func() { It("should extract reasoning when only closing tag is present", func() { // GLM-4 style: reasoning content followed by closing tag without opening tag content := "This is reasoning contentthis is the actual 
response" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("This is reasoning content")) Expect(cleaned).To(Equal("this is the actual response")) }) It("should handle closing-only tag with multiline reasoning", func() { content := "1. First point\n2. Second point\n3. Third pointFinal answer" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point")) Expect(cleaned).To(Equal("Final answer")) }) It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() { content := "**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.this is a test" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.")) Expect(cleaned).To(Equal("this is a test")) }) It("should handle closing-only thinking tag", func() { content := "Some reasoning hereactual content" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Some reasoning here")) Expect(cleaned).To(Equal("actual content")) }) It("should handle mismatched tags", func() { content := "Content" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) // Should extract unclosed thinking block Expect(reasoning).To(ContainSubstring("Content")) Expect(cleaned).To(Equal("")) @@ -307,7 +304,7 @@ var _ = Describe("ExtractReasoning", func() { It("should handle very long reasoning content", func() { longReasoning := strings.Repeat("This is reasoning content. 
", 100) content := "Text " + longReasoning + " More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) // TrimSpace is applied, so we need to account for that Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning))) Expect(cleaned).To(Equal("Text More")) @@ -315,66 +312,64 @@ var _ = Describe("ExtractReasoning", func() { It("should handle reasoning with unicode characters", func() { content := "Text Reasoning with 中文 and emoji 🧠 More" - reasoning, cleaned := ExtractReasoning(content, defaultOpts) + reasoning, cleaned := Extract(content) Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠")) Expect(cleaned).To(Equal("Text More")) }) }) - Context("when ThinkingForcedOpen is true", func() { - forcedOpenOpts := ReasoningOptions{ThinkingForcedOpen: true} - + Context("with WithThinkingForcedOpen option", func() { It("should treat all content as reasoning until closing tag", func() { content := "This is reasoningthis is content" - reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) Expect(reasoning).To(Equal("This is reasoning")) Expect(cleaned).To(Equal("this is content")) }) It("should treat all content as reasoning when no closing tag (streaming)", func() { content := "This is reasoning content still streaming" - reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) Expect(reasoning).To(Equal("This is reasoning content still streaming")) Expect(cleaned).To(BeEmpty()) }) It("should handle GLM-4 style output", func() { content := "**Analyze:** The user says something.\n\n**Final Decision:** Output the text.this is a test" - reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) Expect(reasoning).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the 
text.")) Expect(cleaned).To(Equal("this is a test")) }) It("should handle multiline reasoning with closing tag", func() { content := "1. First point\n2. Second point\n3. Third pointFinal answer" - reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. Third point")) Expect(cleaned).To(Equal("Final answer")) }) It("should handle closing tag", func() { content := "Some reasoning hereactual content" - reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) Expect(reasoning).To(Equal("Some reasoning here")) Expect(cleaned).To(Equal("actual content")) }) It("should handle additional reasoning blocks after initial forced open", func() { content := "Initial reasoningcontentmore reasoningfinal content" - reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) Expect(reasoning).To(Equal("Initial reasoning\n\nmore reasoning")) Expect(cleaned).To(Equal("contentfinal content")) }) It("should handle empty content", func() { - reasoning, cleaned := ExtractReasoning("", forcedOpenOpts) + reasoning, cleaned := Extract("", WithThinkingForcedOpen()) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(BeEmpty()) }) It("should handle only closing tag", func() { content := "only content" - reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) Expect(reasoning).To(BeEmpty()) Expect(cleaned).To(Equal("only content")) }) @@ -382,7 +377,7 @@ var _ = Describe("ExtractReasoning", func() { It("should find earliest closing tag", func() { // comes before content := "Reasoningcontentmore" - reasoning, cleaned := ExtractReasoning(content, forcedOpenOpts) + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) 
Expect(reasoning).To(Equal("Reasoning")) Expect(cleaned).To(Equal("contentmore")) }) From 61a6e95f7d04fddb1b8412aa58a5c741acaea967 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 20 Jan 2026 12:02:35 +0100 Subject: [PATCH 3/3] Additional thinking tags Signed-off-by: Ettore Di Giacinto --- pkg/reasoning/reasoning.go | 32 +++++++-- pkg/reasoning/reasoning_test.go | 114 ++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 6 deletions(-) diff --git a/pkg/reasoning/reasoning.go b/pkg/reasoning/reasoning.go index 6d85566cc6de..e07b5954d33b 100644 --- a/pkg/reasoning/reasoning.go +++ b/pkg/reasoning/reasoning.go @@ -4,16 +4,23 @@ import ( "strings" ) -// Common thinking/reasoning opening tags used by various models +// Common thinking/reasoning opening tags used by various models. +// These match the tags detected by llama.cpp in common/chat.cpp var thinkingOpenTags = []string{ + // DeepSeek R1, V3.1, Nemotron V2, MiniMax M2, Hermes 2 Pro, Granite, Exaone MOE "\n", "", + // Generic thinking tags "\n", "", - "<|inner_prefix|>", // Apertus - "<|START_THINKING|>", // Command R7B - "", // Seed - "[THINK]\n", // Magistral + // Apertus + "<|inner_prefix|>", + // Command R7B + "<|START_THINKING|>", + // Seed + "", + // Magistral (not in llama.cpp but common) + "[THINK]\n", "[THINK]", } @@ -60,7 +67,15 @@ func Extract(content string, opts ...Option) (reasoning string, cleanedContent s // All content from the start is treated as reasoning until a closing tag is found. 
func extractForcedOpen(content string) (reasoning string, cleanedContent string) { // Look for the earliest closing tag - closingTags := []string{"", ""} + // These match the closing tags used by llama.cpp for various models + closingTags := []string{ + "", + "", + "<|END_THINKING|>", // Command R7B + "<|inner_suffix|>", // Apertus + "", // Seed + "[/THINK]", // Magistral + } earliestCloseIdx := -1 var matchedCloseTag string @@ -110,12 +125,17 @@ func extractFromTags(content string) (reasoning string, cleanedContent string) { remaining := content // Define tag pairs to look for + // These match the tags used by llama.cpp for various models tagPairs := []struct { start string end string }{ {"", ""}, {"", ""}, + {"<|START_THINKING|>", "<|END_THINKING|>"}, // Command R7B + {"<|inner_prefix|>", "<|inner_suffix|>"}, // Apertus + {"", ""}, // Seed + {"[THINK]", "[/THINK]"}, // Magistral } // Track the last position we've processed diff --git a/pkg/reasoning/reasoning_test.go b/pkg/reasoning/reasoning_test.go index a22cb9e22f91..796f106d9c82 100644 --- a/pkg/reasoning/reasoning_test.go +++ b/pkg/reasoning/reasoning_test.go @@ -381,5 +381,119 @@ var _ = Describe("Extract", func() { Expect(reasoning).To(Equal("Reasoning")) Expect(cleaned).To(Equal("contentmore")) }) + + It("should handle Command R7B closing tag", func() { + content := "Reasoning content<|END_THINKING|>actual response" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle Apertus closing tag", func() { + content := "Reasoning content<|inner_suffix|>actual response" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle Seed closing tag", func() { + content := "Reasoning contentactual response" + reasoning, cleaned := Extract(content, 
WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle Magistral closing tag", func() { + content := "Reasoning content[/THINK]actual response" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + }) + + Context("with model-specific tag pairs", func() { + It("should extract Command R7B reasoning tags", func() { + content := "Before <|START_THINKING|>reasoning here<|END_THINKING|> After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should extract Apertus reasoning tags", func() { + content := "Before <|inner_prefix|>reasoning here<|inner_suffix|> After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should extract Seed reasoning tags", func() { + content := "Before reasoning here After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should extract Magistral reasoning tags", func() { + content := "Before [THINK]reasoning here[/THINK] After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should handle unclosed Command R7B tag", func() { + content := "Before <|START_THINKING|>reasoning still streaming" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning still streaming")) + Expect(cleaned).To(Equal("Before ")) + }) + + It("should handle unclosed Apertus tag", func() { + content := "Before <|inner_prefix|>reasoning still streaming" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning still streaming")) + 
Expect(cleaned).To(Equal("Before ")) + }) + + It("should handle unclosed Seed tag", func() { + content := "Before reasoning still streaming" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning still streaming")) + Expect(cleaned).To(Equal("Before ")) + }) + + It("should handle unclosed Magistral tag", func() { + content := "Before [THINK]reasoning still streaming" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning still streaming")) + Expect(cleaned).To(Equal("Before ")) + }) + + It("should handle closing-only Command R7B tag", func() { + content := "Reasoning content<|END_THINKING|>actual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle closing-only Apertus tag", func() { + content := "Reasoning content<|inner_suffix|>actual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle closing-only Seed tag", func() { + content := "Reasoning contentactual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle closing-only Magistral tag", func() { + content := "Reasoning content[/THINK]actual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) }) })