diff --git a/core/config/model_config.go b/core/config/model_config.go index 9010c84e60c3..794e9db56fef 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -10,6 +10,7 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/reasoning" "github.com/mudler/cogito" "gopkg.in/yaml.v3" ) @@ -51,6 +52,7 @@ type ModelConfig struct { ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"` FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"` + ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"` FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early. // LLM configs (GPT4ALL, Llama.cpp, ...) 
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 4ece68d5c0a8..d4aaed20cc90 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -13,6 +13,7 @@ import ( "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/reasoning" "github.com/mudler/LocalAI/core/templates" "github.com/mudler/LocalAI/pkg/model" @@ -43,10 +44,19 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator lastEmittedReasoning := "" lastEmittedCleanedContent := "" + // Configure reasoning extraction options + // Auto-detect if prompt ends with thinking tag + // or use explicit config setting + thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s) + _, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool { accumulatedContent += s // Extract reasoning from accumulated content - currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent) + opts := []reasoning.Option{} + if thinkingForcedOpen { + opts = append(opts, reasoning.WithThinkingForcedOpen()) + } + currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...) 
// Calculate new reasoning delta (what we haven't emitted yet) var reasoningDelta *string @@ -230,7 +240,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator return err } // Extract reasoning before processing tool calls - reasoning, cleanedResult := functions.ExtractReasoning(result) + // Auto-detect if prompt ends with thinking tag or use explicit config + toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt) + opts := []reasoning.Option{} + if toolsThinkingForcedOpen { + opts = append(opts, reasoning.WithThinkingForcedOpen()) + } + extractedReasoning, cleanedResult := reasoning.Extract(result, opts...) result = cleanedResult textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) @@ -266,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator } var deltaReasoning *string - if reasoning != "" { - deltaReasoning = &reasoning + if extractedReasoning != "" { + deltaReasoning = &extractedReasoning } delta := &schema.Message{Content: &result} if deltaReasoning != nil { @@ -618,17 +634,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator // no streaming mode default: + // Auto-detect if prompt ends with thinking tag for non-streaming mode + nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput) + tokenCallback := func(s string, c *[]schema.Choice) { // Extract reasoning from the response - reasoning, cleanedS := functions.ExtractReasoning(s) - s = cleanedS + var extractedReasoning string + opts := []reasoning.Option{} + if nonStreamThinkingForcedOpen { + opts = append(opts, reasoning.WithThinkingForcedOpen()) + } + extractedReasoning, s = reasoning.Extract(s, opts...) 
if !shouldUseFn { // no function is called, just reply and use stop as finish reason stopReason := FinishReasonStop message := &schema.Message{Role: "assistant", Content: &s} - if reasoning != "" { - message.Reasoning = &reasoning + if extractedReasoning != "" { + message.Reasoning = &extractedReasoning } *c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message}) return @@ -650,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator stopReason := FinishReasonStop message := &schema.Message{Role: "assistant", Content: &result} - if reasoning != "" { - message.Reasoning = &reasoning + if extractedReasoning != "" { + message.Reasoning = &extractedReasoning } *c = append(*c, schema.Choice{ FinishReason: &stopReason, @@ -664,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator Role: "assistant", }, } - if reasoning != "" { - toolChoice.Message.Reasoning = &reasoning + if extractedReasoning != "" { + toolChoice.Message.Reasoning = &extractedReasoning } for _, ss := range results { @@ -695,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator "arguments": args, }, } - if reasoning != "" { - message.Reasoning = &reasoning + if extractedReasoning != "" { + message.Reasoning = &extractedReasoning } *c = append(*c, schema.Choice{ FinishReason: &functionCallReason, diff --git a/pkg/functions/reasoning.go b/pkg/functions/reasoning.go deleted file mode 100644 index d3cf05808893..000000000000 --- a/pkg/functions/reasoning.go +++ /dev/null @@ -1,114 +0,0 @@ -package functions - -import ( - "strings" -) - -// ExtractReasoning extracts reasoning content from thinking tags and returns -// both the extracted reasoning and the cleaned content (with tags removed). -// It handles ... and ... tags. -// Multiple reasoning blocks are concatenated with newlines. 
-func ExtractReasoning(content string) (reasoning string, cleanedContent string) { - if content == "" { - return "", content - } - - var reasoningParts []string - var cleanedParts []string - remaining := content - - // Define tag pairs to look for - tagPairs := []struct { - start string - end string - }{ - {"", ""}, - {"", ""}, - } - - // Track the last position we've processed - lastPos := 0 - - for { - // Find the earliest tag start - earliestStart := -1 - earliestEnd := -1 - isUnclosed := false - var matchedTag struct { - start string - end string - } - - for _, tagPair := range tagPairs { - startIdx := strings.Index(remaining[lastPos:], tagPair.start) - if startIdx == -1 { - continue - } - startIdx += lastPos - - // Find the corresponding end tag - endIdx := strings.Index(remaining[startIdx+len(tagPair.start):], tagPair.end) - if endIdx == -1 { - // Unclosed tag - extract what we have - if earliestStart == -1 || startIdx < earliestStart { - earliestStart = startIdx - earliestEnd = len(remaining) - isUnclosed = true - matchedTag = tagPair - } - continue - } - endIdx += startIdx + len(tagPair.start) - - // Found a complete tag pair - if earliestStart == -1 || startIdx < earliestStart { - earliestStart = startIdx - earliestEnd = endIdx + len(tagPair.end) - isUnclosed = false - matchedTag = tagPair - } - } - - if earliestStart == -1 { - // No more tags found, add remaining content - if lastPos < len(remaining) { - cleanedParts = append(cleanedParts, remaining[lastPos:]) - } - break - } - - // Add content before the tag - if earliestStart > lastPos { - cleanedParts = append(cleanedParts, remaining[lastPos:earliestStart]) - } - - // Extract reasoning content - reasoningStart := earliestStart + len(matchedTag.start) - // For unclosed tags, earliestEnd is already at the end of the string - // For closed tags, earliestEnd points to after the closing tag, so we subtract the end tag length - var reasoningEnd int - if isUnclosed { - // Unclosed tag - extract everything to 
the end - reasoningEnd = len(remaining) - } else { - // Closed tag - exclude the end tag - reasoningEnd = earliestEnd - len(matchedTag.end) - } - if reasoningEnd > reasoningStart { - reasoningContent := strings.TrimSpace(remaining[reasoningStart:reasoningEnd]) - if reasoningContent != "" { - reasoningParts = append(reasoningParts, reasoningContent) - } - } - - // Move past this tag - lastPos = earliestEnd - } - - // Combine reasoning parts - reasoning = strings.Join(reasoningParts, "\n\n") - // Combine cleaned content parts - cleanedContent = strings.Join(cleanedParts, "") - - return reasoning, cleanedContent -} diff --git a/pkg/functions/reasoning_test.go b/pkg/functions/reasoning_test.go deleted file mode 100644 index 3f7d0754195b..000000000000 --- a/pkg/functions/reasoning_test.go +++ /dev/null @@ -1,261 +0,0 @@ -package functions_test - -import ( - "strings" - - . "github.com/mudler/LocalAI/pkg/functions" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -var _ = Describe("ExtractReasoning", func() { - Context("when content has no reasoning tags", func() { - It("should return empty reasoning and original content", func() { - content := "This is regular content without any tags." 
- reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(BeEmpty()) - Expect(cleaned).To(Equal(content)) - }) - - It("should handle empty string", func() { - content := "" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(BeEmpty()) - Expect(cleaned).To(BeEmpty()) - }) - - It("should handle content with only whitespace", func() { - content := " \n\t " - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(BeEmpty()) - Expect(cleaned).To(Equal(content)) - }) - }) - - Context("when content has tags", func() { - It("should extract reasoning from single thinking block", func() { - content := "Some text This is my reasoning More text" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("This is my reasoning")) - Expect(cleaned).To(Equal("Some text More text")) - }) - - It("should extract reasoning and preserve surrounding content", func() { - content := "Before Reasoning here After" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Reasoning here")) - Expect(cleaned).To(Equal("Before After")) - }) - - It("should handle thinking block at the start", func() { - content := "Start reasoning Regular content" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Start reasoning")) - Expect(cleaned).To(Equal(" Regular content")) - }) - - It("should handle thinking block at the end", func() { - content := "Regular content End reasoning" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("End reasoning")) - Expect(cleaned).To(Equal("Regular content ")) - }) - - It("should handle only thinking block", func() { - content := "Only reasoning" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Only reasoning")) - Expect(cleaned).To(BeEmpty()) - }) - - It("should trim whitespace from reasoning content", func() { - content := "Text \n Reasoning with spaces \n More" - reasoning, cleaned := 
ExtractReasoning(content) - Expect(reasoning).To(Equal("Reasoning with spaces")) - Expect(cleaned).To(Equal("Text More")) - }) - }) - - Context("when content has tags", func() { - It("should extract reasoning from redacted_reasoning block", func() { - content := "Text Redacted reasoning More" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Redacted reasoning")) - Expect(cleaned).To(Equal("Text More")) - }) - - It("should handle redacted_reasoning with multiline content", func() { - content := "Before Line 1\nLine 2\nLine 3 After" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3")) - Expect(cleaned).To(Equal("Before After")) - }) - - It("should handle redacted_reasoning with complex content", func() { - content := "Start Complex reasoning\nwith\nmultiple\nlines End" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines")) - Expect(cleaned).To(Equal("Start End")) - }) - }) - - Context("when content has multiple reasoning blocks", func() { - It("should concatenate multiple thinking blocks with newlines", func() { - content := "Text First Middle Second End" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("First\n\nSecond")) - Expect(cleaned).To(Equal("Text Middle End")) - }) - - It("should handle multiple different tag types", func() { - content := "A One B Two C Three D" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(ContainSubstring("One")) - Expect(reasoning).To(ContainSubstring("Two")) - Expect(reasoning).To(ContainSubstring("Three")) - Expect(cleaned).To(Equal("A B C D")) - }) - - It("should handle nested tags correctly (extracts first match)", func() { - content := "Text Outer Inner More" - reasoning, cleaned := ExtractReasoning(content) - // Should extract the outer thinking block - Expect(reasoning).To(ContainSubstring("Outer")) - 
Expect(reasoning).To(ContainSubstring("Inner")) - Expect(cleaned).To(Equal("Text More")) - }) - }) - - Context("when content has unclosed reasoning tags", func() { - It("should extract unclosed thinking block", func() { - content := "Text Unclosed reasoning" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Unclosed reasoning")) - Expect(cleaned).To(Equal("Text ")) - }) - - It("should extract unclosed think block", func() { - content := "Before Incomplete" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Incomplete")) - Expect(cleaned).To(Equal("Before ")) - }) - - It("should extract unclosed redacted_reasoning block", func() { - content := "Start Partial reasoning content" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Partial reasoning content")) - Expect(cleaned).To(Equal("Start ")) - }) - - It("should handle unclosed tag at the end", func() { - content := "Regular content Unclosed at end" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Unclosed at end")) - Expect(cleaned).To(Equal("Regular content ")) - }) - }) - - Context("when content has empty reasoning blocks", func() { - It("should ignore empty thinking block", func() { - content := "Text More" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(BeEmpty()) - Expect(cleaned).To(Equal("Text More")) - }) - - It("should ignore thinking block with only whitespace", func() { - content := "Text \n\t More" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(BeEmpty()) - Expect(cleaned).To(Equal("Text More")) - }) - }) - - Context("when content has reasoning tags with special characters", func() { - It("should handle reasoning with newlines", func() { - content := "Before Line 1\nLine 2\nLine 3 After" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3")) - Expect(cleaned).To(Equal("Before After")) - 
}) - - It("should handle reasoning with code blocks", func() { - content := "Text Reasoning with ```code``` blocks More" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Reasoning with ```code``` blocks")) - Expect(cleaned).To(Equal("Text More")) - }) - - It("should handle reasoning with JSON", func() { - content := "Before {\"key\": \"value\"} After" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("{\"key\": \"value\"}")) - Expect(cleaned).To(Equal("Before After")) - }) - - It("should handle reasoning with HTML-like content", func() { - content := "Text Reasoning with inside More" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Reasoning with inside")) - Expect(cleaned).To(Equal("Text More")) - }) - }) - - Context("when content has reasoning mixed with regular content", func() { - It("should preserve content order correctly", func() { - content := "Start Reasoning Middle More reasoning End" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(ContainSubstring("Reasoning")) - Expect(reasoning).To(ContainSubstring("More reasoning")) - Expect(cleaned).To(Equal("Start Middle End")) - }) - - It("should handle reasoning in the middle of a sentence", func() { - content := "This is a reasoning sentence." 
- reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("reasoning")) - Expect(cleaned).To(Equal("This is a sentence.")) - }) - }) - - Context("edge cases", func() { - It("should handle content with only opening tag", func() { - content := "" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(BeEmpty()) - Expect(cleaned).To(Equal("")) - }) - - It("should handle content with only closing tag", func() { - content := "" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(BeEmpty()) - Expect(cleaned).To(Equal("")) - }) - - It("should handle mismatched tags", func() { - content := "Content" - reasoning, cleaned := ExtractReasoning(content) - // Should extract unclosed thinking block - Expect(reasoning).To(ContainSubstring("Content")) - Expect(cleaned).To(Equal("")) - }) - - It("should handle very long reasoning content", func() { - longReasoning := strings.Repeat("This is reasoning content. ", 100) - content := "Text " + longReasoning + " More" - reasoning, cleaned := ExtractReasoning(content) - // TrimSpace is applied, so we need to account for that - Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning))) - Expect(cleaned).To(Equal("Text More")) - }) - - It("should handle reasoning with unicode characters", func() { - content := "Text Reasoning with 中文 and emoji 🧠 More" - reasoning, cleaned := ExtractReasoning(content) - Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠")) - Expect(cleaned).To(Equal("Text More")) - }) - }) -}) diff --git a/pkg/reasoning/config.go b/pkg/reasoning/config.go new file mode 100644 index 000000000000..d8edb9d750c8 --- /dev/null +++ b/pkg/reasoning/config.go @@ -0,0 +1,8 @@ +package reasoning + +type ReasoningConfig struct { + // ThinkingForcedOpen indicates that the model outputs reasoning without an opening tag. + // When true, all content from the start is treated as reasoning until a closing tag is found. 
+ // This is useful for models like GLM-4 that output reasoning without <think> but end with </think>. + ThinkingForcedOpen bool `yaml:"thinking_forced_open,omitempty" json:"thinking_forced_open,omitempty"` +} diff --git a/pkg/reasoning/options.go b/pkg/reasoning/options.go new file mode 100644 index 000000000000..146f2618f41e --- /dev/null +++ b/pkg/reasoning/options.go @@ -0,0 +1,18 @@ +package reasoning + +// options holds the configuration for reasoning extraction +type options struct { + thinkingForcedOpen bool +} + +// Option is a functional option for configuring reasoning extraction +type Option func(*options) + +// WithThinkingForcedOpen configures the extractor to treat all content from the start +// as reasoning until a closing tag is found. This is useful for models like GLM-4 +// that output reasoning without <think> but end with </think>. +func WithThinkingForcedOpen() Option { + return func(o *options) { + o.thinkingForcedOpen = true + } +} diff --git a/pkg/reasoning/reasoning.go b/pkg/reasoning/reasoning.go new file mode 100644 index 000000000000..e07b5954d33b --- /dev/null +++ b/pkg/reasoning/reasoning.go @@ -0,0 +1,256 @@ +package reasoning + +import ( + "strings" +) + +// Common thinking/reasoning opening tags used by various models. +// These match the tags detected by llama.cpp in common/chat.cpp +var thinkingOpenTags = []string{ + // DeepSeek R1, V3.1, Nemotron V2, MiniMax M2, Hermes 2 Pro, Granite, Exaone MOE + "\n", + "", + // Generic thinking tags + "\n", + "", + // Apertus + "<|inner_prefix|>", + // Command R7B + "<|START_THINKING|>", + // Seed + "", + // Magistral (not in llama.cpp but common) + "[THINK]\n", + "[THINK]", +} + +// DetectThinkingForcedOpen checks if a prompt ends with a thinking opening tag. +// This is used to automatically detect when the model template has already added +// the opening thinking tag, meaning the model will output reasoning content directly. +// Returns true if the prompt ends with a known thinking opening tag.
+func DetectThinkingForcedOpen(prompt string) bool { + for _, tag := range thinkingOpenTags { + if strings.HasSuffix(prompt, tag) { + return true + } + } + return false +} + +// Extract extracts reasoning content from thinking tags and returns +// both the extracted reasoning and the cleaned content (with tags removed). +// It handles ... and ... tags. +// Multiple reasoning blocks are concatenated with newlines. +// It also handles the case where only a closing tag is present (no opening tag), +// in which case everything before the closing tag is treated as reasoning. +// +// Use WithThinkingForcedOpen() option when all content from the start should be +// treated as reasoning until a closing tag is found. +func Extract(content string, opts ...Option) (reasoning string, cleanedContent string) { + if content == "" { + return "", content + } + + cfg := &options{} + for _, opt := range opts { + opt(cfg) + } + + if cfg.thinkingForcedOpen { + return extractForcedOpen(content) + } + + return extractFromTags(content) +} + +// extractForcedOpen handles the case where reasoning starts without an opening tag. +// All content from the start is treated as reasoning until a closing tag is found. 
func extractForcedOpen(content string) (reasoning string, cleanedContent string) {
	// Find the earliest closing tag of any known tag family.
	closeIdx, closeLen := -1, 0
	for _, pair := range reasoningTagPairs {
		idx := strings.Index(content, pair.end)
		if idx != -1 && (closeIdx == -1 || idx < closeIdx) {
			closeIdx, closeLen = idx, len(pair.end)
		}
	}

	if closeIdx == -1 {
		// No closing tag found - all content is reasoning (still streaming).
		return strings.TrimSpace(content), ""
	}

	// Everything before the closing tag is reasoning, everything after is content.
	reasoning = strings.TrimSpace(content[:closeIdx])
	rest := content[closeIdx+closeLen:]
	if rest == "" {
		return reasoning, ""
	}

	// The remainder may still contain ordinary tagged reasoning blocks.
	extra, cleaned := extractFromTags(rest)
	if extra != "" {
		if reasoning != "" {
			reasoning += "\n\n"
		}
		reasoning += extra
	}
	return reasoning, cleaned
}

// reasoningTagPair is one opening/closing delimiter family used by a model
// to wrap its reasoning.
type reasoningTagPair struct {
	start string
	end   string
}

// reasoningTagPairs is the single table consulted by extractForcedOpen and
// extractFromTags, so the two cannot drift apart. It is built once instead of
// on every call (extraction runs on the accumulated text for each streamed
// token).
//
// NOTE(review): the three angle-bracket families (thinking, think,
// seed:think) were empty strings in the reviewed copy; an empty tag matches
// at every position and makes extractFromTags spin forever. They are restored
// here following the llama.cpp tag names the file says it mirrors - confirm
// against upstream.
var reasoningTagPairs = []reasoningTagPair{
	{"<thinking>", "</thinking>"},
	{"<think>", "</think>"},
	// Command R7B
	{"<|START_THINKING|>", "<|END_THINKING|>"},
	// Apertus
	{"<|inner_prefix|>", "<|inner_suffix|>"},
	// Seed
	{"<seed:think>", "</seed:think>"},
	// Magistral
	{"[THINK]", "[/THINK]"},
}

// reasoningSpan describes the next reasoning region found for one tag pair.
type reasoningSpan struct {
	tagPos      int  // index of the first tag of the region (opening tag, or the stray closing tag)
	bodyStart   int  // start of the reasoning text
	bodyEnd     int  // end (exclusive) of the reasoning text
	next        int  // index at which scanning resumes
	closingOnly bool // closing tag seen before (or without) its opening tag
}

// locateReasoningSpan finds the next region delimited by pair in content at or
// after from. ok is false when neither tag of the pair occurs.
func locateReasoningSpan(content string, from int, pair reasoningTagPair) (span reasoningSpan, ok bool) {
	rest := content[from:]
	startIdx := strings.Index(rest, pair.start)
	endIdx := strings.Index(rest, pair.end)

	// Closing tag before (or without) an opening tag: the text since from is reasoning.
	if endIdx != -1 && (startIdx == -1 || endIdx < startIdx) {
		closePos := from + endIdx
		return reasoningSpan{
			tagPos:      closePos,
			bodyStart:   from,
			bodyEnd:     closePos,
			next:        closePos + len(pair.end),
			closingOnly: true,
		}, true
	}
	if startIdx == -1 {
		return reasoningSpan{}, false
	}

	openPos := from + startIdx
	bodyStart := openPos + len(pair.start)
	rel := strings.Index(content[bodyStart:], pair.end)
	if rel == -1 {
		// Unclosed tag - the reasoning runs to the end of the content.
		return reasoningSpan{tagPos: openPos, bodyStart: bodyStart, bodyEnd: len(content), next: len(content)}, true
	}
	bodyEnd := bodyStart + rel
	return reasoningSpan{tagPos: openPos, bodyStart: bodyStart, bodyEnd: bodyEnd, next: bodyEnd + len(pair.end)}, true
}

// extractFromTags extracts reasoning content from thinking tags.
// This is the core implementation that handles standard tag-based extraction.
//
// It returns the whitespace-trimmed text of every reasoning region joined by
// a blank line, and the content with those regions (and their tags) removed.
// Three region shapes are recognised: a complete opening/closing pair, an
// opening tag that is never closed (reasoning runs to the end), and a closing
// tag with no opening tag before it (the text in front of it is reasoning).
//
// On every step the region whose first tag appears earliest wins, whatever
// its position in reasoningTagPairs. The previous version let a stray closing
// tag of an earlier-listed pair swallow a properly opened block of a
// later-listed pair, leaking raw tags into the reasoning text.
func extractFromTags(content string) (reasoning string, cleanedContent string) {
	if content == "" {
		return "", content
	}

	var reasoningParts []string
	var cleaned strings.Builder
	pos := 0

	for pos < len(content) {
		best, found := reasoningSpan{}, false
		for _, pair := range reasoningTagPairs {
			span, ok := locateReasoningSpan(content, pos, pair)
			if ok && (!found || span.tagPos < best.tagPos) {
				best, found = span, true
			}
		}

		if !found {
			// No more tags: the remainder is regular content.
			cleaned.WriteString(content[pos:])
			break
		}

		if !best.closingOnly {
			// Text in front of an opening tag is regular content.
			cleaned.WriteString(content[pos:best.tagPos])
		}
		if body := strings.TrimSpace(content[best.bodyStart:best.bodyEnd]); body != "" {
			reasoningParts = append(reasoningParts, body)
		}

		// Every span ends strictly after pos, so the scan always advances.
		pos = best.next
	}

	return strings.Join(reasoningParts, "\n\n"), cleaned.String()
}

// diff --git a/pkg/reasoning/reasoning_suite_test.go
b/pkg/reasoning/reasoning_suite_test.go new file mode 100644 index 000000000000..bfd983c33c6e --- /dev/null +++ b/pkg/reasoning/reasoning_suite_test.go @@ -0,0 +1,13 @@ +package reasoning_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestReasoning(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Reasoning Suite") +} diff --git a/pkg/reasoning/reasoning_test.go b/pkg/reasoning/reasoning_test.go new file mode 100644 index 000000000000..796f106d9c82 --- /dev/null +++ b/pkg/reasoning/reasoning_test.go @@ -0,0 +1,499 @@ +package reasoning_test + +import ( + "strings" + + . "github.com/mudler/LocalAI/pkg/reasoning" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("DetectThinkingForcedOpen", func() { + It("should detect at end of prompt", func() { + Expect(DetectThinkingForcedOpen("Some prompt")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt\n")).To(BeTrue()) + }) + + It("should detect at end of prompt", func() { + Expect(DetectThinkingForcedOpen("Some prompt")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt\n")).To(BeTrue()) + }) + + It("should detect model-specific tags", func() { + Expect(DetectThinkingForcedOpen("Some prompt<|inner_prefix|>")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt<|START_THINKING|>")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt[THINK]")).To(BeTrue()) + Expect(DetectThinkingForcedOpen("Some prompt[THINK]\n")).To(BeTrue()) + }) + + It("should not detect if tag is in the middle", func() { + Expect(DetectThinkingForcedOpen("Some prompt")).To(BeFalse()) + Expect(DetectThinkingForcedOpen("reasoning")).To(BeFalse()) + }) + + It("should not detect if no thinking tag", func() { + Expect(DetectThinkingForcedOpen("Some regular prompt")).To(BeFalse()) + Expect(DetectThinkingForcedOpen("")).To(BeFalse()) + }) +}) + +var _ = 
Describe("Extract", func() { + Context("when content has no reasoning tags", func() { + It("should return empty reasoning and original content", func() { + content := "This is regular content without any tags." + reasoning, cleaned := Extract(content) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(Equal(content)) + }) + + It("should handle empty string", func() { + content := "" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(BeEmpty()) + }) + + It("should handle content with only whitespace", func() { + content := " \n\t " + reasoning, cleaned := Extract(content) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(Equal(content)) + }) + }) + + Context("when content has tags", func() { + It("should extract reasoning from single thinking block", func() { + content := "Some text This is my reasoning More text" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("This is my reasoning")) + Expect(cleaned).To(Equal("Some text More text")) + }) + + It("should extract reasoning and preserve surrounding content", func() { + content := "Before Reasoning here After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should handle thinking block at the start", func() { + content := "Start reasoning Regular content" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Start reasoning")) + Expect(cleaned).To(Equal(" Regular content")) + }) + + It("should handle thinking block at the end", func() { + content := "Regular content End reasoning" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("End reasoning")) + Expect(cleaned).To(Equal("Regular content ")) + }) + + It("should handle only thinking block", func() { + content := "Only reasoning" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Only reasoning")) + Expect(cleaned).To(BeEmpty()) + }) + + 
It("should trim whitespace from reasoning content", func() { + content := "Text \n Reasoning with spaces \n More" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning with spaces")) + Expect(cleaned).To(Equal("Text More")) + }) + }) + + Context("when content has tags", func() { + It("should extract reasoning from redacted_reasoning block", func() { + content := "Text Redacted reasoning More" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Redacted reasoning")) + Expect(cleaned).To(Equal("Text More")) + }) + + It("should handle redacted_reasoning with multiline content", func() { + content := "Before Line 1\nLine 2\nLine 3 After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should handle redacted_reasoning with complex content", func() { + content := "Start Complex reasoning\nwith\nmultiple\nlines End" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Complex reasoning\nwith\nmultiple\nlines")) + Expect(cleaned).To(Equal("Start End")) + }) + }) + + Context("when content has multiple reasoning blocks", func() { + It("should concatenate multiple thinking blocks with newlines", func() { + content := "Text First Middle Second End" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("First\n\nSecond")) + Expect(cleaned).To(Equal("Text Middle End")) + }) + + It("should handle multiple different tag types", func() { + content := "A One B Two C Three D" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(ContainSubstring("One")) + Expect(reasoning).To(ContainSubstring("Two")) + Expect(reasoning).To(ContainSubstring("Three")) + Expect(cleaned).To(Equal("A B C D")) + }) + + It("should handle nested tags correctly (extracts first match)", func() { + content := "Text Outer Inner More" + reasoning, cleaned := Extract(content) + // Should extract the outer thinking block + 
Expect(reasoning).To(ContainSubstring("Outer")) + Expect(reasoning).To(ContainSubstring("Inner")) + Expect(cleaned).To(Equal("Text More")) + }) + }) + + Context("when content has unclosed reasoning tags", func() { + It("should extract unclosed thinking block", func() { + content := "Text Unclosed reasoning" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Unclosed reasoning")) + Expect(cleaned).To(Equal("Text ")) + }) + + It("should extract unclosed think block", func() { + content := "Before Incomplete" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Incomplete")) + Expect(cleaned).To(Equal("Before ")) + }) + + It("should extract unclosed redacted_reasoning block", func() { + content := "Start Partial reasoning content" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Partial reasoning content")) + Expect(cleaned).To(Equal("Start ")) + }) + + It("should handle unclosed tag at the end", func() { + content := "Regular content Unclosed at end" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Unclosed at end")) + Expect(cleaned).To(Equal("Regular content ")) + }) + }) + + Context("when content has empty reasoning blocks", func() { + It("should ignore empty thinking block", func() { + content := "Text More" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(Equal("Text More")) + }) + + It("should ignore thinking block with only whitespace", func() { + content := "Text \n\t More" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(Equal("Text More")) + }) + }) + + Context("when content has reasoning tags with special characters", func() { + It("should handle reasoning with newlines", func() { + content := "Before Line 1\nLine 2\nLine 3 After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Line 1\nLine 2\nLine 3")) + Expect(cleaned).To(Equal("Before After")) + }) + + 
It("should handle reasoning with code blocks", func() { + content := "Text Reasoning with ```code``` blocks More" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning with ```code``` blocks")) + Expect(cleaned).To(Equal("Text More")) + }) + + It("should handle reasoning with JSON", func() { + content := "Before {\"key\": \"value\"} After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("{\"key\": \"value\"}")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should handle reasoning with HTML-like content", func() { + content := "Text Reasoning with inside More" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning with inside")) + Expect(cleaned).To(Equal("Text More")) + }) + }) + + Context("when content has reasoning mixed with regular content", func() { + It("should preserve content order correctly", func() { + content := "Start Reasoning Middle More reasoning End" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(ContainSubstring("Reasoning")) + Expect(reasoning).To(ContainSubstring("More reasoning")) + Expect(cleaned).To(Equal("Start Middle End")) + }) + + It("should handle reasoning in the middle of a sentence", func() { + content := "This is a reasoning sentence." 
+ reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning")) + Expect(cleaned).To(Equal("This is a sentence.")) + }) + }) + + Context("edge cases without WithThinkingForcedOpen", func() { + It("should handle content with only opening tag", func() { + content := "" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(Equal("")) + }) + + It("should handle content with only closing tag (no content before)", func() { + content := "" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(BeEmpty()) + }) + + It("should extract reasoning when only closing tag is present", func() { + // GLM-4 style: reasoning content followed by closing tag without opening tag + content := "This is reasoning contentthis is the actual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("This is reasoning content")) + Expect(cleaned).To(Equal("this is the actual response")) + }) + + It("should handle closing-only tag with multiline reasoning", func() { + content := "1. First point\n2. Second point\n3. Third pointFinal answer" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. 
Third point")) + Expect(cleaned).To(Equal("Final answer")) + }) + + It("should handle closing-only tag with complex reasoning (GLM-4 example)", func() { + content := "**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.this is a test" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("**Analyze the user's input:** The user says something.\n\n**Final Decision:** Output the text.")) + Expect(cleaned).To(Equal("this is a test")) + }) + + It("should handle closing-only thinking tag", func() { + content := "Some reasoning hereactual content" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Some reasoning here")) + Expect(cleaned).To(Equal("actual content")) + }) + + It("should handle mismatched tags", func() { + content := "Content" + reasoning, cleaned := Extract(content) + // Should extract unclosed thinking block + Expect(reasoning).To(ContainSubstring("Content")) + Expect(cleaned).To(Equal("")) + }) + + It("should handle very long reasoning content", func() { + longReasoning := strings.Repeat("This is reasoning content. 
", 100) + content := "Text " + longReasoning + " More" + reasoning, cleaned := Extract(content) + // TrimSpace is applied, so we need to account for that + Expect(reasoning).To(Equal(strings.TrimSpace(longReasoning))) + Expect(cleaned).To(Equal("Text More")) + }) + + It("should handle reasoning with unicode characters", func() { + content := "Text Reasoning with 中文 and emoji 🧠 More" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning with 中文 and emoji 🧠")) + Expect(cleaned).To(Equal("Text More")) + }) + }) + + Context("with WithThinkingForcedOpen option", func() { + It("should treat all content as reasoning until closing tag", func() { + content := "This is reasoningthis is content" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("This is reasoning")) + Expect(cleaned).To(Equal("this is content")) + }) + + It("should treat all content as reasoning when no closing tag (streaming)", func() { + content := "This is reasoning content still streaming" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("This is reasoning content still streaming")) + Expect(cleaned).To(BeEmpty()) + }) + + It("should handle GLM-4 style output", func() { + content := "**Analyze:** The user says something.\n\n**Final Decision:** Output the text.this is a test" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("**Analyze:** The user says something.\n\n**Final Decision:** Output the text.")) + Expect(cleaned).To(Equal("this is a test")) + }) + + It("should handle multiline reasoning with closing tag", func() { + content := "1. First point\n2. Second point\n3. Third pointFinal answer" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("1. First point\n2. Second point\n3. 
Third point")) + Expect(cleaned).To(Equal("Final answer")) + }) + + It("should handle closing tag", func() { + content := "Some reasoning hereactual content" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Some reasoning here")) + Expect(cleaned).To(Equal("actual content")) + }) + + It("should handle additional reasoning blocks after initial forced open", func() { + content := "Initial reasoningcontentmore reasoningfinal content" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Initial reasoning\n\nmore reasoning")) + Expect(cleaned).To(Equal("contentfinal content")) + }) + + It("should handle empty content", func() { + reasoning, cleaned := Extract("", WithThinkingForcedOpen()) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(BeEmpty()) + }) + + It("should handle only closing tag", func() { + content := "only content" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(BeEmpty()) + Expect(cleaned).To(Equal("only content")) + }) + + It("should find earliest closing tag", func() { + // comes before + content := "Reasoningcontentmore" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning")) + Expect(cleaned).To(Equal("contentmore")) + }) + + It("should handle Command R7B closing tag", func() { + content := "Reasoning content<|END_THINKING|>actual response" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle Apertus closing tag", func() { + content := "Reasoning content<|inner_suffix|>actual response" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle Seed closing tag", func() { + content := "Reasoning 
contentactual response" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle Magistral closing tag", func() { + content := "Reasoning content[/THINK]actual response" + reasoning, cleaned := Extract(content, WithThinkingForcedOpen()) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + }) + + Context("with model-specific tag pairs", func() { + It("should extract Command R7B reasoning tags", func() { + content := "Before <|START_THINKING|>reasoning here<|END_THINKING|> After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should extract Apertus reasoning tags", func() { + content := "Before <|inner_prefix|>reasoning here<|inner_suffix|> After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should extract Seed reasoning tags", func() { + content := "Before reasoning here After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should extract Magistral reasoning tags", func() { + content := "Before [THINK]reasoning here[/THINK] After" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning here")) + Expect(cleaned).To(Equal("Before After")) + }) + + It("should handle unclosed Command R7B tag", func() { + content := "Before <|START_THINKING|>reasoning still streaming" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning still streaming")) + Expect(cleaned).To(Equal("Before ")) + }) + + It("should handle unclosed Apertus tag", func() { + content := "Before <|inner_prefix|>reasoning still streaming" + reasoning, cleaned := Extract(content) + 
Expect(reasoning).To(Equal("reasoning still streaming")) + Expect(cleaned).To(Equal("Before ")) + }) + + It("should handle unclosed Seed tag", func() { + content := "Before reasoning still streaming" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning still streaming")) + Expect(cleaned).To(Equal("Before ")) + }) + + It("should handle unclosed Magistral tag", func() { + content := "Before [THINK]reasoning still streaming" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("reasoning still streaming")) + Expect(cleaned).To(Equal("Before ")) + }) + + It("should handle closing-only Command R7B tag", func() { + content := "Reasoning content<|END_THINKING|>actual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle closing-only Apertus tag", func() { + content := "Reasoning content<|inner_suffix|>actual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle closing-only Seed tag", func() { + content := "Reasoning contentactual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + + It("should handle closing-only Magistral tag", func() { + content := "Reasoning content[/THINK]actual response" + reasoning, cleaned := Extract(content) + Expect(reasoning).To(Equal("Reasoning content")) + Expect(cleaned).To(Equal("actual response")) + }) + }) +})