test-org-codity · chay2199 · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026
diff --git a/core/config/model_config.go b/core/config/model_config.go
@@ -10,6 +10,7 @@ import (
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/reasoning"
 	"github.com/mudler/cogito"
 	"gopkg.in/yaml.v3"
 )
@@ -51,6 +52,7 @@ type ModelConfig struct {
 	ResponseFormatMap                          map[string]interface{} `yaml:"-" json:"-"`
 
 	FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
+	ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`
 
 	FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
 	// LLM configs (GPT4ALL, Llama.cpp, ...)

diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
@@ -13,6 +13,7 @@ import (
 	"github.com/mudler/LocalAI/core/http/middleware"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
+	"github.com/mudler/LocalAI/pkg/reasoning"
 
 	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/model"
@@ -43,10 +44,19 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		lastEmittedReasoning := ""
 		lastEmittedCleanedContent := ""
 
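+		// Decide once, before the token callback runs, whether reasoning extraction
+		// should treat the thinking block as already open: either the model config
+		// sets it explicitly, or it is auto-detected from a prompt ending with a thinking tag.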
+		thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s)
+
 		_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
 			accumulatedContent += s
 			// Extract reasoning from accumulated content
-			currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
+			opts := []reasoning.Option{}
+			if thinkingForcedOpen {
+				opts = append(opts, reasoning.WithThinkingForcedOpen())
+			}
+			currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...)
 
 			// Calculate new reasoning delta (what we haven't emitted yet)
 			var reasoningDelta *string
@@ -230,7 +240,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			return err
 		}
 		// Extract reasoning before processing tool calls
-		reasoning, cleanedResult := functions.ExtractReasoning(result)
+		// Auto-detect if prompt ends with thinking tag or use explicit config
+		toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt)
+		opts := []reasoning.Option{}
+		if toolsThinkingForcedOpen {
+			opts = append(opts, reasoning.WithThinkingForcedOpen())
+		}
+		extractedReasoning, cleanedResult := reasoning.Extract(result, opts...)
 		result = cleanedResult
 
 		textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
@@ -266,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 			}
 
 			var deltaReasoning *string
-			if reasoning != "" {
-				deltaReasoning = &reasoning
+			if extractedReasoning != "" {
+				deltaReasoning = &extractedReasoning
 			}
 			delta := &schema.Message{Content: &result}
 			if deltaReasoning != nil {
@@ -618,17 +634,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 		// no streaming mode
 		default:
 
+			// Non-streaming mode: thinking is forced open if the config sets it explicitly or the prompt is detected to end with a thinking tag
+			nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput)
+
 			tokenCallback := func(s string, c *[]schema.Choice) {
 				// Extract reasoning from the response
-				reasoning, cleanedS := functions.ExtractReasoning(s)
-				s = cleanedS
+				var extractedReasoning string
+				opts := []reasoning.Option{}
+				if nonStreamThinkingForcedOpen {
+					opts = append(opts, reasoning.WithThinkingForcedOpen())
+				}
+				extractedReasoning, s = reasoning.Extract(s, opts...)
 
 				if !shouldUseFn {
 					// no function is called, just reply and use stop as finish reason
 					stopReason := FinishReasonStop
 					message := &schema.Message{Role: "assistant", Content: &s}
-					if reasoning != "" {
-						message.Reasoning = &reasoning
+					if extractedReasoning != "" {
+						message.Reasoning = &extractedReasoning
 					}
 					*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message})
 					return
@@ -650,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 
 					stopReason := FinishReasonStop
 					message := &schema.Message{Role: "assistant", Content: &result}
-					if reasoning != "" {
-						message.Reasoning = &reasoning
+					if extractedReasoning != "" {
+						message.Reasoning = &extractedReasoning
 					}
 					*c = append(*c, schema.Choice{
 						FinishReason: &stopReason,
@@ -664,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 							Role: "assistant",
 						},
 					}
-					if reasoning != "" {
-						toolChoice.Message.Reasoning = &reasoning
+					if extractedReasoning != "" {
+						toolChoice.Message.Reasoning = &extractedReasoning
 					}
 
 					for _, ss := range results {
@@ -695,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
 									"arguments": args,
 								},
 							}
-							if reasoning != "" {
-								message.Reasoning = &reasoning
+							if extractedReasoning != "" {
+								message.Reasoning = &extractedReasoning
 							}
 							*c = append(*c, schema.Choice{
 								FinishReason: &functionCallReason,

diff --git a/pkg/functions/reasoning.go b/pkg/functions/reasoning.go