Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/config/model_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/reasoning"
"github.com/mudler/cogito"
"gopkg.in/yaml.v3"
)
Expand Down Expand Up @@ -51,6 +52,7 @@ type ModelConfig struct {
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`

FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
ReasoningConfig reasoning.ReasoningConfig `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`

FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
Expand Down
51 changes: 37 additions & 14 deletions core/http/endpoints/openai/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/reasoning"

"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model"
Expand Down Expand Up @@ -43,10 +44,19 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
lastEmittedReasoning := ""
lastEmittedCleanedContent := ""

// Configure reasoning extraction options
// Auto-detect if prompt ends with thinking tag
// or use explicit config setting
thinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(s)

_, _, err := ComputeChoices(req, s, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
accumulatedContent += s
// Extract reasoning from accumulated content
currentReasoning, cleanedContent := functions.ExtractReasoning(accumulatedContent)
opts := []reasoning.Option{}
if thinkingForcedOpen {
opts = append(opts, reasoning.WithThinkingForcedOpen())
}
currentReasoning, cleanedContent := reasoning.Extract(accumulatedContent, opts...)

// Calculate new reasoning delta (what we haven't emitted yet)
var reasoningDelta *string
Expand Down Expand Up @@ -230,7 +240,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
return err
}
// Extract reasoning before processing tool calls
reasoning, cleanedResult := functions.ExtractReasoning(result)
// Auto-detect if prompt ends with thinking tag or use explicit config
toolsThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(prompt)
opts := []reasoning.Option{}
if toolsThinkingForcedOpen {
opts = append(opts, reasoning.WithThinkingForcedOpen())
}
extractedReasoning, cleanedResult := reasoning.Extract(result, opts...)
result = cleanedResult

textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
Expand Down Expand Up @@ -266,8 +282,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}

var deltaReasoning *string
if reasoning != "" {
deltaReasoning = &reasoning
if extractedReasoning != "" {
deltaReasoning = &extractedReasoning
}
delta := &schema.Message{Content: &result}
if deltaReasoning != nil {
Expand Down Expand Up @@ -618,17 +634,24 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
// non-streaming mode
default:

// Auto-detect if prompt ends with thinking tag for non-streaming mode
nonStreamThinkingForcedOpen := config.ReasoningConfig.ThinkingForcedOpen || reasoning.DetectThinkingForcedOpen(predInput)

tokenCallback := func(s string, c *[]schema.Choice) {
// Extract reasoning from the response
reasoning, cleanedS := functions.ExtractReasoning(s)
s = cleanedS
var extractedReasoning string
opts := []reasoning.Option{}
if nonStreamThinkingForcedOpen {
opts = append(opts, reasoning.WithThinkingForcedOpen())
}
extractedReasoning, s = reasoning.Extract(s, opts...)

if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
stopReason := FinishReasonStop
message := &schema.Message{Role: "assistant", Content: &s}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: message})
return
Expand All @@ -650,8 +673,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator

stopReason := FinishReasonStop
message := &schema.Message{Role: "assistant", Content: &result}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{
FinishReason: &stopReason,
Expand All @@ -664,8 +687,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
Role: "assistant",
},
}
if reasoning != "" {
toolChoice.Message.Reasoning = &reasoning
if extractedReasoning != "" {
toolChoice.Message.Reasoning = &extractedReasoning
}

for _, ss := range results {
Expand Down Expand Up @@ -695,8 +718,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
"arguments": args,
},
}
if reasoning != "" {
message.Reasoning = &reasoning
if extractedReasoning != "" {
message.Reasoning = &extractedReasoning
}
*c = append(*c, schema.Choice{
FinishReason: &functionCallReason,
Expand Down
114 changes: 0 additions & 114 deletions pkg/functions/reasoning.go

This file was deleted.

Loading