diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index ac932c0b..d4b0afcb 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -54,13 +54,14 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A from := opts.SourceFormat to := sdktranslator.FromString("openai") translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream) - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { + modelOverride := e.resolveUpstreamModel(req.Model, auth) + if modelOverride != "" { translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel != "" { + if upstreamModel != "" && modelOverride == "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } translated = normalizeThinkingConfig(translated, upstreamModel) @@ -148,13 +149,14 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy from := opts.SourceFormat to := sdktranslator.FromString("openai") translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { + modelOverride := e.resolveUpstreamModel(req.Model, auth) + if modelOverride != "" { translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel != "" { + if upstreamModel != "" && modelOverride == "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } translated = normalizeThinkingConfig(translated, upstreamModel) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 9bc82f1f..3d19c2a8 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -52,10 +52,14 @@ func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model if len(metadata) == 0 { return payload } - if !util.ModelSupportsThinking(model) { + if field == "" { return payload } - if field == "" { + baseModel := util.ResolveOriginalModel(model, metadata) + if baseModel == "" { + baseModel = model + } + if !util.ModelSupportsThinking(baseModel) && !util.IsOpenAICompatibilityModel(baseModel) { return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { @@ -226,6 +230,9 @@ func normalizeThinkingConfig(payload []byte, model string) []byte { } if !util.ModelSupportsThinking(model) { + if util.IsOpenAICompatibilityModel(model) { + return payload + } return stripThinkingFields(payload) } diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 9671f20b..793134fc 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -25,33 +25,33 @@ func ModelSupportsThinking(model string) bool { // or min (0 if zero is allowed and mid <= 0). func NormalizeThinkingBudget(model string, budget int) int { if budget == -1 { // dynamic - if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found { + if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found { if dynamicAllowed { return -1 } - mid := (min + max) / 2 + mid := (minBudget + maxBudget) / 2 if mid <= 0 && zeroAllowed { return 0 } if mid <= 0 { - return min + return minBudget } return mid } return -1 } - if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found { + if found, minBudget, maxBudget, zeroAllowed, _ := thinkingRangeFromRegistry(model); found { if budget == 0 { if zeroAllowed { return 0 } - return min + return minBudget } - if budget < min { - return min + if budget < minBudget { + return minBudget } - if budget > max { - return max + if budget > maxBudget { + return maxBudget } return budget } @@ -105,3 +105,16 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { } return "", false } + +// IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model. +// These models may not advertise Thinking metadata in the registry. +func IsOpenAICompatibilityModel(model string) bool { + if model == "" { + return false + } + info := registry.GetGlobalRegistry().GetModelInfo(model) + if info == nil { + return false + } + return strings.EqualFold(strings.TrimSpace(info.Type), "openai-compatibility") +}