From cf9daf470ca7698c08970f6cb22aede8af2677c5 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:23:44 +0800 Subject: [PATCH 1/6] feat(translator): report cached token usage in Claude output --- .../codex/claude/codex_claude_response.go | 39 +++++++++++--- .../openai/claude/openai_claude_response.go | 53 +++++++++++++------ 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/internal/translator/codex/claude/codex_claude_response.go b/internal/translator/codex/claude/codex_claude_response.go index c700ef84..5223cd94 100644 --- a/internal/translator/codex/claude/codex_claude_response.go +++ b/internal/translator/codex/claude/codex_claude_response.go @@ -117,8 +117,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa } else { template, _ = sjson.Set(template, "delta.stop_reason", "end_turn") } - template, _ = sjson.Set(template, "usage.input_tokens", rootResult.Get("response.usage.input_tokens").Int()) - template, _ = sjson.Set(template, "usage.output_tokens", rootResult.Get("response.usage.output_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractResponsesUsage(rootResult.Get("response.usage")) + template, _ = sjson.Set(template, "usage.input_tokens", inputTokens) + template, _ = sjson.Set(template, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + template, _ = sjson.Set(template, "usage.cache_read_input_tokens", cachedTokens) + } output = "event: message_delta\n" output += fmt.Sprintf("data: %s\n\n", template) @@ -204,8 +208,12 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}` out, _ = sjson.Set(out, "id", responseData.Get("id").String()) out, _ = sjson.Set(out, "model", responseData.Get("model").String()) - out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractResponsesUsage(responseData.Get("usage")) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) + } hasToolCall := false @@ -308,12 +316,27 @@ func ConvertCodexResponseToClaudeNonStream(_ context.Context, _ string, original out, _ = sjson.SetRaw(out, "stop_sequence", stopSequence.Raw) } - if responseData.Get("usage.input_tokens").Exists() || responseData.Get("usage.output_tokens").Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", responseData.Get("usage.input_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", responseData.Get("usage.output_tokens").Int()) + return out +} + +func extractResponsesUsage(usage gjson.Result) (int64, int64, int64) { + if !usage.Exists() || usage.Type == gjson.Null { + return 0, 0, 0 } - return out + inputTokens := usage.Get("input_tokens").Int() + outputTokens := usage.Get("output_tokens").Int() + cachedTokens := usage.Get("input_tokens_details.cached_tokens").Int() + + if cachedTokens > 0 { + if inputTokens >= cachedTokens { + inputTokens -= cachedTokens + } else { + inputTokens = 0 + } + } + + return inputTokens, outputTokens, cachedTokens } // buildReverseMapFromClaudeOriginalShortToOriginal builds a map[short]original from original Claude request tools. diff --git a/internal/translator/openai/claude/openai_claude_response.go b/internal/translator/openai/claude/openai_claude_response.go index 1629545d..b6e0d005 100644 --- a/internal/translator/openai/claude/openai_claude_response.go +++ b/internal/translator/openai/claude/openai_claude_response.go @@ -289,21 +289,17 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI // Only process if usage has actual values (not null) if param.FinishReason != "" { usage := root.Get("usage") - var inputTokens, outputTokens int64 + var inputTokens, outputTokens, cachedTokens int64 if usage.Exists() && usage.Type != gjson.Null { - // Check if usage has actual token counts - promptTokens := usage.Get("prompt_tokens") - completionTokens := usage.Get("completion_tokens") - - if promptTokens.Exists() && completionTokens.Exists() { - inputTokens = promptTokens.Int() - outputTokens = completionTokens.Int() - } + inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage) // Send message_delta with usage messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}` messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason)) messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens) messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.cache_read_input_tokens", cachedTokens) + } results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n") param.MessageDeltaSent = true @@ -423,13 +419,12 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string { // Set usage information if usage := root.Get("usage"); usage.Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", usage.Get("prompt_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", usage.Get("completion_tokens").Int()) - reasoningTokens := int64(0) - if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() { - reasoningTokens = v.Int() + inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(usage) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) } - out, _ = sjson.Set(out, "usage.reasoning_tokens", reasoningTokens) } return []string{out} @@ -674,8 +669,12 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina } if respUsage := root.Get("usage"); respUsage.Exists() { - out, _ = sjson.Set(out, "usage.input_tokens", respUsage.Get("prompt_tokens").Int()) - out, _ = sjson.Set(out, "usage.output_tokens", respUsage.Get("completion_tokens").Int()) + inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(respUsage) + out, _ = sjson.Set(out, "usage.input_tokens", inputTokens) + out, _ = sjson.Set(out, "usage.output_tokens", outputTokens) + if cachedTokens > 0 { + out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens) + } } if !stopReasonSet { @@ -692,3 +691,23 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina func ClaudeTokenCount(ctx context.Context, count int64) string { return fmt.Sprintf(`{"input_tokens":%d}`, count) } + +func extractOpenAIUsage(usage gjson.Result) (int64, int64, int64) { + if !usage.Exists() || usage.Type == gjson.Null { + return 0, 0, 0 + } + + inputTokens := usage.Get("prompt_tokens").Int() + outputTokens := usage.Get("completion_tokens").Int() + cachedTokens := usage.Get("prompt_tokens_details.cached_tokens").Int() + + if cachedTokens > 0 { + if inputTokens >= cachedTokens { + inputTokens -= cachedTokens + } else { + inputTokens = 0 + } + } + + return inputTokens, outputTokens, cachedTokens +} From 52e46ced1bd33a3fd8ff8f7cd136216b734bd8fc Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:33:27 +0800 Subject: [PATCH 2/6] fix(translator): avoid forcing RFC 8259 system prompt --- internal/translator/openai/claude/openai_claude_request.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index 3817b77b..c268ec62 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -88,7 +88,7 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream var messagesJSON = "[]" // Handle system message first - systemMsgJSON := `{"role":"system","content":[{"type":"text","text":"Use ANY tool, the parameters MUST accord with RFC 8259 (The JavaScript Object Notation (JSON) Data Interchange Format), the keys and value MUST be enclosed in double quotes."}]}` + systemMsgJSON := `{"role":"system","content":[]}` if system := root.Get("system"); system.Exists() { if system.Type == gjson.String { if system.String() != "" { From c421d653e75e3eb161d6f1d96578c40510e1fbb8 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:50:35 +0800 Subject: [PATCH 3/6] refactor(claude): move max_tokens constraint enforcement to Apply method --- internal/runtime/executor/claude_executor.go | 82 -------------------- internal/thinking/provider/claude/apply.go | 45 +++++++++++ 2 files changed, 45 insertions(+), 82 deletions(-) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 17c5a143..b6d5418a 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -17,7 +17,6 @@ import ( claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" @@ -119,9 +118,6 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) - // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(baseModel, body) - // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -250,9 +246,6 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) - // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(baseModel, body) - // Extract betas from body and convert to header var extraBetas []string extraBetas, body = extractAndRemoveBetas(body) @@ -541,81 +534,6 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte { return body } -// ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled. -// Anthropic API requires this constraint; violating it returns a 400 error. -// This function should be called after all thinking configuration is finalized. -// It looks up the model's MaxCompletionTokens from the registry to use as the cap. -func ensureMaxTokensForThinking(modelName string, body []byte) []byte { - thinkingType := gjson.GetBytes(body, "thinking.type").String() - if thinkingType != "enabled" { - return body - } - - budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int() - if budgetTokens <= 0 { - return body - } - - maxTokens := gjson.GetBytes(body, "max_tokens").Int() - - // Look up the model's max completion tokens from the registry - maxCompletionTokens := 0 - if modelInfo := registry.LookupModelInfo(modelName); modelInfo != nil { - maxCompletionTokens = modelInfo.MaxCompletionTokens - } - - // Fall back to budget + buffer if registry lookup fails or returns 0 - const fallbackBuffer = 4000 - requiredMaxTokens := budgetTokens + fallbackBuffer - if maxCompletionTokens > 0 { - requiredMaxTokens = int64(maxCompletionTokens) - } - - if maxTokens < requiredMaxTokens { - body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) - } - return body -} - -func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey { - if auth == nil || e.cfg == nil { - return nil - } - var attrKey, attrBase string - if auth.Attributes != nil { - attrKey = strings.TrimSpace(auth.Attributes["api_key"]) - attrBase = strings.TrimSpace(auth.Attributes["base_url"]) - } - for i := range e.cfg.ClaudeKey { - entry := &e.cfg.ClaudeKey[i] - cfgKey := strings.TrimSpace(entry.APIKey) - cfgBase := strings.TrimSpace(entry.BaseURL) - if attrKey != "" && attrBase != "" { - if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) { - return entry - } - continue - } - if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { - if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { - return entry - } - } - if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { - return entry - } - } - if attrKey != "" { - for i := range e.cfg.ClaudeKey { - entry := &e.cfg.ClaudeKey[i] - if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) { - return entry - } - } - } - return nil -} - type compositeReadCloser struct { io.Reader closers []func() error diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index b7833072..babc2f76 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -80,9 +80,54 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo * result, _ := sjson.SetBytes(body, "thinking.type", "enabled") result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + + // Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint) + result = a.normalizeClaudeBudget(result, config.Budget, modelInfo) return result, nil } +// normalizeClaudeBudget applies Claude-specific constraints to ensure max_tokens > budget_tokens. +// Anthropic API requires this constraint; violating it returns a 400 error. +func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo *registry.ModelInfo) []byte { + if budgetTokens <= 0 { + return body + } + + effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo) + if effectiveMax > 0 && effectiveMax > budgetTokens { + if setDefaultMax { + body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) + } + return body + } + + // Fall back to budget + buffer if no effective max or max <= budget + const fallbackBuffer = 4000 + requiredMaxTokens := budgetTokens + fallbackBuffer + if effectiveMax > 0 && effectiveMax > requiredMaxTokens { + requiredMaxTokens = effectiveMax + } + + currentMax := gjson.GetBytes(body, "max_tokens").Int() + if currentMax < int64(requiredMaxTokens) { + body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) + } + return body +} + +// effectiveMaxTokens returns the max tokens to cap thinking: +// prefer request-provided max_tokens; otherwise fall back to model default. +// The boolean indicates whether the value came from the model default (and thus should be written back). +func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) (max int, fromModel bool) { + if maxTok := gjson.GetBytes(body, "max_tokens"); maxTok.Exists() && maxTok.Int() > 0 { + return int(maxTok.Int()), false + } + if modelInfo != nil && modelInfo.MaxCompletionTokens > 0 { + return modelInfo.MaxCompletionTokens, true + } + return 0, false +} + func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) { if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { return body, nil From 239a28793c3b0229a0cefe7673c4e72c54c3288e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:32:20 +0800 Subject: [PATCH 4/6] feat(claude): clamp thinking budget to max_tokens constraints --- internal/thinking/provider/claude/apply.go | 38 ++++++++++++++-------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index babc2f76..3c74d514 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -93,25 +93,37 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo return body } + // Ensure the request satisfies Claude constraints: + // 1) Determine effective max_tokens (request overrides model default) + // 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1 + // 3) If the adjusted budget falls below the model minimum, leave the request unchanged + // 4) If max_tokens came from model default, write it back into the request + effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo) - if effectiveMax > 0 && effectiveMax > budgetTokens { - if setDefaultMax { - body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) - } + if setDefaultMax && effectiveMax > 0 { + body, _ = sjson.SetBytes(body, "max_tokens", effectiveMax) + } + + // Compute the budget we would apply after enforcing budget_tokens < max_tokens. + adjustedBudget := budgetTokens + if effectiveMax > 0 && adjustedBudget >= effectiveMax { + adjustedBudget = effectiveMax - 1 + } + + minBudget := 0 + if modelInfo != nil && modelInfo.Thinking != nil { + minBudget = modelInfo.Thinking.Min + } + if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget { + // If enforcing the max_tokens constraint would push the budget below the model minimum, + // leave the request unchanged. return body } - // Fall back to budget + buffer if no effective max or max <= budget - const fallbackBuffer = 4000 - requiredMaxTokens := budgetTokens + fallbackBuffer - if effectiveMax > 0 && effectiveMax > requiredMaxTokens { - requiredMaxTokens = effectiveMax + if adjustedBudget != budgetTokens { + body, _ = sjson.SetBytes(body, "thinking.budget_tokens", adjustedBudget) } - currentMax := gjson.GetBytes(body, "max_tokens").Int() - if currentMax < int64(requiredMaxTokens) { - body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens) - } return body } From c175821cc4a3960e633f6de1e880e43f79309e75 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 18:07:22 +0800 Subject: [PATCH 5/6] feat(registry): expand antigravity model config Remove static Name mapping and add entries for claude-sonnet-4-5, tab_flash_lite_preview, and gpt-oss-120b-medium configs --- internal/registry/model_definitions.go | 19 ++++++++++--------- .../runtime/executor/antigravity_executor.go | 3 --- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 77669e4b..080c2726 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -765,21 +765,23 @@ func GetIFlowModels() []*ModelInfo { type AntigravityModelConfig struct { Thinking *ThinkingSupport MaxCompletionTokens int - Name string } // GetAntigravityModelConfig returns static configuration for antigravity models. // Keys use upstream model names returned by the Antigravity models endpoint. func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { return map[string]*AntigravityModelConfig{ - "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"}, - "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"}, - "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/rev19-uic3-1p"}, - "gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-high"}, - "gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image"}, - "gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash"}, + "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, + "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, + "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}}, + "gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, + "gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, + "gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}}, "claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, "claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + "claude-sonnet-4-5": {MaxCompletionTokens: 64000}, + "gpt-oss-120b-medium": {}, + "tab_flash_lite_preview": {}, } } @@ -809,10 +811,9 @@ func LookupStaticModelInfo(modelID string) *ModelInfo { } // Check Antigravity static config - if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil && cfg.Thinking != nil { + if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil { return &ModelInfo{ ID: modelID, - Name: cfg.Name, Thinking: cfg.Thinking, MaxCompletionTokens: cfg.MaxCompletionTokens, } diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 99392188..602ed628 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1005,9 +1005,6 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c } modelCfg := modelConfig[modelID] modelName := modelID - if modelCfg != nil && modelCfg.Name != "" { - modelName = modelCfg.Name - } modelInfo := ®istry.ModelInfo{ ID: modelID, Name: modelName, From 1d2fe55310024844ff12aba79b5b2b21f14b7b45 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Mon, 19 Jan 2026 19:49:39 +0800 Subject: [PATCH 6/6] fix(executor): stop rewriting thinkingLevel for gemini --- internal/runtime/executor/antigravity_executor.go | 7 ------- test/thinking_conversion_test.go | 4 ++++ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 602ed628..df26e376 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1407,13 +1407,6 @@ func geminiToAntigravity(modelName string, payload []byte, projectID string) []b template, _ = sjson.Delete(template, "request.safetySettings") template, _ = sjson.Set(template, "request.toolConfig.functionCallingConfig.mode", "VALIDATED") - if !strings.HasPrefix(modelName, "gemini-3-") { - if thinkingLevel := gjson.Get(template, "request.generationConfig.thinkingConfig.thinkingLevel"); thinkingLevel.Exists() { - template, _ = sjson.Delete(template, "request.generationConfig.thinkingConfig.thinkingLevel") - template, _ = sjson.Set(template, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - } - } - if strings.Contains(modelName, "claude") { gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool { tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool { diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 8f527193..4a7df29a 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -20,6 +20,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" ) // thinkingTestCase represents a common test case structure for both suffix and body tests. @@ -2707,6 +2708,9 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) { []byte(tc.inputJSON), true, ) + if applyTo == "claude" { + body, _ = sjson.SetBytes(body, "max_tokens", 200000) + } body, err := thinking.ApplyThinking(body, tc.model, tc.from, applyTo)