From 0a3a95521ccae8cfbc6c863e009d853322cd5f1d Mon Sep 17 00:00:00 2001 From: Ravens2121 Date: Tue, 16 Dec 2025 05:01:40 +0800 Subject: [PATCH 1/3] feat: enhance thinking mode support for Kiro translator Changes: --- internal/runtime/executor/kiro_executor.go | 34 +++--- .../kiro/claude/kiro_claude_request.go | 104 ++++++++++------ .../kiro/claude/kiro_claude_stream.go | 10 ++ .../translator/kiro/openai/kiro_openai.go | 8 +- .../kiro/openai/kiro_openai_request.go | 112 +++++++++--------- .../kiro/openai/kiro_openai_response.go | 13 ++ 6 files changed, 175 insertions(+), 106 deletions(-) diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go index be6be1ed..ec376fe1 100644 --- a/internal/runtime/executor/kiro_executor.go +++ b/internal/runtime/executor/kiro_executor.go @@ -166,16 +166,17 @@ type KiroExecutor struct { // This is critical because OpenAI and Claude formats have different tool structures: // - OpenAI: tools[].function.name, tools[].function.description // - Claude: tools[].name, tools[].description +// headers parameter allows checking Anthropic-Beta header for thinking mode detection. // Returns the serialized JSON payload and a boolean indicating whether thinking mode was injected. -func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format) ([]byte, bool) { +func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format, headers http.Header) ([]byte, bool) { switch sourceFormat.String() { case "openai": log.Debugf("kiro: using OpenAI payload builder for source format: %s", sourceFormat.String()) - return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly) + return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly, headers, nil) default: // Default to Claude format (also handles "claude", "kiro", etc.) log.Debugf("kiro: using Claude payload builder for source format: %s", sourceFormat.String()) - return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly) + return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly, headers, nil) } } @@ -249,7 +250,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. // Rebuild payload with the correct origin for this endpoint // Each endpoint requires its matching Origin value in the request body - kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) + kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers) log.Debugf("kiro: trying endpoint %d/%d: %s (Name: %s, Origin: %s)", endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin) @@ -359,7 +360,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. auth = refreshedAuth accessToken, profileArn = kiroCredentials(auth) // Rebuild payload with new profile ARN if changed - kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) + kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers) log.Infof("kiro: token refreshed successfully, retrying request") continue } @@ -416,7 +417,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. if refreshedAuth != nil { auth = refreshedAuth accessToken, profileArn = kiroCredentials(auth) - kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) + kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers) log.Infof("kiro: token refreshed for 403, retrying request") continue } @@ -555,10 +556,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox // Rebuild payload with the correct origin for this endpoint // Each endpoint requires its matching Origin value in the request body - kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) - // Kiro API always returns tags regardless of whether thinking mode was requested - // So we always enable thinking parsing for Kiro responses - thinkingEnabled := true + kiroPayload, thinkingEnabled := buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers) log.Debugf("kiro: stream trying endpoint %d/%d: %s (Name: %s, Origin: %s)", endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin) @@ -681,7 +679,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox auth = refreshedAuth accessToken, profileArn = kiroCredentials(auth) // Rebuild payload with new profile ARN if changed - kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) + kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers) log.Infof("kiro: token refreshed successfully, retrying stream request") continue } @@ -738,7 +736,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox if refreshedAuth != nil { auth = refreshedAuth accessToken, profileArn = kiroCredentials(auth) - kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) + kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers) log.Infof("kiro: token refreshed for 403, retrying stream request") continue } @@ -1702,6 +1700,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out pendingEndTagChars := 0 // Number of chars that might be start of isThinkingBlockOpen := false // Track if thinking content block is open thinkingBlockIndex := -1 // Index of the thinking content block + var accumulatedThinkingContent strings.Builder // Accumulate thinking content for signature generation // Code block state tracking for heuristic thinking tag parsing // When inside a markdown code block, tags should NOT be parsed @@ -1847,6 +1846,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } + // Accumulate thinking content for signature generation + accumulatedThinkingContent.WriteString(pendingText) } else { // Output as regular text if !isTextBlockOpen { @@ -2390,6 +2391,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } + // Accumulate thinking content for signature generation + accumulatedThinkingContent.WriteString(thinkContent) } // Note: Partial tag handling is done via pendingEndTagChars @@ -2397,7 +2400,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Close thinking block if isThinkingBlockOpen { - blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(thinkingBlockIndex) + blockStop := kiroclaude.BuildClaudeThinkingBlockStopEvent(thinkingBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2405,6 +2408,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } isThinkingBlockOpen = false + accumulatedThinkingContent.Reset() // Reset for potential next thinking block } inThinkBlock = false @@ -2450,6 +2454,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } + // Accumulate thinking content for signature generation + accumulatedThinkingContent.WriteString(contentToEmit) } remaining = "" @@ -2592,6 +2598,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Handle tool uses in response (with deduplication) for _, tu := range toolUses { toolUseID := kirocommon.GetString(tu, "toolUseId") + toolName := kirocommon.GetString(tu, "name") // Check for duplicate if processedIDs[toolUseID] { @@ -2615,7 +2622,6 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Emit tool_use content block contentBlockIndex++ - toolName := kirocommon.GetString(tu, "name") blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", toolUseID, toolName) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) diff --git a/internal/translator/kiro/claude/kiro_claude_request.go b/internal/translator/kiro/claude/kiro_claude_request.go index 052d671c..e3e333d1 100644 --- a/internal/translator/kiro/claude/kiro_claude_request.go +++ b/internal/translator/kiro/claude/kiro_claude_request.go @@ -6,6 +6,7 @@ package claude import ( "encoding/json" "fmt" + "net/http" "strings" "time" "unicode/utf8" @@ -33,6 +34,7 @@ type KiroInferenceConfig struct { TopP float64 `json:"topP,omitempty"` } + // KiroConversationState holds the conversation context type KiroConversationState struct { ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" - must be first field @@ -134,9 +136,11 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo // origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE. // isAgentic parameter enables chunked write optimization prompt for -agentic model variants. // isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode). -// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint. +// headers parameter allows checking Anthropic-Beta header for thinking mode detection. +// metadata parameter is kept for API compatibility but no longer used for thinking configuration. +// Supports thinking mode - when enabled, injects thinking tags into system prompt. // Returns the payload and a boolean indicating whether thinking mode was injected. -func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) { +func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, headers http.Header, metadata map[string]any) ([]byte, bool) { // Extract max_tokens for potential use in inferenceConfig // Handle -1 as "use maximum" (Kiro max output is ~32000 tokens) const kiroMaxOutputTokens = 32000 @@ -181,26 +185,9 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA // Extract system prompt systemPrompt := extractSystemPrompt(claudeBody) - // Check for thinking mode using the comprehensive IsThinkingEnabled function - // This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format - thinkingEnabled := IsThinkingEnabled(claudeBody) - _, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available - if budgetTokens <= 0 { - // Calculate budgetTokens based on max_tokens if available - // Use 50% of max_tokens for thinking, with min 8000 and max 24000 - if maxTokens > 0 { - budgetTokens = maxTokens / 2 - if budgetTokens < 8000 { - budgetTokens = 8000 - } - if budgetTokens > 24000 { - budgetTokens = 24000 - } - log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens) - } else { - budgetTokens = 16000 // Default budget tokens - } - } + // Check for thinking mode using the comprehensive IsThinkingEnabledWithHeaders function + // This supports Claude API format, OpenAI reasoning_effort, AMP/Cursor format, and Anthropic-Beta header + thinkingEnabled := IsThinkingEnabledWithHeaders(claudeBody, headers) // Inject timestamp context timestamp := time.Now().Format("2006-01-02 15:04:05 MST") @@ -231,19 +218,26 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA log.Debugf("kiro: injected tool_choice hint into system prompt") } - // Inject thinking hint when thinking mode is enabled - if thinkingEnabled { - if systemPrompt != "" { - systemPrompt += "\n" - } - dynamicThinkingHint := fmt.Sprintf("interleaved%d", budgetTokens) - systemPrompt += dynamicThinkingHint - log.Debugf("kiro: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens) - } - // Convert Claude tools to Kiro format kiroTools := convertClaudeToolsToKiro(tools) + // Thinking mode implementation: + // Kiro API doesn't accept max_tokens for thinking. Instead, thinking mode is enabled + // by injecting and tags into the system prompt. + // We use a fixed max_thinking_length value since Kiro handles the actual budget internally. + if thinkingEnabled { + thinkingHint := `interleaved +200000 + +IMPORTANT: You MUST use ... tags to show your reasoning process before providing your final response. Think step by step inside the thinking tags.` + if systemPrompt != "" { + systemPrompt = thinkingHint + "\n\n" + systemPrompt + } else { + systemPrompt = thinkingHint + } + log.Infof("kiro: injected thinking prompt, has_tools: %v", len(kiroTools) > 0) + } + // Process messages and build history history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin) @@ -280,6 +274,7 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA } // Build inferenceConfig if we have any inference parameters + // Note: Kiro API doesn't actually use max_tokens for thinking budget var inferenceConfig *KiroInferenceConfig if maxTokens > 0 || hasTemperature || hasTopP { inferenceConfig = &KiroInferenceConfig{} @@ -350,7 +345,7 @@ func extractSystemPrompt(claudeBody []byte) string { // checkThinkingMode checks if thinking mode is enabled in the Claude request func checkThinkingMode(claudeBody []byte) (bool, int64) { thinkingEnabled := false - var budgetTokens int64 = 16000 + var budgetTokens int64 = 24000 thinkingField := gjson.GetBytes(claudeBody, "thinking") if thinkingField.Exists() { @@ -373,6 +368,32 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) { return thinkingEnabled, budgetTokens } +// hasThinkingTagInBody checks if the request body already contains thinking configuration tags. +// This is used to prevent duplicate injection when client (e.g., AMP/Cursor) already includes thinking config. +func hasThinkingTagInBody(body []byte) bool { + bodyStr := string(body) + return strings.Contains(bodyStr, "") || strings.Contains(bodyStr, "") +} + + +// IsThinkingEnabledFromHeader checks if thinking mode is enabled via Anthropic-Beta header. +// Claude CLI uses "Anthropic-Beta: interleaved-thinking-2025-05-14" to enable thinking. +func IsThinkingEnabledFromHeader(headers http.Header) bool { + if headers == nil { + return false + } + betaHeader := headers.Get("Anthropic-Beta") + if betaHeader == "" { + return false + } + // Check for interleaved-thinking beta feature + if strings.Contains(betaHeader, "interleaved-thinking") { + log.Debugf("kiro: thinking mode enabled via Anthropic-Beta header: %s", betaHeader) + return true + } + return false +} + // IsThinkingEnabled is a public wrapper to check if thinking mode is enabled. // This is used by the executor to determine whether to parse tags in responses. // When thinking is NOT enabled in the request, tags in responses should be @@ -383,6 +404,21 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) { // - OpenAI format: reasoning_effort parameter // - AMP/Cursor format: interleaved in system prompt func IsThinkingEnabled(body []byte) bool { + return IsThinkingEnabledWithHeaders(body, nil) +} + +// IsThinkingEnabledWithHeaders checks if thinking mode is enabled from body or headers. +// This is the comprehensive check that supports all thinking detection methods: +// - Claude API format: thinking.type = "enabled" +// - OpenAI format: reasoning_effort parameter +// - AMP/Cursor format: interleaved in system prompt +// - Anthropic-Beta header: interleaved-thinking-2025-05-14 +func IsThinkingEnabledWithHeaders(body []byte, headers http.Header) bool { + // Check Anthropic-Beta header first (Claude Code uses this) + if IsThinkingEnabledFromHeader(headers) { + return true + } + // Check Claude API format first (thinking.type = "enabled") enabled, _ := checkThinkingMode(body) if enabled { @@ -771,4 +807,4 @@ func BuildAssistantMessageStruct(msg gjson.Result) KiroAssistantResponseMessage Content: contentBuilder.String(), ToolUses: toolUses, } -} \ No newline at end of file +} diff --git a/internal/translator/kiro/claude/kiro_claude_stream.go b/internal/translator/kiro/claude/kiro_claude_stream.go index 6ea6e4cd..84fd6621 100644 --- a/internal/translator/kiro/claude/kiro_claude_stream.go +++ b/internal/translator/kiro/claude/kiro_claude_stream.go @@ -99,6 +99,16 @@ func BuildClaudeContentBlockStopEvent(index int) []byte { return []byte("event: content_block_stop\ndata: " + string(result)) } +// BuildClaudeThinkingBlockStopEvent creates a content_block_stop SSE event for thinking blocks. +func BuildClaudeThinkingBlockStopEvent(index int) []byte { + event := map[string]interface{}{ + "type": "content_block_stop", + "index": index, + } + result, _ := json.Marshal(event) + return []byte("event: content_block_stop\ndata: " + string(result)) +} + // BuildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage func BuildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte { deltaEvent := map[string]interface{}{ diff --git a/internal/translator/kiro/openai/kiro_openai.go b/internal/translator/kiro/openai/kiro_openai.go index d5822998..cec17e07 100644 --- a/internal/translator/kiro/openai/kiro_openai.go +++ b/internal/translator/kiro/openai/kiro_openai.go @@ -187,6 +187,7 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq // Extract content var content string + var reasoningContent string var toolUses []KiroToolUse var stopReason string @@ -202,7 +203,8 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq case "text": content += block.Get("text").String() case "thinking": - // Skip thinking blocks for OpenAI format (or convert to reasoning_content if needed) + // Convert thinking blocks to reasoning_content for OpenAI format + reasoningContent += block.Get("thinking").String() case "tool_use": toolUseID := block.Get("id").String() toolName := block.Get("name").String() @@ -233,8 +235,8 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq } usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens - // Build OpenAI response - openaiResponse := BuildOpenAIResponse(content, toolUses, model, usageInfo, stopReason) + // Build OpenAI response with reasoning_content support + openaiResponse := BuildOpenAIResponseWithReasoning(content, reasoningContent, toolUses, model, usageInfo, stopReason) return string(openaiResponse) } diff --git a/internal/translator/kiro/openai/kiro_openai_request.go b/internal/translator/kiro/openai/kiro_openai_request.go index cb97a340..00a05854 100644 --- a/internal/translator/kiro/openai/kiro_openai_request.go +++ b/internal/translator/kiro/openai/kiro_openai_request.go @@ -6,11 +6,13 @@ package openai import ( "encoding/json" "fmt" + "net/http" "strings" "time" "unicode/utf8" "github.com/google/uuid" + kiroclaude "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude" kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common" log "github.com/sirupsen/logrus" "github.com/tidwall/gjson" @@ -133,8 +135,10 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo // origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE. // isAgentic parameter enables chunked write optimization prompt for -agentic model variants. // isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode). +// headers parameter allows checking Anthropic-Beta header for thinking mode detection. +// metadata parameter is kept for API compatibility but no longer used for thinking configuration. // Returns the payload and a boolean indicating whether thinking mode was injected. -func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) { +func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, headers http.Header, metadata map[string]any) ([]byte, bool) { // Extract max_tokens for potential use in inferenceConfig // Handle -1 as "use maximum" (Kiro max output is ~32000 tokens) const kiroMaxOutputTokens = 32000 @@ -219,35 +223,30 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s log.Debugf("kiro-openai: injected response_format hint into system prompt") } - // Check for thinking mode and inject thinking hint - // Supports OpenAI reasoning_effort parameter and model name hints - thinkingEnabled, budgetTokens := checkThinkingModeFromOpenAI(openaiBody) - if thinkingEnabled { - // Adjust budgetTokens based on max_tokens if not explicitly set by reasoning_effort - // Use 50% of max_tokens for thinking, with min 8000 and max 24000 - if maxTokens > 0 && budgetTokens == 16000 { // 16000 is the default, meaning not explicitly set - calculatedBudget := maxTokens / 2 - if calculatedBudget < 8000 { - calculatedBudget = 8000 - } - if calculatedBudget > 24000 { - calculatedBudget = 24000 - } - budgetTokens = calculatedBudget - log.Debugf("kiro-openai: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens) - } - - if systemPrompt != "" { - systemPrompt += "\n" - } - dynamicThinkingHint := fmt.Sprintf("interleaved%d", budgetTokens) - systemPrompt += dynamicThinkingHint - log.Debugf("kiro-openai: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens) - } + // Check for thinking mode + // Supports OpenAI reasoning_effort parameter, model name hints, and Anthropic-Beta header + thinkingEnabled := checkThinkingModeFromOpenAIWithHeaders(openaiBody, headers) // Convert OpenAI tools to Kiro format kiroTools := convertOpenAIToolsToKiro(tools) + // Thinking mode implementation: + // Kiro API doesn't accept max_tokens for thinking. Instead, thinking mode is enabled + // by injecting and tags into the system prompt. + // We use a fixed max_thinking_length value since Kiro handles the actual budget internally. + if thinkingEnabled { + thinkingHint := `interleaved +200000 + +IMPORTANT: You MUST use ... tags to show your reasoning process before providing your final response. Think step by step inside the thinking tags.` + if systemPrompt != "" { + systemPrompt = thinkingHint + "\n\n" + systemPrompt + } else { + systemPrompt = thinkingHint + } + log.Infof("kiro-openai: injected thinking prompt") + } + // Process messages and build history history, currentUserMsg, currentToolResults := processOpenAIMessages(messages, modelID, origin) @@ -284,6 +283,7 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s } // Build inferenceConfig if we have any inference parameters + // Note: Kiro API doesn't actually use max_tokens for thinking budget var inferenceConfig *KiroInferenceConfig if maxTokens > 0 || hasTemperature || hasTopP { inferenceConfig = &KiroInferenceConfig{} @@ -682,13 +682,28 @@ func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResul } // checkThinkingModeFromOpenAI checks if thinking mode is enabled in the OpenAI request. -// Returns (thinkingEnabled, budgetTokens). +// Returns thinkingEnabled. // Supports: // - reasoning_effort parameter (low/medium/high/auto) // - Model name containing "thinking" or "reason" // - tag in system prompt (AMP/Cursor format) -func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) { - var budgetTokens int64 = 16000 // Default budget +func checkThinkingModeFromOpenAI(openaiBody []byte) bool { + return checkThinkingModeFromOpenAIWithHeaders(openaiBody, nil) +} + +// checkThinkingModeFromOpenAIWithHeaders checks if thinking mode is enabled in the OpenAI request. +// Returns thinkingEnabled. +// Supports: +// - Anthropic-Beta header with interleaved-thinking (Claude CLI) +// - reasoning_effort parameter (low/medium/high/auto) +// - Model name containing "thinking" or "reason" +// - tag in system prompt (AMP/Cursor format) +func checkThinkingModeFromOpenAIWithHeaders(openaiBody []byte, headers http.Header) bool { + // Check Anthropic-Beta header first (Claude CLI uses this) + if kiroclaude.IsThinkingEnabledFromHeader(headers) { + log.Debugf("kiro-openai: thinking mode enabled via Anthropic-Beta header") + return true + } // Check OpenAI format: reasoning_effort parameter // Valid values: "low", "medium", "high", "auto" (not "none") @@ -697,18 +712,7 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) { effort := reasoningEffort.String() if effort != "" && effort != "none" { log.Debugf("kiro-openai: thinking mode enabled via reasoning_effort: %s", effort) - // Adjust budget based on effort level - switch effort { - case "low": - budgetTokens = 8000 - case "medium": - budgetTokens = 16000 - case "high": - budgetTokens = 32000 - case "auto": - budgetTokens = 16000 - } - return true, budgetTokens + return true } } @@ -725,17 +729,7 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) { thinkingMode := bodyStr[startIdx : startIdx+endIdx] if thinkingMode == "interleaved" || thinkingMode == "enabled" { log.Debugf("kiro-openai: thinking mode enabled via AMP/Cursor format: %s", thinkingMode) - // Try to extract max_thinking_length if present - if maxLenStart := strings.Index(bodyStr, ""); maxLenStart >= 0 { - maxLenStart += len("") - if maxLenEnd := strings.Index(bodyStr[maxLenStart:], ""); maxLenEnd >= 0 { - maxLenStr := bodyStr[maxLenStart : maxLenStart+maxLenEnd] - if parsed, err := fmt.Sscanf(maxLenStr, "%d", &budgetTokens); err == nil && parsed == 1 { - log.Debugf("kiro-openai: extracted max_thinking_length: %d", budgetTokens) - } - } - } - return true, budgetTokens + return true } } } @@ -746,13 +740,21 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) { modelLower := strings.ToLower(model) if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") { log.Debugf("kiro-openai: thinking mode enabled via model name hint: %s", model) - return true, budgetTokens + return true } log.Debugf("kiro-openai: no thinking mode detected in OpenAI request") - return false, budgetTokens + return false } +// hasThinkingTagInBody checks if the request body already contains thinking configuration tags. +// This is used to prevent duplicate injection when client (e.g., AMP/Cursor) already includes thinking config. +func hasThinkingTagInBody(body []byte) bool { + bodyStr := string(body) + return strings.Contains(bodyStr, "") || strings.Contains(bodyStr, "") +} + + // extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint. // OpenAI tool_choice values: // - "none": Don't use any tools @@ -845,4 +847,4 @@ func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult { } } return unique -} \ No newline at end of file +} diff --git a/internal/translator/kiro/openai/kiro_openai_response.go b/internal/translator/kiro/openai/kiro_openai_response.go index b7da1373..edc70ad8 100644 --- a/internal/translator/kiro/openai/kiro_openai_response.go +++ b/internal/translator/kiro/openai/kiro_openai_response.go @@ -21,12 +21,25 @@ var functionCallIDCounter uint64 // Supports tool_calls when tools are present in the response. // stopReason is passed from upstream; fallback logic applied if empty. func BuildOpenAIResponse(content string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte { + return BuildOpenAIResponseWithReasoning(content, "", toolUses, model, usageInfo, stopReason) +} + +// BuildOpenAIResponseWithReasoning constructs an OpenAI Chat Completions-compatible response with reasoning_content support. +// Supports tool_calls when tools are present in the response. +// reasoningContent is included as reasoning_content field in the message when present. +// stopReason is passed from upstream; fallback logic applied if empty. +func BuildOpenAIResponseWithReasoning(content, reasoningContent string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte { // Build the message object message := map[string]interface{}{ "role": "assistant", "content": content, } + // Add reasoning_content if present (for thinking/reasoning models) + if reasoningContent != "" { + message["reasoning_content"] = reasoningContent + } + // Add tool_calls if present if len(toolUses) > 0 { var toolCalls []map[string]interface{} From e889efeda7947825180db65a6d49954b469c2b2d Mon Sep 17 00:00:00 2001 From: Ravens2121 Date: Tue, 16 Dec 2025 05:21:49 +0800 Subject: [PATCH 2/3] fix: add signature field to thinking blocks for non-streaming mode - Add generateThinkingSignature() function in kiro_claude_response.go --- .../kiro/claude/kiro_claude_response.go | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/internal/translator/kiro/claude/kiro_claude_response.go b/internal/translator/kiro/claude/kiro_claude_response.go index 49ebf79e..313c9059 100644 --- a/internal/translator/kiro/claude/kiro_claude_response.go +++ b/internal/translator/kiro/claude/kiro_claude_response.go @@ -4,6 +4,8 @@ package claude import ( + "crypto/sha256" + "encoding/base64" "encoding/json" "strings" @@ -14,6 +16,18 @@ import ( kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common" ) +// generateThinkingSignature generates a signature for thinking content. +// This is required by Claude API for thinking blocks in non-streaming responses. +// The signature is a base64-encoded hash of the thinking content. +func generateThinkingSignature(thinkingContent string) string { + if thinkingContent == "" { + return "" + } + // Generate a deterministic signature based on content hash + hash := sha256.Sum256([]byte(thinkingContent)) + return base64.StdEncoding.EncodeToString(hash[:]) +} + // Local references to kirocommon constants for thinking block parsing var ( thinkingStartTag = kirocommon.ThinkingStartTag @@ -149,9 +163,12 @@ func ExtractThinkingFromContent(content string) []map[string]interface{} { if endIdx == -1 { // No closing tag found, treat rest as thinking content (incomplete response) if strings.TrimSpace(remaining) != "" { + // Generate signature for thinking content (required by Claude API) + signature := generateThinkingSignature(remaining) blocks = append(blocks, map[string]interface{}{ - "type": "thinking", - "thinking": remaining, + "type": "thinking", + "thinking": remaining, + "signature": signature, }) log.Warnf("kiro: extractThinkingFromContent - missing closing tag") } @@ -161,9 +178,12 @@ func ExtractThinkingFromContent(content string) []map[string]interface{} { // Extract thinking content between tags thinkContent := remaining[:endIdx] if strings.TrimSpace(thinkContent) != "" { + // Generate signature for thinking content (required by Claude API) + signature := generateThinkingSignature(thinkContent) blocks = append(blocks, map[string]interface{}{ - "type": "thinking", - "thinking": thinkContent, + "type": "thinking", + "thinking": thinkContent, + "signature": signature, }) log.Debugf("kiro: extractThinkingFromContent - extracted thinking block (len: %d)", len(thinkContent)) } From f3d1cc8dc1f03d976980756a32dab1267ce78d0d Mon Sep 17 00:00:00 2001 From: Ravens2121 Date: Tue, 16 Dec 2025 05:32:03 +0800 Subject: [PATCH 3/3] chore: change debug logs from INFO to DEBUG level --- internal/runtime/executor/kiro_executor.go | 4 ++-- internal/translator/kiro/openai/kiro_openai_request.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go index ec376fe1..e346b744 100644 --- a/internal/runtime/executor/kiro_executor.go +++ b/internal/runtime/executor/kiro_executor.go @@ -2894,7 +2894,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out if calculatedInputTokens > 0 { localEstimate := totalUsage.InputTokens totalUsage.InputTokens = calculatedInputTokens - log.Infof("kiro: using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)", + log.Debugf("kiro: using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)", upstreamContextPercentage, calculatedInputTokens, localEstimate) } } @@ -2903,7 +2903,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Log upstream usage information if received if hasUpstreamUsage { - log.Infof("kiro: upstream usage - credits: %.4f, context: %.2f%%, final tokens - input: %d, output: %d, total: %d", + log.Debugf("kiro: upstream usage - credits: %.4f, context: %.2f%%, final tokens - input: %d, output: %d, total: %d", upstreamCreditUsage, upstreamContextPercentage, totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens) } diff --git a/internal/translator/kiro/openai/kiro_openai_request.go b/internal/translator/kiro/openai/kiro_openai_request.go index 00a05854..e4f3e767 100644 --- a/internal/translator/kiro/openai/kiro_openai_request.go +++ b/internal/translator/kiro/openai/kiro_openai_request.go @@ -244,7 +244,7 @@ IMPORTANT: You MUST use ... tags to show your reasoning pro } else { systemPrompt = thinkingHint } - log.Infof("kiro-openai: injected thinking prompt") + log.Debugf("kiro-openai: injected thinking prompt") } // Process messages and build history