diff --git a/internal/translator/kiro/claude/kiro_claude_request.go b/internal/translator/kiro/claude/kiro_claude_request.go index 7012e644..0ad090ae 100644 --- a/internal/translator/kiro/claude/kiro_claude_request.go +++ b/internal/translator/kiro/claude/kiro_claude_request.go @@ -243,13 +243,11 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA // Process messages and build history history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin) - // Build content with system prompt (only on first turn to avoid re-injection) + // Build content with system prompt. + // Keep thinking tags on subsequent turns so multi-turn Claude sessions + // continue to emit reasoning events. if currentUserMsg != nil { - effectiveSystemPrompt := systemPrompt - if len(history) > 0 { - effectiveSystemPrompt = "" // Don't re-inject on subsequent turns - } - currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, effectiveSystemPrompt, currentToolResults) + currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, systemPrompt, currentToolResults) // Deduplicate currentToolResults currentToolResults = deduplicateToolResults(currentToolResults) @@ -475,6 +473,15 @@ func IsThinkingEnabledWithHeaders(body []byte, headers http.Header) bool { } } + // Check model name directly for thinking hints. + // This enables thinking variants even when clients don't send explicit thinking fields. + model := strings.TrimSpace(gjson.GetBytes(body, "model").String()) + modelLower := strings.ToLower(model) + if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") { + log.Debugf("kiro: thinking mode enabled via model name hint: %s", model) + return true + } + log.Debugf("kiro: IsThinkingEnabled returning false (no thinking mode detected)") return false } diff --git a/internal/translator/kiro/openai/kiro_openai_request.go b/internal/translator/kiro/openai/kiro_openai_request.go index 9515848f..474231b3 100644 --- a/internal/translator/kiro/openai/kiro_openai_request.go +++ b/internal/translator/kiro/openai/kiro_openai_request.go @@ -234,16 +234,16 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s // Kiro API supports official thinking/reasoning mode via tag. // When set to "enabled", Kiro returns reasoning content as official reasoningContentEvent // rather than inline tags in assistantResponseEvent. - // We use a high max_thinking_length to allow extensive reasoning. + // Use a conservative thinking budget to reduce latency/cost spikes in long sessions. if thinkingEnabled { thinkingHint := `enabled -200000` +16000` if systemPrompt != "" { systemPrompt = thinkingHint + "\n\n" + systemPrompt } else { systemPrompt = thinkingHint } - log.Debugf("kiro-openai: injected thinking prompt (official mode)") + log.Infof("kiro-openai: injected thinking prompt (official mode), has_tools: %v", len(kiroTools) > 0) } // Process messages and build history @@ -831,7 +831,6 @@ func hasThinkingTagInBody(body []byte) bool { return strings.Contains(bodyStr, "") || strings.Contains(bodyStr, "") } - // extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint. // OpenAI tool_choice values: // - "none": Don't use any tools