diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go
index be6be1ed..ec376fe1 100644
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -166,16 +166,17 @@ type KiroExecutor struct {
// This is critical because OpenAI and Claude formats have different tool structures:
// - OpenAI: tools[].function.name, tools[].function.description
// - Claude: tools[].name, tools[].description
+// headers parameter allows checking Anthropic-Beta header for thinking mode detection.
// Returns the serialized JSON payload and a boolean indicating whether thinking mode was injected.
-func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format) ([]byte, bool) {
+func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format, headers http.Header) ([]byte, bool) {
switch sourceFormat.String() {
case "openai":
log.Debugf("kiro: using OpenAI payload builder for source format: %s", sourceFormat.String())
- return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly)
+ return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly, headers, nil)
default:
// Default to Claude format (also handles "claude", "kiro", etc.)
log.Debugf("kiro: using Claude payload builder for source format: %s", sourceFormat.String())
- return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly)
+ return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly, headers, nil)
}
}
@@ -249,7 +250,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
// Rebuild payload with the correct origin for this endpoint
// Each endpoint requires its matching Origin value in the request body
- kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
+ kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Debugf("kiro: trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
@@ -359,7 +360,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
// Rebuild payload with new profile ARN if changed
- kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
+ kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Infof("kiro: token refreshed successfully, retrying request")
continue
}
@@ -416,7 +417,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
if refreshedAuth != nil {
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
- kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
+ kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Infof("kiro: token refreshed for 403, retrying request")
continue
}
@@ -555,10 +556,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
// Rebuild payload with the correct origin for this endpoint
// Each endpoint requires its matching Origin value in the request body
- kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
- // Kiro API always returns tags regardless of whether thinking mode was requested
- // So we always enable thinking parsing for Kiro responses
- thinkingEnabled := true
+ kiroPayload, thinkingEnabled := buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Debugf("kiro: stream trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
@@ -681,7 +679,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
// Rebuild payload with new profile ARN if changed
- kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
+ kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Infof("kiro: token refreshed successfully, retrying stream request")
continue
}
@@ -738,7 +736,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
if refreshedAuth != nil {
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
- kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
+ kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
log.Infof("kiro: token refreshed for 403, retrying stream request")
continue
}
@@ -1702,6 +1700,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
pendingEndTagChars := 0 // Number of chars that might be start of
isThinkingBlockOpen := false // Track if thinking content block is open
thinkingBlockIndex := -1 // Index of the thinking content block
+ var accumulatedThinkingContent strings.Builder // Accumulate thinking content for signature generation
// Code block state tracking for heuristic thinking tag parsing
// When inside a markdown code block, tags should NOT be parsed
@@ -1847,6 +1846,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
+ // Accumulate thinking content for signature generation
+ accumulatedThinkingContent.WriteString(pendingText)
} else {
// Output as regular text
if !isTextBlockOpen {
@@ -2390,6 +2391,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
+ // Accumulate thinking content for signature generation
+ accumulatedThinkingContent.WriteString(thinkContent)
}
// Note: Partial tag handling is done via pendingEndTagChars
@@ -2397,7 +2400,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
// Close thinking block
if isThinkingBlockOpen {
- blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(thinkingBlockIndex)
+ blockStop := kiroclaude.BuildClaudeThinkingBlockStopEvent(thinkingBlockIndex)
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
@@ -2405,6 +2408,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
}
}
isThinkingBlockOpen = false
+ accumulatedThinkingContent.Reset() // Reset for potential next thinking block
}
inThinkBlock = false
@@ -2450,6 +2454,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
+ // Accumulate thinking content for signature generation
+ accumulatedThinkingContent.WriteString(contentToEmit)
}
remaining = ""
@@ -2592,6 +2598,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
// Handle tool uses in response (with deduplication)
for _, tu := range toolUses {
toolUseID := kirocommon.GetString(tu, "toolUseId")
+ toolName := kirocommon.GetString(tu, "name")
// Check for duplicate
if processedIDs[toolUseID] {
@@ -2615,7 +2622,6 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
// Emit tool_use content block
contentBlockIndex++
- toolName := kirocommon.GetString(tu, "name")
blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", toolUseID, toolName)
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
diff --git a/internal/translator/kiro/claude/kiro_claude_request.go b/internal/translator/kiro/claude/kiro_claude_request.go
index 052d671c..e3e333d1 100644
--- a/internal/translator/kiro/claude/kiro_claude_request.go
+++ b/internal/translator/kiro/claude/kiro_claude_request.go
@@ -6,6 +6,7 @@ package claude
import (
"encoding/json"
"fmt"
+ "net/http"
"strings"
"time"
"unicode/utf8"
@@ -33,6 +34,7 @@ type KiroInferenceConfig struct {
TopP float64 `json:"topP,omitempty"`
}
+
// KiroConversationState holds the conversation context
type KiroConversationState struct {
ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
@@ -134,9 +136,11 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
-// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
+// headers parameter allows checking Anthropic-Beta header for thinking mode detection.
+// metadata parameter is kept for API compatibility but no longer used for thinking configuration.
+// Supports thinking mode - when enabled, injects thinking tags into system prompt.
// Returns the payload and a boolean indicating whether thinking mode was injected.
-func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) {
+func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, headers http.Header, metadata map[string]any) ([]byte, bool) {
// Extract max_tokens for potential use in inferenceConfig
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
const kiroMaxOutputTokens = 32000
@@ -181,26 +185,9 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
// Extract system prompt
systemPrompt := extractSystemPrompt(claudeBody)
- // Check for thinking mode using the comprehensive IsThinkingEnabled function
- // This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format
- thinkingEnabled := IsThinkingEnabled(claudeBody)
- _, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available
- if budgetTokens <= 0 {
- // Calculate budgetTokens based on max_tokens if available
- // Use 50% of max_tokens for thinking, with min 8000 and max 24000
- if maxTokens > 0 {
- budgetTokens = maxTokens / 2
- if budgetTokens < 8000 {
- budgetTokens = 8000
- }
- if budgetTokens > 24000 {
- budgetTokens = 24000
- }
- log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
- } else {
- budgetTokens = 16000 // Default budget tokens
- }
- }
+ // Check for thinking mode using the comprehensive IsThinkingEnabledWithHeaders function
+ // This supports Claude API format, OpenAI reasoning_effort, AMP/Cursor format, and Anthropic-Beta header
+ thinkingEnabled := IsThinkingEnabledWithHeaders(claudeBody, headers)
// Inject timestamp context
timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
@@ -231,19 +218,26 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
log.Debugf("kiro: injected tool_choice hint into system prompt")
}
- // Inject thinking hint when thinking mode is enabled
- if thinkingEnabled {
- if systemPrompt != "" {
- systemPrompt += "\n"
- }
- dynamicThinkingHint := fmt.Sprintf("interleaved%d", budgetTokens)
- systemPrompt += dynamicThinkingHint
- log.Debugf("kiro: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
- }
-
// Convert Claude tools to Kiro format
kiroTools := convertClaudeToolsToKiro(tools)
+ // Thinking mode implementation:
+ // Kiro API doesn't accept max_tokens for thinking. Instead, thinking mode is enabled
+ // by injecting and tags into the system prompt.
+ // We use a fixed max_thinking_length value since Kiro handles the actual budget internally.
+ if thinkingEnabled {
+ thinkingHint := `interleaved
+200000
+
+IMPORTANT: You MUST use ... tags to show your reasoning process before providing your final response. Think step by step inside the thinking tags.`
+ if systemPrompt != "" {
+ systemPrompt = thinkingHint + "\n\n" + systemPrompt
+ } else {
+ systemPrompt = thinkingHint
+ }
+ log.Infof("kiro: injected thinking prompt, has_tools: %v", len(kiroTools) > 0)
+ }
+
// Process messages and build history
history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin)
@@ -280,6 +274,7 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
}
// Build inferenceConfig if we have any inference parameters
+ // Note: Kiro API doesn't actually use max_tokens for thinking budget
var inferenceConfig *KiroInferenceConfig
if maxTokens > 0 || hasTemperature || hasTopP {
inferenceConfig = &KiroInferenceConfig{}
@@ -350,7 +345,7 @@ func extractSystemPrompt(claudeBody []byte) string {
// checkThinkingMode checks if thinking mode is enabled in the Claude request
func checkThinkingMode(claudeBody []byte) (bool, int64) {
thinkingEnabled := false
- var budgetTokens int64 = 16000
+ var budgetTokens int64 = 24000
thinkingField := gjson.GetBytes(claudeBody, "thinking")
if thinkingField.Exists() {
@@ -373,6 +368,32 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
return thinkingEnabled, budgetTokens
}
+// hasThinkingTagInBody checks if the request body already contains thinking configuration tags.
+// This is used to prevent duplicate injection when client (e.g., AMP/Cursor) already includes thinking config.
+func hasThinkingTagInBody(body []byte) bool {
+ bodyStr := string(body)
+ return strings.Contains(bodyStr, "") || strings.Contains(bodyStr, "")
+}
+
+
+// IsThinkingEnabledFromHeader checks if thinking mode is enabled via Anthropic-Beta header.
+// Claude CLI uses "Anthropic-Beta: interleaved-thinking-2025-05-14" to enable thinking.
+func IsThinkingEnabledFromHeader(headers http.Header) bool {
+ if headers == nil {
+ return false
+ }
+ betaHeader := headers.Get("Anthropic-Beta")
+ if betaHeader == "" {
+ return false
+ }
+ // Check for interleaved-thinking beta feature
+ if strings.Contains(betaHeader, "interleaved-thinking") {
+ log.Debugf("kiro: thinking mode enabled via Anthropic-Beta header: %s", betaHeader)
+ return true
+ }
+ return false
+}
+
// IsThinkingEnabled is a public wrapper to check if thinking mode is enabled.
// This is used by the executor to determine whether to parse tags in responses.
// When thinking is NOT enabled in the request, tags in responses should be
@@ -383,6 +404,21 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
// - OpenAI format: reasoning_effort parameter
// - AMP/Cursor format: interleaved in system prompt
func IsThinkingEnabled(body []byte) bool {
+ return IsThinkingEnabledWithHeaders(body, nil)
+}
+
+// IsThinkingEnabledWithHeaders checks if thinking mode is enabled from body or headers.
+// This is the comprehensive check that supports all thinking detection methods:
+// - Claude API format: thinking.type = "enabled"
+// - OpenAI format: reasoning_effort parameter
+// - AMP/Cursor format: interleaved in system prompt
+// - Anthropic-Beta header: interleaved-thinking-2025-05-14
+func IsThinkingEnabledWithHeaders(body []byte, headers http.Header) bool {
+ // Check Anthropic-Beta header first (Claude Code uses this)
+ if IsThinkingEnabledFromHeader(headers) {
+ return true
+ }
+
// Check Claude API format first (thinking.type = "enabled")
enabled, _ := checkThinkingMode(body)
if enabled {
@@ -771,4 +807,4 @@ func BuildAssistantMessageStruct(msg gjson.Result) KiroAssistantResponseMessage
Content: contentBuilder.String(),
ToolUses: toolUses,
}
-}
\ No newline at end of file
+}
diff --git a/internal/translator/kiro/claude/kiro_claude_stream.go b/internal/translator/kiro/claude/kiro_claude_stream.go
index 6ea6e4cd..84fd6621 100644
--- a/internal/translator/kiro/claude/kiro_claude_stream.go
+++ b/internal/translator/kiro/claude/kiro_claude_stream.go
@@ -99,6 +99,16 @@ func BuildClaudeContentBlockStopEvent(index int) []byte {
return []byte("event: content_block_stop\ndata: " + string(result))
}
+// BuildClaudeThinkingBlockStopEvent creates a content_block_stop SSE event for thinking blocks.
+func BuildClaudeThinkingBlockStopEvent(index int) []byte {
+ event := map[string]interface{}{
+ "type": "content_block_stop",
+ "index": index,
+ }
+ result, _ := json.Marshal(event)
+ return []byte("event: content_block_stop\ndata: " + string(result))
+}
+
// BuildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage
func BuildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte {
deltaEvent := map[string]interface{}{
diff --git a/internal/translator/kiro/openai/kiro_openai.go b/internal/translator/kiro/openai/kiro_openai.go
index d5822998..cec17e07 100644
--- a/internal/translator/kiro/openai/kiro_openai.go
+++ b/internal/translator/kiro/openai/kiro_openai.go
@@ -187,6 +187,7 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq
// Extract content
var content string
+ var reasoningContent string
var toolUses []KiroToolUse
var stopReason string
@@ -202,7 +203,8 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq
case "text":
content += block.Get("text").String()
case "thinking":
- // Skip thinking blocks for OpenAI format (or convert to reasoning_content if needed)
+ // Convert thinking blocks to reasoning_content for OpenAI format
+ reasoningContent += block.Get("thinking").String()
case "tool_use":
toolUseID := block.Get("id").String()
toolName := block.Get("name").String()
@@ -233,8 +235,8 @@ func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalReq
}
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
- // Build OpenAI response
- openaiResponse := BuildOpenAIResponse(content, toolUses, model, usageInfo, stopReason)
+ // Build OpenAI response with reasoning_content support
+ openaiResponse := BuildOpenAIResponseWithReasoning(content, reasoningContent, toolUses, model, usageInfo, stopReason)
return string(openaiResponse)
}
diff --git a/internal/translator/kiro/openai/kiro_openai_request.go b/internal/translator/kiro/openai/kiro_openai_request.go
index cb97a340..00a05854 100644
--- a/internal/translator/kiro/openai/kiro_openai_request.go
+++ b/internal/translator/kiro/openai/kiro_openai_request.go
@@ -6,11 +6,13 @@ package openai
import (
"encoding/json"
"fmt"
+ "net/http"
"strings"
"time"
"unicode/utf8"
"github.com/google/uuid"
+ kiroclaude "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude"
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
@@ -133,8 +135,10 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
+// headers parameter allows checking Anthropic-Beta header for thinking mode detection.
+// metadata parameter is kept for API compatibility but no longer used for thinking configuration.
// Returns the payload and a boolean indicating whether thinking mode was injected.
-func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) {
+func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, headers http.Header, metadata map[string]any) ([]byte, bool) {
// Extract max_tokens for potential use in inferenceConfig
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
const kiroMaxOutputTokens = 32000
@@ -219,35 +223,30 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
log.Debugf("kiro-openai: injected response_format hint into system prompt")
}
- // Check for thinking mode and inject thinking hint
- // Supports OpenAI reasoning_effort parameter and model name hints
- thinkingEnabled, budgetTokens := checkThinkingModeFromOpenAI(openaiBody)
- if thinkingEnabled {
- // Adjust budgetTokens based on max_tokens if not explicitly set by reasoning_effort
- // Use 50% of max_tokens for thinking, with min 8000 and max 24000
- if maxTokens > 0 && budgetTokens == 16000 { // 16000 is the default, meaning not explicitly set
- calculatedBudget := maxTokens / 2
- if calculatedBudget < 8000 {
- calculatedBudget = 8000
- }
- if calculatedBudget > 24000 {
- calculatedBudget = 24000
- }
- budgetTokens = calculatedBudget
- log.Debugf("kiro-openai: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
- }
-
- if systemPrompt != "" {
- systemPrompt += "\n"
- }
- dynamicThinkingHint := fmt.Sprintf("interleaved%d", budgetTokens)
- systemPrompt += dynamicThinkingHint
- log.Debugf("kiro-openai: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
- }
+ // Check for thinking mode
+ // Supports OpenAI reasoning_effort parameter, model name hints, and Anthropic-Beta header
+ thinkingEnabled := checkThinkingModeFromOpenAIWithHeaders(openaiBody, headers)
// Convert OpenAI tools to Kiro format
kiroTools := convertOpenAIToolsToKiro(tools)
+ // Thinking mode implementation:
+ // Kiro API doesn't accept max_tokens for thinking. Instead, thinking mode is enabled
+ // by injecting and tags into the system prompt.
+ // We use a fixed max_thinking_length value since Kiro handles the actual budget internally.
+ if thinkingEnabled {
+ thinkingHint := `interleaved
+200000
+
+IMPORTANT: You MUST use ... tags to show your reasoning process before providing your final response. Think step by step inside the thinking tags.`
+ if systemPrompt != "" {
+ systemPrompt = thinkingHint + "\n\n" + systemPrompt
+ } else {
+ systemPrompt = thinkingHint
+ }
+ log.Infof("kiro-openai: injected thinking prompt")
+ }
+
// Process messages and build history
history, currentUserMsg, currentToolResults := processOpenAIMessages(messages, modelID, origin)
@@ -284,6 +283,7 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
}
// Build inferenceConfig if we have any inference parameters
+ // Note: Kiro API doesn't actually use max_tokens for thinking budget
var inferenceConfig *KiroInferenceConfig
if maxTokens > 0 || hasTemperature || hasTopP {
inferenceConfig = &KiroInferenceConfig{}
@@ -682,13 +682,28 @@ func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResul
}
// checkThinkingModeFromOpenAI checks if thinking mode is enabled in the OpenAI request.
-// Returns (thinkingEnabled, budgetTokens).
+// Returns thinkingEnabled.
// Supports:
// - reasoning_effort parameter (low/medium/high/auto)
// - Model name containing "thinking" or "reason"
// - tag in system prompt (AMP/Cursor format)
-func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
- var budgetTokens int64 = 16000 // Default budget
+func checkThinkingModeFromOpenAI(openaiBody []byte) bool {
+ return checkThinkingModeFromOpenAIWithHeaders(openaiBody, nil)
+}
+
+// checkThinkingModeFromOpenAIWithHeaders checks if thinking mode is enabled in the OpenAI request.
+// Returns thinkingEnabled.
+// Supports:
+// - Anthropic-Beta header with interleaved-thinking (Claude CLI)
+// - reasoning_effort parameter (low/medium/high/auto)
+// - Model name containing "thinking" or "reason"
+// - tag in system prompt (AMP/Cursor format)
+func checkThinkingModeFromOpenAIWithHeaders(openaiBody []byte, headers http.Header) bool {
+ // Check Anthropic-Beta header first (Claude CLI uses this)
+ if kiroclaude.IsThinkingEnabledFromHeader(headers) {
+ log.Debugf("kiro-openai: thinking mode enabled via Anthropic-Beta header")
+ return true
+ }
// Check OpenAI format: reasoning_effort parameter
// Valid values: "low", "medium", "high", "auto" (not "none")
@@ -697,18 +712,7 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
effort := reasoningEffort.String()
if effort != "" && effort != "none" {
log.Debugf("kiro-openai: thinking mode enabled via reasoning_effort: %s", effort)
- // Adjust budget based on effort level
- switch effort {
- case "low":
- budgetTokens = 8000
- case "medium":
- budgetTokens = 16000
- case "high":
- budgetTokens = 32000
- case "auto":
- budgetTokens = 16000
- }
- return true, budgetTokens
+ return true
}
}
@@ -725,17 +729,7 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
thinkingMode := bodyStr[startIdx : startIdx+endIdx]
if thinkingMode == "interleaved" || thinkingMode == "enabled" {
log.Debugf("kiro-openai: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
- // Try to extract max_thinking_length if present
- if maxLenStart := strings.Index(bodyStr, ""); maxLenStart >= 0 {
- maxLenStart += len("")
- if maxLenEnd := strings.Index(bodyStr[maxLenStart:], ""); maxLenEnd >= 0 {
- maxLenStr := bodyStr[maxLenStart : maxLenStart+maxLenEnd]
- if parsed, err := fmt.Sscanf(maxLenStr, "%d", &budgetTokens); err == nil && parsed == 1 {
- log.Debugf("kiro-openai: extracted max_thinking_length: %d", budgetTokens)
- }
- }
- }
- return true, budgetTokens
+ return true
}
}
}
@@ -746,13 +740,21 @@ func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
modelLower := strings.ToLower(model)
if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
log.Debugf("kiro-openai: thinking mode enabled via model name hint: %s", model)
- return true, budgetTokens
+ return true
}
log.Debugf("kiro-openai: no thinking mode detected in OpenAI request")
- return false, budgetTokens
+ return false
}
+// hasThinkingTagInBody checks if the request body already contains thinking configuration tags.
+// This is used to prevent duplicate injection when client (e.g., AMP/Cursor) already includes thinking config.
+func hasThinkingTagInBody(body []byte) bool {
+ bodyStr := string(body)
+ return strings.Contains(bodyStr, "") || strings.Contains(bodyStr, "")
+}
+
+
// extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint.
// OpenAI tool_choice values:
// - "none": Don't use any tools
@@ -845,4 +847,4 @@ func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
}
}
return unique
-}
\ No newline at end of file
+}
diff --git a/internal/translator/kiro/openai/kiro_openai_response.go b/internal/translator/kiro/openai/kiro_openai_response.go
index b7da1373..edc70ad8 100644
--- a/internal/translator/kiro/openai/kiro_openai_response.go
+++ b/internal/translator/kiro/openai/kiro_openai_response.go
@@ -21,12 +21,25 @@ var functionCallIDCounter uint64
// Supports tool_calls when tools are present in the response.
// stopReason is passed from upstream; fallback logic applied if empty.
func BuildOpenAIResponse(content string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte {
+ return BuildOpenAIResponseWithReasoning(content, "", toolUses, model, usageInfo, stopReason)
+}
+
+// BuildOpenAIResponseWithReasoning constructs an OpenAI Chat Completions-compatible response with reasoning_content support.
+// Supports tool_calls when tools are present in the response.
+// reasoningContent is included as reasoning_content field in the message when present.
+// stopReason is passed from upstream; fallback logic applied if empty.
+func BuildOpenAIResponseWithReasoning(content, reasoningContent string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte {
// Build the message object
message := map[string]interface{}{
"role": "assistant",
"content": content,
}
+ // Add reasoning_content if present (for thinking/reasoning models)
+ if reasoningContent != "" {
+ message["reasoning_content"] = reasoningContent
+ }
+
// Add tool_calls if present
if len(toolUses) > 0 {
var toolCalls []map[string]interface{}