diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go
index 1d4d85a5..b9a38272 100644
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -19,6 +19,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
kiroclaude "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude"
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
+ kiroopenai "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/openai"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -161,6 +162,22 @@ type KiroExecutor struct {
refreshMu sync.Mutex // Serializes token refresh operations to prevent race conditions
}
+// buildKiroPayloadForFormat builds the Kiro API payload based on the source format.
+// This is critical because OpenAI and Claude formats have different tool structures:
+//   - OpenAI: tools[].function.name, tools[].function.description
+//   - Claude: tools[].name, tools[].description
+func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format) []byte {
+	formatName := sourceFormat.String()
+	switch formatName {
+	case "openai":
+		log.Debugf("kiro: using OpenAI payload builder for source format: %s", formatName)
+		return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly)
+	default: // Covers "claude", "kiro", and any other format that speaks Claude-shaped tools.
+		log.Debugf("kiro: using Claude payload builder for source format: %s", formatName)
+		return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly)
+	}
+}
+
// NewKiroExecutor creates a new Kiro executor instance.
func NewKiroExecutor(cfg *config.Config) *KiroExecutor {
return &KiroExecutor{cfg: cfg}
@@ -231,7 +248,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
// Rebuild payload with the correct origin for this endpoint
// Each endpoint requires its matching Origin value in the request body
- kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
+ kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Debugf("kiro: trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
@@ -341,7 +358,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
// Rebuild payload with new profile ARN if changed
- kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
+ kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Infof("kiro: token refreshed successfully, retrying request")
continue
}
@@ -398,7 +415,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
if refreshedAuth != nil {
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
- kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
+ kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Infof("kiro: token refreshed for 403, retrying request")
continue
}
@@ -537,7 +554,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
// Rebuild payload with the correct origin for this endpoint
// Each endpoint requires its matching Origin value in the request body
- kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
+ kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Debugf("kiro: stream trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
@@ -660,7 +677,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
// Rebuild payload with new profile ARN if changed
- kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
+ kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Infof("kiro: token refreshed successfully, retrying stream request")
continue
}
@@ -717,7 +734,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
if refreshedAuth != nil {
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
- kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
+ kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Infof("kiro: token refreshed for 403, retrying stream request")
continue
}
@@ -755,7 +772,20 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
}
}()
- e.streamToChannel(ctx, resp.Body, out, from, req.Model, opts.OriginalRequest, body, reporter)
+ // Check if thinking mode was enabled in the original request
+ // Only parse tags when thinking was explicitly requested
+ // Check multiple sources: original request, pre-translation payload, and translated body
+ // This handles different client formats (Claude API, OpenAI, AMP/Cursor)
+ thinkingEnabled := kiroclaude.IsThinkingEnabled(opts.OriginalRequest)
+ if !thinkingEnabled {
+ thinkingEnabled = kiroclaude.IsThinkingEnabled(req.Payload)
+ }
+ if !thinkingEnabled {
+ thinkingEnabled = kiroclaude.IsThinkingEnabled(body)
+ }
+ log.Debugf("kiro: stream thinkingEnabled = %v", thinkingEnabled)
+
+ e.streamToChannel(ctx, resp.Body, out, from, req.Model, opts.OriginalRequest, body, reporter, thinkingEnabled)
}(httpResp)
return out, nil
@@ -807,6 +837,153 @@ func kiroCredentials(auth *cliproxyauth.Auth) (accessToken, profileArn string) {
return accessToken, profileArn
}
+// findRealThinkingEndTag finds the real end tag, skipping false positives.
+// Returns -1 if no real end tag is found.
+//
+// Real tags from Kiro API have specific characteristics:
+//   - Usually preceded by newline (.\n)
+//   - Usually followed by newline (\n\n)
+//   - Not inside code blocks or inline code
+//
+// False positives (discussion text) have characteristics:
+//   - In the middle of a sentence
+//   - Preceded by discussion words like "标签", "tag", "returns"
+//   - Inside code blocks or inline code
+//
+// Parameters:
+//   - content: the content to search in
+//   - alreadyInCodeBlock: whether we're already inside a code block from previous chunks
+//   - alreadyInInlineCode: whether we're already inside inline code from previous chunks
+func findRealThinkingEndTag(content string, alreadyInCodeBlock, alreadyInInlineCode bool) int {
+	searchStart := 0
+	for {
+		endIdx := strings.Index(content[searchStart:], kirocommon.ThinkingEndTag)
+		if endIdx < 0 {
+			return -1
+		}
+		endIdx += searchStart // Adjust to absolute position
+
+		textBeforeEnd := content[:endIdx]
+		textAfterEnd := content[endIdx+len(kirocommon.ThinkingEndTag):]
+
+		// Check 1: Is it inside inline code?
+		// Count backticks in current content and add state from previous chunks
+		backtickCount := strings.Count(textBeforeEnd, "`")
+		effectiveInInlineCode := alreadyInInlineCode
+		if backtickCount%2 == 1 {
+			effectiveInInlineCode = !effectiveInInlineCode
+		}
+		if effectiveInInlineCode {
+			log.Debugf("kiro: found inside inline code at pos %d, skipping", endIdx)
+			searchStart = endIdx + len(kirocommon.ThinkingEndTag)
+			continue
+		}
+
+		// Check 2: Is it inside a code block?
+		// Count fences in current content and add state from previous chunks
+		fenceCount := strings.Count(textBeforeEnd, "```")
+		altFenceCount := strings.Count(textBeforeEnd, "~~~")
+		effectiveInCodeBlock := alreadyInCodeBlock
+		if fenceCount%2 == 1 || altFenceCount%2 == 1 {
+			effectiveInCodeBlock = !effectiveInCodeBlock
+		}
+		if effectiveInCodeBlock {
+			log.Debugf("kiro: found inside code block at pos %d, skipping", endIdx)
+			searchStart = endIdx + len(kirocommon.ThinkingEndTag)
+			continue
+		}
+
+		// Check 3: Real tags are usually preceded by newline or at start
+		// and followed by newline or at end. Check the format.
+		charBeforeTag := byte(0)
+		if endIdx > 0 {
+			charBeforeTag = content[endIdx-1]
+		}
+		charAfterTag := byte(0)
+		if len(textAfterEnd) > 0 {
+			charAfterTag = textAfterEnd[0]
+		}
+
+		// Real end tag format: preceded by newline OR end of sentence (. ! ?)
+		// and followed by newline OR end of content
+		isPrecededByNewlineOrSentenceEnd := charBeforeTag == '\n' || charBeforeTag == '.' ||
+			charBeforeTag == '!' || charBeforeTag == '?' || charBeforeTag == 0
+		isFollowedByNewlineOrEnd := charAfterTag == '\n' || charAfterTag == 0
+
+		// If the tag has proper formatting (newline before/after), it's likely real
+		if isPrecededByNewlineOrSentenceEnd && isFollowedByNewlineOrEnd {
+			log.Debugf("kiro: found properly formatted at pos %d", endIdx)
+			return endIdx
+		}
+
+		// Check 4: Is the tag preceded by discussion keywords on the same line?
+		lastNewlineIdx := strings.LastIndex(textBeforeEnd, "\n")
+		lineBeforeTag := textBeforeEnd
+		if lastNewlineIdx >= 0 {
+			lineBeforeTag = textBeforeEnd[lastNewlineIdx+1:]
+		}
+		lineBeforeTagLower := strings.ToLower(lineBeforeTag)
+
+		// Discussion patterns - if found, this is likely discussion text
+		discussionPatterns := []string{
+			"标签", "返回", "输出", "包含", "使用", "解析", "转换", "生成", // Chinese
+			"tag", "return", "output", "contain", "use", "parse", "emit", "convert", "generate", // English
+			// BUG FIX: an empty-string entry was removed here — strings.Contains(line, "") is always true, which marked EVERY candidate as discussion and made this check skip all tags.
+			"``", // explicitly in inline code
+		}
+		isDiscussion := false
+		for _, pattern := range discussionPatterns {
+			if strings.Contains(lineBeforeTagLower, pattern) {
+				isDiscussion = true
+				break
+			}
+		}
+		if isDiscussion {
+			log.Debugf("kiro: found after discussion text at pos %d, skipping", endIdx)
+			searchStart = endIdx + len(kirocommon.ThinkingEndTag)
+			continue
+		}
+
+		// Check 5: Is there text immediately after on the same line?
+		// Real end tags don't have text immediately after on the same line
+		if len(textAfterEnd) > 0 && charAfterTag != '\n' && charAfterTag != 0 {
+			// Find the next newline
+			nextNewline := strings.Index(textAfterEnd, "\n")
+			var textOnSameLine string
+			if nextNewline >= 0 {
+				textOnSameLine = textAfterEnd[:nextNewline]
+			} else {
+				textOnSameLine = textAfterEnd
+			}
+			// If there's non-whitespace text on the same line after the tag, it's discussion
+			if strings.TrimSpace(textOnSameLine) != "" {
+				log.Debugf("kiro: found with text after on same line at pos %d, skipping", endIdx)
+				searchStart = endIdx + len(kirocommon.ThinkingEndTag)
+				continue
+			}
+		}
+
+		// Check 6: Is there another tag after this ?
+		if strings.Contains(textAfterEnd, kirocommon.ThinkingStartTag) {
+			nextStartIdx := strings.Index(textAfterEnd, kirocommon.ThinkingStartTag)
+			textBeforeNextStart := textAfterEnd[:nextStartIdx]
+			nextBacktickCount := strings.Count(textBeforeNextStart, "`")
+			nextFenceCount := strings.Count(textBeforeNextStart, "```")
+			nextAltFenceCount := strings.Count(textBeforeNextStart, "~~~")
+
+			// NOTE(review): fence backticks are also counted by the "`" count above, so an odd fence can flip backtick parity — confirm this heuristic against real streams.
+			if nextBacktickCount%2 == 0 && nextFenceCount%2 == 0 && nextAltFenceCount%2 == 0 {
+				log.Debugf("kiro: found followed by at pos %d, likely discussion text, skipping", endIdx)
+				searchStart = endIdx + len(kirocommon.ThinkingEndTag)
+				continue
+			}
+		}
+
+		// This looks like a real end tag
+		return endIdx
+	}
+}
+
// determineAgenticMode determines if the model is an agentic or chat-only variant.
// Returns (isAgentic, isChatOnly) based on model name suffixes.
func determineAgenticMode(model string) (isAgentic, isChatOnly bool) {
@@ -963,6 +1140,9 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki
processedIDs := make(map[string]bool)
var currentToolUse *kiroclaude.ToolUseState
+ // Upstream usage tracking - Kiro API returns credit usage and context percentage
+ var upstreamContextPercentage float64 // Context usage percentage from upstream (e.g., 78.56)
+
for {
msg, eventErr := e.readEventStreamMessage(reader)
if eventErr != nil {
@@ -1119,6 +1299,119 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki
stopReason = sr
log.Debugf("kiro: parseEventStream found stopReason in messageStopEvent: %s", stopReason)
}
+
+ case "messageMetadataEvent":
+ // Handle message metadata events which may contain token counts
+ if metadata, ok := event["messageMetadataEvent"].(map[string]interface{}); ok {
+ if inputTokens, ok := metadata["inputTokens"].(float64); ok {
+ usageInfo.InputTokens = int64(inputTokens)
+ log.Debugf("kiro: parseEventStream found inputTokens in messageMetadataEvent: %d", usageInfo.InputTokens)
+ }
+ if outputTokens, ok := metadata["outputTokens"].(float64); ok {
+ usageInfo.OutputTokens = int64(outputTokens)
+ log.Debugf("kiro: parseEventStream found outputTokens in messageMetadataEvent: %d", usageInfo.OutputTokens)
+ }
+ if totalTokens, ok := metadata["totalTokens"].(float64); ok {
+ usageInfo.TotalTokens = int64(totalTokens)
+ log.Debugf("kiro: parseEventStream found totalTokens in messageMetadataEvent: %d", usageInfo.TotalTokens)
+ }
+ }
+
+ case "usageEvent", "usage":
+ // Handle dedicated usage events
+ if inputTokens, ok := event["inputTokens"].(float64); ok {
+ usageInfo.InputTokens = int64(inputTokens)
+ log.Debugf("kiro: parseEventStream found inputTokens in usageEvent: %d", usageInfo.InputTokens)
+ }
+ if outputTokens, ok := event["outputTokens"].(float64); ok {
+ usageInfo.OutputTokens = int64(outputTokens)
+ log.Debugf("kiro: parseEventStream found outputTokens in usageEvent: %d", usageInfo.OutputTokens)
+ }
+ if totalTokens, ok := event["totalTokens"].(float64); ok {
+ usageInfo.TotalTokens = int64(totalTokens)
+ log.Debugf("kiro: parseEventStream found totalTokens in usageEvent: %d", usageInfo.TotalTokens)
+ }
+ // Also check nested usage object
+ if usageObj, ok := event["usage"].(map[string]interface{}); ok {
+ if inputTokens, ok := usageObj["input_tokens"].(float64); ok {
+ usageInfo.InputTokens = int64(inputTokens)
+ } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok {
+ usageInfo.InputTokens = int64(inputTokens)
+ }
+ if outputTokens, ok := usageObj["output_tokens"].(float64); ok {
+ usageInfo.OutputTokens = int64(outputTokens)
+ } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok {
+ usageInfo.OutputTokens = int64(outputTokens)
+ }
+ if totalTokens, ok := usageObj["total_tokens"].(float64); ok {
+ usageInfo.TotalTokens = int64(totalTokens)
+ }
+ log.Debugf("kiro: parseEventStream found usage object: input=%d, output=%d, total=%d",
+ usageInfo.InputTokens, usageInfo.OutputTokens, usageInfo.TotalTokens)
+ }
+
+ case "metricsEvent":
+ // Handle metrics events which may contain usage data
+ if metrics, ok := event["metricsEvent"].(map[string]interface{}); ok {
+ if inputTokens, ok := metrics["inputTokens"].(float64); ok {
+ usageInfo.InputTokens = int64(inputTokens)
+ }
+ if outputTokens, ok := metrics["outputTokens"].(float64); ok {
+ usageInfo.OutputTokens = int64(outputTokens)
+ }
+ log.Debugf("kiro: parseEventStream found metricsEvent: input=%d, output=%d",
+ usageInfo.InputTokens, usageInfo.OutputTokens)
+ }
+
+ default:
+ // Check for contextUsagePercentage in any event
+ if ctxPct, ok := event["contextUsagePercentage"].(float64); ok {
+ upstreamContextPercentage = ctxPct
+ log.Debugf("kiro: parseEventStream received context usage: %.2f%%", upstreamContextPercentage)
+ }
+ // Log unknown event types for debugging (to discover new event formats)
+ log.Debugf("kiro: parseEventStream unknown event type: %s, payload: %s", eventType, string(payload))
+ }
+
+ // Check for direct token fields in any event (fallback)
+ if usageInfo.InputTokens == 0 {
+ if inputTokens, ok := event["inputTokens"].(float64); ok {
+ usageInfo.InputTokens = int64(inputTokens)
+ log.Debugf("kiro: parseEventStream found direct inputTokens: %d", usageInfo.InputTokens)
+ }
+ }
+ if usageInfo.OutputTokens == 0 {
+ if outputTokens, ok := event["outputTokens"].(float64); ok {
+ usageInfo.OutputTokens = int64(outputTokens)
+ log.Debugf("kiro: parseEventStream found direct outputTokens: %d", usageInfo.OutputTokens)
+ }
+ }
+
+ // Check for usage object in any event (OpenAI format)
+ if usageInfo.InputTokens == 0 || usageInfo.OutputTokens == 0 {
+ if usageObj, ok := event["usage"].(map[string]interface{}); ok {
+ if usageInfo.InputTokens == 0 {
+ if inputTokens, ok := usageObj["input_tokens"].(float64); ok {
+ usageInfo.InputTokens = int64(inputTokens)
+ } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok {
+ usageInfo.InputTokens = int64(inputTokens)
+ }
+ }
+ if usageInfo.OutputTokens == 0 {
+ if outputTokens, ok := usageObj["output_tokens"].(float64); ok {
+ usageInfo.OutputTokens = int64(outputTokens)
+ } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok {
+ usageInfo.OutputTokens = int64(outputTokens)
+ }
+ }
+ if usageInfo.TotalTokens == 0 {
+ if totalTokens, ok := usageObj["total_tokens"].(float64); ok {
+ usageInfo.TotalTokens = int64(totalTokens)
+ }
+ }
+ log.Debugf("kiro: parseEventStream found usage object (fallback): input=%d, output=%d, total=%d",
+ usageInfo.InputTokens, usageInfo.OutputTokens, usageInfo.TotalTokens)
+ }
}
// Also check nested supplementaryWebLinksEvent
@@ -1157,6 +1450,20 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki
log.Warnf("kiro: response truncated due to max_tokens limit")
}
+ // Use contextUsagePercentage to calculate more accurate input tokens
+ // Kiro model has 200k max context, contextUsagePercentage represents the percentage used
+ // Formula: input_tokens = contextUsagePercentage * 200000 / 100
+ if upstreamContextPercentage > 0 {
+ calculatedInputTokens := int64(upstreamContextPercentage * 200000 / 100)
+ if calculatedInputTokens > 0 {
+ localEstimate := usageInfo.InputTokens
+ usageInfo.InputTokens = calculatedInputTokens
+ usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
+ log.Infof("kiro: parseEventStream using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)",
+ upstreamContextPercentage, calculatedInputTokens, localEstimate)
+ }
+ }
+
return cleanedContent, toolUses, usageInfo, stopReason, nil
}
@@ -1357,7 +1664,8 @@ func (e *KiroExecutor) extractEventTypeFromBytes(headers []byte) string {
// Includes embedded [Called ...] tool call parsing and input buffering for toolUseEvent.
// Implements duplicate content filtering using lastContentEvent detection (based on AIClient-2-API).
// Extracts stop_reason from upstream events when available.
-func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter) {
+// thinkingEnabled controls whether tags are parsed - only parse when request enabled thinking.
+func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter, thinkingEnabled bool) {
reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers
var totalUsage usage.Detail
var hasToolUses bool // Track if any tool uses were emitted
@@ -1383,6 +1691,11 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
var lastUsageUpdateTime = time.Now() // Last time usage update was sent
var lastReportedOutputTokens int64 // Last reported output token count
+ // Upstream usage tracking - Kiro API returns credit usage and context percentage
+ var upstreamCreditUsage float64 // Credit usage from upstream (e.g., 1.458)
+ var upstreamContextPercentage float64 // Context usage percentage from upstream (e.g., 78.56)
+ var hasUpstreamUsage bool // Whether we received usage from upstream
+
// Translator param for maintaining tool call state across streaming events
// IMPORTANT: This must persist across all TranslateStream calls
var translatorParam any
@@ -1395,6 +1708,22 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
isThinkingBlockOpen := false // Track if thinking content block is open
thinkingBlockIndex := -1 // Index of the thinking content block
+ // Code block state tracking for heuristic thinking tag parsing
+ // When inside a markdown code block, tags should NOT be parsed
+ // This prevents false positives when the model outputs code examples containing these tags
+ inCodeBlock := false
+ codeFenceType := "" // Track which fence type opened the block ("```" or "~~~")
+
+ // Inline code state tracking - when inside backticks, don't parse thinking tags
+ // This handles cases like `` being discussed in text
+ inInlineCode := false
+
+ // Track if we've seen any non-whitespace content before a thinking tag
+ // Real thinking blocks from Kiro always start at the very beginning of the response
+ // If we see content before , subsequent tags are likely discussion text
+ hasSeenNonThinkingContent := false
+ thinkingBlockCompleted := false // Track if we've already completed a thinking block
+
// Pre-calculate input tokens from request if possible
// Kiro uses Claude format, so try Claude format first, then OpenAI format, then fallback
if enc, err := getTokenizer(model); err == nil {
@@ -1629,6 +1958,66 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
log.Debugf("kiro: streamToChannel found stopReason in messageStopEvent: %s", upstreamStopReason)
}
+ default:
+ // Check for upstream usage events from Kiro API
+ // Format: {"unit":"credit","unitPlural":"credits","usage":1.458}
+ if unit, ok := event["unit"].(string); ok && unit == "credit" {
+ if usage, ok := event["usage"].(float64); ok {
+ upstreamCreditUsage = usage
+ hasUpstreamUsage = true
+ log.Debugf("kiro: received upstream credit usage: %.4f", upstreamCreditUsage)
+ }
+ }
+ // Format: {"contextUsagePercentage":78.56}
+ if ctxPct, ok := event["contextUsagePercentage"].(float64); ok {
+ upstreamContextPercentage = ctxPct
+ log.Debugf("kiro: received upstream context usage: %.2f%%", upstreamContextPercentage)
+ }
+
+ // Check for token counts in unknown events
+ if inputTokens, ok := event["inputTokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ hasUpstreamUsage = true
+ log.Debugf("kiro: streamToChannel found inputTokens in event %s: %d", eventType, totalUsage.InputTokens)
+ }
+ if outputTokens, ok := event["outputTokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ hasUpstreamUsage = true
+ log.Debugf("kiro: streamToChannel found outputTokens in event %s: %d", eventType, totalUsage.OutputTokens)
+ }
+ if totalTokens, ok := event["totalTokens"].(float64); ok {
+ totalUsage.TotalTokens = int64(totalTokens)
+ log.Debugf("kiro: streamToChannel found totalTokens in event %s: %d", eventType, totalUsage.TotalTokens)
+ }
+
+ // Check for usage object in unknown events (OpenAI/Claude format)
+ if usageObj, ok := event["usage"].(map[string]interface{}); ok {
+ if inputTokens, ok := usageObj["input_tokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ hasUpstreamUsage = true
+ } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ hasUpstreamUsage = true
+ }
+ if outputTokens, ok := usageObj["output_tokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ hasUpstreamUsage = true
+ } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ hasUpstreamUsage = true
+ }
+ if totalTokens, ok := usageObj["total_tokens"].(float64); ok {
+ totalUsage.TotalTokens = int64(totalTokens)
+ }
+ log.Debugf("kiro: streamToChannel found usage object in event %s: input=%d, output=%d, total=%d",
+ eventType, totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens)
+ }
+
+ // Log unknown event types for debugging (to discover new event formats)
+ if eventType != "" {
+ log.Debugf("kiro: streamToChannel unknown event type: %s, payload: %s", eventType, string(payload))
+ }
+
case "assistantResponseEvent":
var contentDelta string
var toolUses []map[string]interface{}
@@ -1742,9 +2131,243 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
}
for len(remaining) > 0 {
+ // CRITICAL FIX: Only parse tags when thinking mode was enabled in the request.
+ // When thinking is NOT enabled, tags in responses should be treated as
+ // regular text content, not as thinking blocks. This prevents normal text content
+ // from being incorrectly parsed as thinking when the model outputs tags
+ // without the user requesting thinking mode.
+ if !thinkingEnabled {
+ // Thinking not enabled - emit all content as regular text without parsing tags
+ if remaining != "" {
+ if !isTextBlockOpen {
+ contentBlockIndex++
+ isTextBlockOpen = true
+ blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+
+ claudeEvent := kiroclaude.BuildClaudeStreamEvent(remaining, contentBlockIndex)
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ break // Exit the for loop - all content processed as text
+ }
+
+ // HEURISTIC FIX: Track code block and inline code state to avoid parsing tags
+ // inside code contexts. When the model outputs code examples containing these tags,
+ // they should be treated as text.
+ if !inThinkBlock {
+ // Check for inline code backticks first (higher priority than code fences)
+ // This handles cases like `` being discussed in text
+ backtickIdx := strings.Index(remaining, kirocommon.InlineCodeMarker)
+ thinkingIdx := strings.Index(remaining, kirocommon.ThinkingStartTag)
+
+ // If backtick comes before thinking tag, handle inline code
+ if backtickIdx >= 0 && (thinkingIdx < 0 || backtickIdx < thinkingIdx) {
+ if inInlineCode {
+ // Closing backtick - emit content up to and including backtick, exit inline code
+ textToEmit := remaining[:backtickIdx+1]
+ if textToEmit != "" {
+ if !isTextBlockOpen {
+ contentBlockIndex++
+ isTextBlockOpen = true
+ blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex)
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ remaining = remaining[backtickIdx+1:]
+ inInlineCode = false
+ continue
+ } else {
+ // Opening backtick - emit content before backtick, enter inline code
+ textToEmit := remaining[:backtickIdx+1]
+ if textToEmit != "" {
+ if !isTextBlockOpen {
+ contentBlockIndex++
+ isTextBlockOpen = true
+ blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex)
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ remaining = remaining[backtickIdx+1:]
+ inInlineCode = true
+ continue
+ }
+ }
+
+ // If inside inline code, emit all content as text (don't parse thinking tags)
+ if inInlineCode {
+ if remaining != "" {
+ if !isTextBlockOpen {
+ contentBlockIndex++
+ isTextBlockOpen = true
+ blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ claudeEvent := kiroclaude.BuildClaudeStreamEvent(remaining, contentBlockIndex)
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ break // Exit loop - remaining content is inside inline code
+ }
+
+ // Check for code fence markers (``` or ~~~) to toggle code block state
+ fenceIdx := strings.Index(remaining, kirocommon.CodeFenceMarker)
+ altFenceIdx := strings.Index(remaining, kirocommon.AltCodeFenceMarker)
+
+ // Find the earliest fence marker
+ earliestFenceIdx := -1
+ earliestFenceType := ""
+ if fenceIdx >= 0 && (altFenceIdx < 0 || fenceIdx < altFenceIdx) {
+ earliestFenceIdx = fenceIdx
+ earliestFenceType = kirocommon.CodeFenceMarker
+ } else if altFenceIdx >= 0 {
+ earliestFenceIdx = altFenceIdx
+ earliestFenceType = kirocommon.AltCodeFenceMarker
+ }
+
+ if earliestFenceIdx >= 0 {
+ // Check if this fence comes before any thinking tag
+ thinkingIdx := strings.Index(remaining, kirocommon.ThinkingStartTag)
+ if inCodeBlock {
+ // Inside code block - check if this fence closes it
+ if earliestFenceType == codeFenceType {
+ // This fence closes the code block
+ // Emit content up to and including the fence as text
+ textToEmit := remaining[:earliestFenceIdx+len(earliestFenceType)]
+ if textToEmit != "" {
+ if !isTextBlockOpen {
+ contentBlockIndex++
+ isTextBlockOpen = true
+ blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex)
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ remaining = remaining[earliestFenceIdx+len(earliestFenceType):]
+ inCodeBlock = false
+ codeFenceType = ""
+ log.Debugf("kiro: exited code block")
+ continue
+ }
+ } else if thinkingIdx < 0 || earliestFenceIdx < thinkingIdx {
+ // Not in code block, and fence comes before thinking tag (or no thinking tag)
+ // Emit content up to and including the fence as text, then enter code block
+ textToEmit := remaining[:earliestFenceIdx+len(earliestFenceType)]
+ if textToEmit != "" {
+ if !isTextBlockOpen {
+ contentBlockIndex++
+ isTextBlockOpen = true
+ blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex)
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ remaining = remaining[earliestFenceIdx+len(earliestFenceType):]
+ inCodeBlock = true
+ codeFenceType = earliestFenceType
+ log.Debugf("kiro: entered code block with fence: %s", earliestFenceType)
+ continue
+ }
+ }
+
+ // If inside code block, emit all content as text (don't parse thinking tags)
+ if inCodeBlock {
+ if remaining != "" {
+ if !isTextBlockOpen {
+ contentBlockIndex++
+ isTextBlockOpen = true
+ blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ claudeEvent := kiroclaude.BuildClaudeStreamEvent(remaining, contentBlockIndex)
+ sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+ for _, chunk := range sseData {
+ if chunk != "" {
+ out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+ }
+ }
+ }
+ break // Exit loop - all remaining content is inside code block
+ }
+ }
+
if inThinkBlock {
// Inside thinking block - look for end tag
- endIdx := strings.Index(remaining, kirocommon.ThinkingEndTag)
+ // CRITICAL FIX: Skip tags that are not the real end tag
+ // This prevents false positives when thinking content discusses these tags
+ // Pass current code block/inline code state for accurate detection
+ endIdx := findRealThinkingEndTag(remaining, inCodeBlock, inInlineCode)
+
if endIdx >= 0 {
// Found end tag - emit any content before end tag, then close block
thinkContent := remaining[:endIdx]
@@ -1790,8 +2413,9 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
}
inThinkBlock = false
+ thinkingBlockCompleted = true // Mark that we've completed a thinking block
remaining = remaining[endIdx+len(kirocommon.ThinkingEndTag):]
- log.Debugf("kiro: exited thinking block")
+ log.Debugf("kiro: exited thinking block, subsequent tags will be treated as text")
} else {
// No end tag found - TRUE STREAMING: emit content immediately
// Only save potential partial tag length for next iteration
@@ -1837,11 +2461,29 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
}
} else {
// Outside thinking block - look for start tag
- startIdx := strings.Index(remaining, kirocommon.ThinkingStartTag)
+ // CRITICAL FIX: Only parse tags at the very beginning of the response
+ // or if we haven't completed a thinking block yet.
+ // After a thinking block is completed, subsequent tags are likely
+ // discussion text (e.g., "Kiro returns `` tags") and should NOT be parsed.
+ startIdx := -1
+ if !thinkingBlockCompleted && !hasSeenNonThinkingContent {
+ startIdx = strings.Index(remaining, kirocommon.ThinkingStartTag)
+ // If there's non-whitespace content before the tag, it's not a real thinking block
+ if startIdx > 0 {
+ textBefore := remaining[:startIdx]
+ if strings.TrimSpace(textBefore) != "" {
+ // There's real content before the tag - this is discussion text, not thinking
+ hasSeenNonThinkingContent = true
+ startIdx = -1
+ log.Debugf("kiro: found tag after non-whitespace content, treating as text")
+ }
+ }
+ }
if startIdx >= 0 {
// Found start tag - emit text before it and switch to thinking mode
textBefore := remaining[:startIdx]
if textBefore != "" {
+ // Only whitespace before thinking tag is allowed
// Start text content block if needed
if !isTextBlockOpen {
contentBlockIndex++
@@ -1881,11 +2523,19 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
log.Debugf("kiro: entered thinking block")
} else {
// No start tag found - check for partial start tag at buffer end
- pendingStart := kiroclaude.PendingTagSuffix(remaining, kirocommon.ThinkingStartTag)
+ // Only check for partial tags if we haven't completed a thinking block yet
+ pendingStart := 0
+ if !thinkingBlockCompleted && !hasSeenNonThinkingContent {
+ pendingStart = kiroclaude.PendingTagSuffix(remaining, kirocommon.ThinkingStartTag)
+ }
if pendingStart > 0 {
// Emit text except potential partial tag
textToEmit := remaining[:len(remaining)-pendingStart]
if textToEmit != "" {
+ // Mark that we've seen non-thinking content
+ if strings.TrimSpace(textToEmit) != "" {
+ hasSeenNonThinkingContent = true
+ }
// Start text content block if needed
if !isTextBlockOpen {
contentBlockIndex++
@@ -1912,6 +2562,10 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
} else {
// No partial tag - emit all as text
if remaining != "" {
+ // Mark that we've seen non-thinking content
+ if strings.TrimSpace(remaining) != "" {
+ hasSeenNonThinkingContent = true
+ }
// Start text content block if needed
if !isTextBlockOpen {
contentBlockIndex++
@@ -2065,6 +2719,69 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
if outputTokens, ok := event["outputTokens"].(float64); ok {
totalUsage.OutputTokens = int64(outputTokens)
}
+
+ case "messageMetadataEvent":
+ // Handle message metadata events which may contain token counts
+ if metadata, ok := event["messageMetadataEvent"].(map[string]interface{}); ok {
+ if inputTokens, ok := metadata["inputTokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ log.Debugf("kiro: streamToChannel found inputTokens in messageMetadataEvent: %d", totalUsage.InputTokens)
+ }
+ if outputTokens, ok := metadata["outputTokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ log.Debugf("kiro: streamToChannel found outputTokens in messageMetadataEvent: %d", totalUsage.OutputTokens)
+ }
+ if totalTokens, ok := metadata["totalTokens"].(float64); ok {
+ totalUsage.TotalTokens = int64(totalTokens)
+ log.Debugf("kiro: streamToChannel found totalTokens in messageMetadataEvent: %d", totalUsage.TotalTokens)
+ }
+ }
+
+ case "usageEvent", "usage":
+ // Handle dedicated usage events
+ if inputTokens, ok := event["inputTokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ log.Debugf("kiro: streamToChannel found inputTokens in usageEvent: %d", totalUsage.InputTokens)
+ }
+ if outputTokens, ok := event["outputTokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ log.Debugf("kiro: streamToChannel found outputTokens in usageEvent: %d", totalUsage.OutputTokens)
+ }
+ if totalTokens, ok := event["totalTokens"].(float64); ok {
+ totalUsage.TotalTokens = int64(totalTokens)
+ log.Debugf("kiro: streamToChannel found totalTokens in usageEvent: %d", totalUsage.TotalTokens)
+ }
+ // Also check nested usage object
+ if usageObj, ok := event["usage"].(map[string]interface{}); ok {
+ if inputTokens, ok := usageObj["input_tokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ }
+ if outputTokens, ok := usageObj["output_tokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ }
+ if totalTokens, ok := usageObj["total_tokens"].(float64); ok {
+ totalUsage.TotalTokens = int64(totalTokens)
+ }
+ log.Debugf("kiro: streamToChannel found usage object: input=%d, output=%d, total=%d",
+ totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens)
+ }
+
+ case "metricsEvent":
+ // Handle metrics events which may contain usage data
+ if metrics, ok := event["metricsEvent"].(map[string]interface{}); ok {
+ if inputTokens, ok := metrics["inputTokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ }
+ if outputTokens, ok := metrics["outputTokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ }
+ log.Debugf("kiro: streamToChannel found metricsEvent: input=%d, output=%d",
+ totalUsage.InputTokens, totalUsage.OutputTokens)
+ }
}
// Check nested usage event
@@ -2076,6 +2793,47 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
totalUsage.OutputTokens = int64(outputTokens)
}
}
+
+ // Check for direct token fields in any event (fallback)
+ if totalUsage.InputTokens == 0 {
+ if inputTokens, ok := event["inputTokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ log.Debugf("kiro: streamToChannel found direct inputTokens: %d", totalUsage.InputTokens)
+ }
+ }
+ if totalUsage.OutputTokens == 0 {
+ if outputTokens, ok := event["outputTokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ log.Debugf("kiro: streamToChannel found direct outputTokens: %d", totalUsage.OutputTokens)
+ }
+ }
+
+ // Check for usage object in any event (OpenAI format)
+ if totalUsage.InputTokens == 0 || totalUsage.OutputTokens == 0 {
+ if usageObj, ok := event["usage"].(map[string]interface{}); ok {
+ if totalUsage.InputTokens == 0 {
+ if inputTokens, ok := usageObj["input_tokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok {
+ totalUsage.InputTokens = int64(inputTokens)
+ }
+ }
+ if totalUsage.OutputTokens == 0 {
+ if outputTokens, ok := usageObj["output_tokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok {
+ totalUsage.OutputTokens = int64(outputTokens)
+ }
+ }
+ if totalUsage.TotalTokens == 0 {
+ if totalTokens, ok := usageObj["total_tokens"].(float64); ok {
+ totalUsage.TotalTokens = int64(totalTokens)
+ }
+ }
+ log.Debugf("kiro: streamToChannel found usage object (fallback): input=%d, output=%d, total=%d",
+ totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens)
+ }
+ }
}
// Close content block if open
@@ -2120,8 +2878,35 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
totalUsage.OutputTokens = 1
}
}
+
+ // Use contextUsagePercentage to calculate more accurate input tokens
+ // Kiro model has 200k max context, contextUsagePercentage represents the percentage used
+ // Formula: input_tokens = contextUsagePercentage * 200000 / 100
+ // Note: The effective input context is ~170k (200k - 30k reserved for output)
+ if upstreamContextPercentage > 0 {
+ // Calculate input tokens from context percentage
+ // Using 200k as the base since that's what Kiro reports against
+ calculatedInputTokens := int64(upstreamContextPercentage * 200000 / 100)
+
+ // Only use calculated value if it's significantly different from local estimate
+ // This provides more accurate token counts based on upstream data
+ if calculatedInputTokens > 0 {
+ localEstimate := totalUsage.InputTokens
+ totalUsage.InputTokens = calculatedInputTokens
+ log.Infof("kiro: using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)",
+ upstreamContextPercentage, calculatedInputTokens, localEstimate)
+ }
+ }
+
totalUsage.TotalTokens = totalUsage.InputTokens + totalUsage.OutputTokens
+ // Log upstream usage information if received
+ if hasUpstreamUsage {
+ log.Infof("kiro: upstream usage - credits: %.4f, context: %.2f%%, final tokens - input: %d, output: %d, total: %d",
+ upstreamCreditUsage, upstreamContextPercentage,
+ totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens)
+ }
+
// Determine stop reason: prefer upstream, then detect tool_use, default to end_turn
stopReason := upstreamStopReason
if stopReason == "" {
@@ -2162,10 +2947,48 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
// NOTE: Claude SSE event builders moved to internal/translator/kiro/claude/kiro_claude_stream.go
// The executor now uses kiroclaude.BuildClaude*Event() functions instead
-// CountTokens is not supported for Kiro provider.
-// Kiro/Amazon Q backend doesn't expose a token counting API.
-func (e *KiroExecutor) CountTokens(context.Context, *cliproxyauth.Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
- return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for kiro"}
+// CountTokens counts tokens locally using tiktoken since Kiro API doesn't expose a token counting endpoint.
+// This provides approximate token counts for client requests.
+func (e *KiroExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+ // Use tiktoken for local token counting
+ enc, err := getTokenizer(req.Model)
+ if err != nil {
+ log.Warnf("kiro: CountTokens failed to get tokenizer: %v, falling back to estimate", err)
+ // Fallback: estimate from payload size (roughly 4 chars per token)
+ estimatedTokens := len(req.Payload) / 4
+ if estimatedTokens == 0 && len(req.Payload) > 0 {
+ estimatedTokens = 1
+ }
+ return cliproxyexecutor.Response{
+ Payload: []byte(fmt.Sprintf(`{"count":%d}`, estimatedTokens)),
+ }, nil
+ }
+
+ // Try to count tokens from the request payload
+ var totalTokens int64
+
+ // Try OpenAI chat format first
+ if tokens, countErr := countOpenAIChatTokens(enc, req.Payload); countErr == nil && tokens > 0 {
+ totalTokens = tokens
+ log.Debugf("kiro: CountTokens counted %d tokens using OpenAI chat format", totalTokens)
+ } else {
+ // Fallback: count raw payload tokens
+ if tokenCount, countErr := enc.Count(string(req.Payload)); countErr == nil {
+ totalTokens = int64(tokenCount)
+ log.Debugf("kiro: CountTokens counted %d tokens from raw payload", totalTokens)
+ } else {
+ // Final fallback: estimate from payload size
+ totalTokens = int64(len(req.Payload) / 4)
+ if totalTokens == 0 && len(req.Payload) > 0 {
+ totalTokens = 1
+ }
+ log.Debugf("kiro: CountTokens estimated %d tokens from payload size", totalTokens)
+ }
+ }
+
+ return cliproxyexecutor.Response{
+ Payload: []byte(fmt.Sprintf(`{"count":%d}`, totalTokens)),
+ }, nil
}
// Refresh refreshes the Kiro OAuth token.
diff --git a/internal/translator/kiro/claude/kiro_claude_request.go b/internal/translator/kiro/claude/kiro_claude_request.go
index 07472be4..ae42b186 100644
--- a/internal/translator/kiro/claude/kiro_claude_request.go
+++ b/internal/translator/kiro/claude/kiro_claude_request.go
@@ -30,6 +30,7 @@ type KiroPayload struct {
type KiroInferenceConfig struct {
MaxTokens int `json:"maxTokens,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
+ TopP float64 `json:"topP,omitempty"`
}
// KiroConversationState holds the conversation context
@@ -136,9 +137,15 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
// Extract max_tokens for potential use in inferenceConfig
+ // Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
+ const kiroMaxOutputTokens = 32000
var maxTokens int64
if mt := gjson.GetBytes(claudeBody, "max_tokens"); mt.Exists() {
maxTokens = mt.Int()
+ if maxTokens == -1 {
+ maxTokens = kiroMaxOutputTokens
+ log.Debugf("kiro: max_tokens=-1 converted to %d", kiroMaxOutputTokens)
+ }
}
// Extract temperature if specified
@@ -149,6 +156,15 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
hasTemperature = true
}
+ // Extract top_p if specified
+ var topP float64
+ var hasTopP bool
+ if tp := gjson.GetBytes(claudeBody, "top_p"); tp.Exists() {
+ topP = tp.Float()
+ hasTopP = true
+ log.Debugf("kiro: extracted top_p: %.2f", topP)
+ }
+
// Normalize origin value for Kiro API compatibility
origin = normalizeOrigin(origin)
log.Debugf("kiro: normalized origin value: %s", origin)
@@ -164,8 +180,26 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
// Extract system prompt
systemPrompt := extractSystemPrompt(claudeBody)
- // Check for thinking mode
- thinkingEnabled, budgetTokens := checkThinkingMode(claudeBody)
+ // Check for thinking mode using the comprehensive IsThinkingEnabled function
+ // This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format
+ thinkingEnabled := IsThinkingEnabled(claudeBody)
+ _, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available
+ if budgetTokens <= 0 {
+ // Calculate budgetTokens based on max_tokens if available
+ // Use 50% of max_tokens for thinking, with min 8000 and max 24000
+ if maxTokens > 0 {
+ budgetTokens = maxTokens / 2
+ if budgetTokens < 8000 {
+ budgetTokens = 8000
+ }
+ if budgetTokens > 24000 {
+ budgetTokens = 24000
+ }
+ log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
+ } else {
+ budgetTokens = 16000 // Default budget tokens
+ }
+ }
// Inject timestamp context
timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
@@ -185,6 +219,17 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
systemPrompt += kirocommon.KiroAgenticSystemPrompt
}
+ // Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints
+ // Claude tool_choice values: {"type": "auto/any/tool", "name": "..."}
+ toolChoiceHint := extractClaudeToolChoiceHint(claudeBody)
+ if toolChoiceHint != "" {
+ if systemPrompt != "" {
+ systemPrompt += "\n"
+ }
+ systemPrompt += toolChoiceHint
+ log.Debugf("kiro: injected tool_choice hint into system prompt")
+ }
+
// Inject thinking hint when thinking mode is enabled
if thinkingEnabled {
if systemPrompt != "" {
@@ -235,7 +280,7 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
// Build inferenceConfig if we have any inference parameters
var inferenceConfig *KiroInferenceConfig
- if maxTokens > 0 || hasTemperature {
+ if maxTokens > 0 || hasTemperature || hasTopP {
inferenceConfig = &KiroInferenceConfig{}
if maxTokens > 0 {
inferenceConfig.MaxTokens = int(maxTokens)
@@ -243,6 +288,9 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
if hasTemperature {
inferenceConfig.Temperature = temperature
}
+ if hasTopP {
+ inferenceConfig.TopP = topP
+ }
}
payload := KiroPayload{
@@ -324,6 +372,93 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
return thinkingEnabled, budgetTokens
}
+// IsThinkingEnabled is a public wrapper to check if thinking mode is enabled.
+// This is used by the executor to determine whether to parse tags in responses.
+// When thinking is NOT enabled in the request, tags in responses should be
+// treated as regular text content, not as thinking blocks.
+//
+// Supports multiple formats:
+// - Claude API format: thinking.type = "enabled"
+// - OpenAI format: reasoning_effort parameter
+// - AMP/Cursor format: interleaved in system prompt
+func IsThinkingEnabled(body []byte) bool {
+ // Check Claude API format first (thinking.type = "enabled")
+ enabled, _ := checkThinkingMode(body)
+ if enabled {
+ log.Debugf("kiro: IsThinkingEnabled returning true (Claude API format)")
+ return true
+ }
+
+ // Check OpenAI format: reasoning_effort parameter
+ // Valid values: "low", "medium", "high", "auto" (not "none")
+ reasoningEffort := gjson.GetBytes(body, "reasoning_effort")
+ if reasoningEffort.Exists() {
+ effort := reasoningEffort.String()
+ if effort != "" && effort != "none" {
+ log.Debugf("kiro: thinking mode enabled via OpenAI reasoning_effort: %s", effort)
+ return true
+ }
+ }
+
+ // Check AMP/Cursor format: interleaved in system prompt
+ // This is how AMP client passes thinking configuration
+ bodyStr := string(body)
+ if strings.Contains(bodyStr, "") && strings.Contains(bodyStr, "") {
+ // Extract thinking mode value
+ startTag := ""
+ endTag := ""
+ startIdx := strings.Index(bodyStr, startTag)
+ if startIdx >= 0 {
+ startIdx += len(startTag)
+ endIdx := strings.Index(bodyStr[startIdx:], endTag)
+ if endIdx >= 0 {
+ thinkingMode := bodyStr[startIdx : startIdx+endIdx]
+ if thinkingMode == "interleaved" || thinkingMode == "enabled" {
+ log.Debugf("kiro: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
+ return true
+ }
+ }
+ }
+ }
+
+ // Check OpenAI format: max_completion_tokens with reasoning (o1-style)
+ // Some clients use this to indicate reasoning mode
+ if gjson.GetBytes(body, "max_completion_tokens").Exists() {
+ // If max_completion_tokens is set, check if model name suggests reasoning
+ model := gjson.GetBytes(body, "model").String()
+ if strings.Contains(strings.ToLower(model), "thinking") ||
+ strings.Contains(strings.ToLower(model), "reason") {
+ log.Debugf("kiro: thinking mode enabled via model name hint: %s", model)
+ return true
+ }
+ }
+
+ log.Debugf("kiro: IsThinkingEnabled returning false (no thinking mode detected)")
+ return false
+}
+
+// shortenToolNameIfNeeded shortens tool names that exceed 64 characters.
+// MCP tools often have long names like "mcp__server-name__tool-name".
+// This preserves the "mcp__" prefix and last segment when possible.
+func shortenToolNameIfNeeded(name string) string {
+ const limit = 64
+ if len(name) <= limit {
+ return name
+ }
+ // For MCP tools, try to preserve prefix and last segment
+ if strings.HasPrefix(name, "mcp__") {
+ idx := strings.LastIndex(name, "__")
+ if idx > 0 {
+ cand := "mcp__" + name[idx+2:]
+ if len(cand) > limit {
+ return cand[:limit]
+ }
+ return cand
+ }
+ }
+ return name[:limit]
+}
+
// convertClaudeToolsToKiro converts Claude tools to Kiro format
func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
var kiroTools []KiroToolWrapper
@@ -336,6 +471,13 @@ func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
description := tool.Get("description").String()
inputSchema := tool.Get("input_schema").Value()
+ // Shorten tool name if it exceeds 64 characters (common with MCP tools)
+ originalName := name
+ name = shortenToolNameIfNeeded(name)
+ if name != originalName {
+ log.Debugf("kiro: shortened tool name from '%s' to '%s'", originalName, name)
+ }
+
// CRITICAL FIX: Kiro API requires non-empty description
if strings.TrimSpace(description) == "" {
description = fmt.Sprintf("Tool: %s", name)
@@ -467,6 +609,34 @@ func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
return unique
}
+// extractClaudeToolChoiceHint extracts tool_choice from Claude request and returns a system prompt hint.
+// Claude tool_choice values:
+// - {"type": "auto"}: Model decides (default, no hint needed)
+// - {"type": "any"}: Must use at least one tool
+// - {"type": "tool", "name": "..."}: Must use specific tool
+func extractClaudeToolChoiceHint(claudeBody []byte) string {
+ toolChoice := gjson.GetBytes(claudeBody, "tool_choice")
+ if !toolChoice.Exists() {
+ return ""
+ }
+
+ toolChoiceType := toolChoice.Get("type").String()
+ switch toolChoiceType {
+ case "any":
+ return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
+ case "tool":
+ toolName := toolChoice.Get("name").String()
+ if toolName != "" {
+ return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
+ }
+ case "auto":
+ // Default behavior, no hint needed
+ return ""
+ }
+
+ return ""
+}
+
// BuildUserMessageStruct builds a user message and extracts tool results
func BuildUserMessageStruct(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) {
content := msg.Get("content")
diff --git a/internal/translator/kiro/common/constants.go b/internal/translator/kiro/common/constants.go
index 1d4b0330..96174b8c 100644
--- a/internal/translator/kiro/common/constants.go
+++ b/internal/translator/kiro/common/constants.go
@@ -12,6 +12,15 @@ const (
// ThinkingEndTag is the end tag for thinking blocks in responses.
ThinkingEndTag = ""
+ // CodeFenceMarker is the markdown code fence marker.
+ CodeFenceMarker = "```"
+
+ // AltCodeFenceMarker is the alternative markdown code fence marker.
+ AltCodeFenceMarker = "~~~"
+
+ // InlineCodeMarker is the markdown inline code marker (backtick).
+ InlineCodeMarker = "`"
+
// KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes.
// AWS Kiro API has a 2-3 minute timeout for large file write operations.
KiroAgenticSystemPrompt = `
diff --git a/internal/translator/kiro/openai/kiro_openai.go b/internal/translator/kiro/openai/kiro_openai.go
index 35cd0424..d5822998 100644
--- a/internal/translator/kiro/openai/kiro_openai.go
+++ b/internal/translator/kiro/openai/kiro_openai.go
@@ -156,8 +156,9 @@ func ConvertKiroStreamToOpenAI(ctx context.Context, model string, originalReques
}
case "message_stop":
- // Final event - emit [DONE]
- results = append(results, BuildOpenAISSEDone())
+ // Final event - do NOT emit [DONE] here
+ // The handler layer (openai_handlers.go) will send [DONE] when the stream closes
+ // Emitting [DONE] here would cause duplicate [DONE] markers
case "ping":
// Ping event with usage - optionally emit usage chunk
diff --git a/internal/translator/kiro/openai/kiro_openai_request.go b/internal/translator/kiro/openai/kiro_openai_request.go
index 4aaa8b4e..21b15aa0 100644
--- a/internal/translator/kiro/openai/kiro_openai_request.go
+++ b/internal/translator/kiro/openai/kiro_openai_request.go
@@ -29,6 +29,7 @@ type KiroPayload struct {
type KiroInferenceConfig struct {
MaxTokens int `json:"maxTokens,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
+ TopP float64 `json:"topP,omitempty"`
}
// KiroConversationState holds the conversation context
@@ -134,9 +135,15 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
// Extract max_tokens for potential use in inferenceConfig
+ // Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
+ const kiroMaxOutputTokens = 32000
var maxTokens int64
if mt := gjson.GetBytes(openaiBody, "max_tokens"); mt.Exists() {
maxTokens = mt.Int()
+ if maxTokens == -1 {
+ maxTokens = kiroMaxOutputTokens
+ log.Debugf("kiro-openai: max_tokens=-1 converted to %d", kiroMaxOutputTokens)
+ }
}
// Extract temperature if specified
@@ -147,6 +154,15 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
hasTemperature = true
}
+ // Extract top_p if specified
+ var topP float64
+ var hasTopP bool
+ if tp := gjson.GetBytes(openaiBody, "top_p"); tp.Exists() {
+ topP = tp.Float()
+ hasTopP = true
+ log.Debugf("kiro-openai: extracted top_p: %.2f", topP)
+ }
+
// Normalize origin value for Kiro API compatibility
origin = normalizeOrigin(origin)
log.Debugf("kiro-openai: normalized origin value: %s", origin)
@@ -180,6 +196,54 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
systemPrompt += kirocommon.KiroAgenticSystemPrompt
}
+ // Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints
+ // OpenAI tool_choice values: "none", "auto", "required", or {"type":"function","function":{"name":"..."}}
+ toolChoiceHint := extractToolChoiceHint(openaiBody)
+ if toolChoiceHint != "" {
+ if systemPrompt != "" {
+ systemPrompt += "\n"
+ }
+ systemPrompt += toolChoiceHint
+ log.Debugf("kiro-openai: injected tool_choice hint into system prompt")
+ }
+
+ // Handle response_format parameter - Kiro doesn't support it natively, so we inject system prompt hints
+ // OpenAI response_format: {"type": "json_object"} or {"type": "json_schema", "json_schema": {...}}
+ responseFormatHint := extractResponseFormatHint(openaiBody)
+ if responseFormatHint != "" {
+ if systemPrompt != "" {
+ systemPrompt += "\n"
+ }
+ systemPrompt += responseFormatHint
+ log.Debugf("kiro-openai: injected response_format hint into system prompt")
+ }
+
+ // Check for thinking mode and inject thinking hint
+ // Supports OpenAI reasoning_effort parameter and model name hints
+ thinkingEnabled, budgetTokens := checkThinkingModeFromOpenAI(openaiBody)
+ if thinkingEnabled {
+ // Adjust budgetTokens based on max_tokens if not explicitly set by reasoning_effort
+ // Use 50% of max_tokens for thinking, with min 8000 and max 24000
+ if maxTokens > 0 && budgetTokens == 16000 { // 16000 is the default, meaning not explicitly set
+ calculatedBudget := maxTokens / 2
+ if calculatedBudget < 8000 {
+ calculatedBudget = 8000
+ }
+ if calculatedBudget > 24000 {
+ calculatedBudget = 24000
+ }
+ budgetTokens = calculatedBudget
+ log.Debugf("kiro-openai: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
+ }
+
+ if systemPrompt != "" {
+ systemPrompt += "\n"
+ }
+ dynamicThinkingHint := fmt.Sprintf("interleaved%d", budgetTokens)
+ systemPrompt += dynamicThinkingHint
+ log.Debugf("kiro-openai: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
+ }
+
// Convert OpenAI tools to Kiro format
kiroTools := convertOpenAIToolsToKiro(tools)
@@ -220,7 +284,7 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
// Build inferenceConfig if we have any inference parameters
var inferenceConfig *KiroInferenceConfig
- if maxTokens > 0 || hasTemperature {
+ if maxTokens > 0 || hasTemperature || hasTopP {
inferenceConfig = &KiroInferenceConfig{}
if maxTokens > 0 {
inferenceConfig.MaxTokens = int(maxTokens)
@@ -228,6 +292,9 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
if hasTemperature {
inferenceConfig.Temperature = temperature
}
+ if hasTopP {
+ inferenceConfig.TopP = topP
+ }
}
payload := KiroPayload{
@@ -292,6 +359,28 @@ func extractSystemPromptFromOpenAI(messages gjson.Result) string {
return strings.Join(systemParts, "\n")
}
+// shortenToolNameIfNeeded shortens tool names that exceed 64 characters.
+// MCP tools often have long names like "mcp__server-name__tool-name".
+// This preserves the "mcp__" prefix and last segment when possible.
+func shortenToolNameIfNeeded(name string) string {
+ const limit = 64
+ if len(name) <= limit {
+ return name
+ }
+ // For MCP tools, try to preserve prefix and last segment
+ if strings.HasPrefix(name, "mcp__") {
+ idx := strings.LastIndex(name, "__")
+ if idx > 0 {
+ cand := "mcp__" + name[idx+2:]
+ if len(cand) > limit {
+ return cand[:limit]
+ }
+ return cand
+ }
+ }
+ return name[:limit]
+}
+
// convertOpenAIToolsToKiro converts OpenAI tools to Kiro format
func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
var kiroTools []KiroToolWrapper
@@ -314,6 +403,13 @@ func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
description := fn.Get("description").String()
parameters := fn.Get("parameters").Value()
+ // Shorten tool name if it exceeds 64 characters (common with MCP tools)
+ originalName := name
+ name = shortenToolNameIfNeeded(name)
+ if name != originalName {
+ log.Debugf("kiro-openai: shortened tool name from '%s' to '%s'", originalName, name)
+ }
+
// CRITICAL FIX: Kiro API requires non-empty description
if strings.TrimSpace(description) == "" {
description = fmt.Sprintf("Tool: %s", name)
@@ -584,6 +680,153 @@ func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResul
return finalContent
}
+// checkThinkingModeFromOpenAI checks if thinking mode is enabled in the OpenAI request.
+// Returns (thinkingEnabled, budgetTokens).
+// Supports:
+// - reasoning_effort parameter (low/medium/high/auto)
+// - Model name containing "thinking" or "reason"
+// - <thinking_mode> tag in system prompt (AMP/Cursor format)
+func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
+ var budgetTokens int64 = 16000 // Default budget
+
+ // Check OpenAI format: reasoning_effort parameter
+ // Valid values: "low", "medium", "high", "auto" (not "none")
+ reasoningEffort := gjson.GetBytes(openaiBody, "reasoning_effort")
+ if reasoningEffort.Exists() {
+ effort := reasoningEffort.String()
+ if effort != "" && effort != "none" {
+ log.Debugf("kiro-openai: thinking mode enabled via reasoning_effort: %s", effort)
+ // Adjust budget based on effort level
+ switch effort {
+ case "low":
+ budgetTokens = 8000
+ case "medium":
+ budgetTokens = 16000
+ case "high":
+ budgetTokens = 32000
+ case "auto":
+ budgetTokens = 16000
+ }
+ return true, budgetTokens
+ }
+ }
+
+ // Check AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
+ bodyStr := string(openaiBody)
+ if strings.Contains(bodyStr, "<thinking_mode>") && strings.Contains(bodyStr, "</thinking_mode>") {
+ startTag := "<thinking_mode>"
+ endTag := "</thinking_mode>"
+ startIdx := strings.Index(bodyStr, startTag)
+ if startIdx >= 0 {
+ startIdx += len(startTag)
+ endIdx := strings.Index(bodyStr[startIdx:], endTag)
+ if endIdx >= 0 {
+ thinkingMode := bodyStr[startIdx : startIdx+endIdx]
+ if thinkingMode == "interleaved" || thinkingMode == "enabled" {
+ log.Debugf("kiro-openai: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
+ // Try to extract max_thinking_length if present
+ if maxLenStart := strings.Index(bodyStr, "<max_thinking_length>"); maxLenStart >= 0 {
+ maxLenStart += len("<max_thinking_length>")
+ if maxLenEnd := strings.Index(bodyStr[maxLenStart:], "</max_thinking_length>"); maxLenEnd >= 0 {
+ maxLenStr := bodyStr[maxLenStart : maxLenStart+maxLenEnd]
+ if parsed, err := fmt.Sscanf(maxLenStr, "%d", &budgetTokens); err == nil && parsed == 1 {
+ log.Debugf("kiro-openai: extracted max_thinking_length: %d", budgetTokens)
+ }
+ }
+ }
+ return true, budgetTokens
+ }
+ }
+ }
+ }
+
+ // Check model name for thinking hints
+ model := gjson.GetBytes(openaiBody, "model").String()
+ modelLower := strings.ToLower(model)
+ if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
+ log.Debugf("kiro-openai: thinking mode enabled via model name hint: %s", model)
+ return true, budgetTokens
+ }
+
+ log.Debugf("kiro-openai: no thinking mode detected in OpenAI request")
+ return false, budgetTokens
+}
+
+// extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint.
+// OpenAI tool_choice values:
+// - "none": Don't use any tools
+// - "auto": Model decides (default, no hint needed)
+// - "required": Must use at least one tool
+// - {"type":"function","function":{"name":"..."}} : Must use specific tool
+func extractToolChoiceHint(openaiBody []byte) string {
+ toolChoice := gjson.GetBytes(openaiBody, "tool_choice")
+ if !toolChoice.Exists() {
+ return ""
+ }
+
+ // Handle string values
+ if toolChoice.Type == gjson.String {
+ switch toolChoice.String() {
+ case "none":
+ // Note: When tool_choice is "none", we should ideally not pass tools at all
+ // But since we can't modify tool passing here, we add a strong hint
+ return "[INSTRUCTION: Do NOT use any tools. Respond with text only.]"
+ case "required":
+ return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
+ case "auto":
+ // Default behavior, no hint needed
+ return ""
+ }
+ }
+
+ // Handle object value: {"type":"function","function":{"name":"..."}}
+ if toolChoice.IsObject() {
+ if toolChoice.Get("type").String() == "function" {
+ toolName := toolChoice.Get("function.name").String()
+ if toolName != "" {
+ return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
+ }
+ }
+ }
+
+ return ""
+}
+
+// extractResponseFormatHint extracts response_format from OpenAI request and returns a system prompt hint.
+// OpenAI response_format values:
+// - {"type": "text"}: Default, no hint needed
+// - {"type": "json_object"}: Must respond with valid JSON
+// - {"type": "json_schema", "json_schema": {...}}: Must respond with JSON matching schema
+func extractResponseFormatHint(openaiBody []byte) string {
+ responseFormat := gjson.GetBytes(openaiBody, "response_format")
+ if !responseFormat.Exists() {
+ return ""
+ }
+
+ formatType := responseFormat.Get("type").String()
+ switch formatType {
+ case "json_object":
+ return "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
+ case "json_schema":
+ // Extract schema if provided
+ schema := responseFormat.Get("json_schema.schema")
+ if schema.Exists() {
+ schemaStr := schema.Raw
+ // Truncate if too long
+ if len(schemaStr) > 500 {
+ schemaStr = schemaStr[:500] + "..."
+ }
+ return fmt.Sprintf("[INSTRUCTION: You MUST respond with valid JSON that matches this schema: %s. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]", schemaStr)
+ }
+ return "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
+ case "text":
+ // Default behavior, no hint needed
+ return ""
+ }
+
+ return ""
+}
+
// deduplicateToolResults removes duplicate tool results
func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
if len(toolResults) == 0 {
diff --git a/internal/translator/kiro/openai/kiro_openai_stream.go b/internal/translator/kiro/openai/kiro_openai_stream.go
index d550a8d8..e72d970e 100644
--- a/internal/translator/kiro/openai/kiro_openai_stream.go
+++ b/internal/translator/kiro/openai/kiro_openai_stream.go
@@ -5,7 +5,6 @@ package openai
import (
"encoding/json"
- "fmt"
"time"
"github.com/google/uuid"
@@ -34,9 +33,12 @@ func NewOpenAIStreamState(model string) *OpenAIStreamState {
}
}
-// FormatSSEEvent formats a JSON payload as an SSE event
+// FormatSSEEvent formats a JSON payload for SSE streaming.
+// Note: This returns raw JSON data without "data:" prefix.
+// The SSE "data:" prefix is added by the Handler layer (e.g., openai_handlers.go)
+// to maintain architectural consistency and avoid double-prefix issues.
func FormatSSEEvent(data []byte) string {
- return fmt.Sprintf("data: %s", string(data))
+ return string(data)
}
// BuildOpenAISSETextDelta creates an SSE event for text content delta
@@ -130,9 +132,12 @@ func BuildOpenAISSEUsage(state *OpenAIStreamState, usageInfo usage.Detail) strin
return FormatSSEEvent(result)
}
-// BuildOpenAISSEDone creates the final [DONE] SSE event
+// BuildOpenAISSEDone creates the final [DONE] SSE event.
+// Note: This returns raw "[DONE]" without "data:" prefix.
+// The SSE "data:" prefix is added by the Handler layer (e.g., openai_handlers.go)
+// to maintain architectural consistency and avoid double-prefix issues.
func BuildOpenAISSEDone() string {
- return "data: [DONE]"
+ return "[DONE]"
}
// buildBaseChunk creates a base chunk structure for streaming